本文整理汇总了Python中sgmllib.SGMLParser类的典型用法代码示例。如果您正苦于以下问题:Python SGMLParser类的具体用法?Python SGMLParser怎么用?Python SGMLParser使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SGMLParser类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self):
    """Initialise the base SGML parser and this instance's parsing state."""
    SGMLParser.__init__(self)
    # A fresh list per instance.
    self.name = []
    # Both markers start as the empty string.
    self.is_div = self.is_a = ""
    # Both counters start at zero.
    self.div_counter = self.a_counter = 0
开发者ID:mrobot2,项目名称:GitHubProjectParser,代码行数:7,代码来源:github.py
示例2: __init__
def __init__(self, email, password):
    """Initialise parser state, account settings and a cookie-aware opener.

    Args:
        email: stored on ``self.email``.
        password: stored on ``self.password``.

    Side effects:
        Creates a ``Ghost`` instance and a ``MongoClient`` for
        127.0.0.1:27017, installs a process-wide urllib2 opener built around
        an ``HTTPCookieProcessor``, and prints a confirmation message.

    Any exception raised while building the cookie jar or the cookie
    processor propagates to the caller unchanged.
    """
    SGMLParser.__init__(self)

    # Parsing state
    self.h3 = False
    self.h3_is_ready = False
    self.div = False
    self.h3_and_div = False
    self.a = False
    self.depth = 0
    self.names = ""
    self.dic = {}

    # Account and site settings
    self.email = email
    self.password = password
    self.domain = 'renren.com'
    self.file = None
    self.friend_file = None
    self.ghost = Ghost()
    self.cookie = None
    self.group_url = "http://friend.renren.com/groupsdata"
    self.group_home = "http://friend.renren.com/managefriends"
    self.file_url = "renren_cookie.txt"
    self.mongodb = MongoClient("127.0.0.1", 27017)

    # The former ``try / except: raise / else`` wrapper only re-raised, so
    # the same statements run here directly with identical error behaviour.
    self.cookie = cookielib.LWPCookieJar(self.file_url)
    cookieProc = urllib2.HTTPCookieProcessor(self.cookie)
    opener = urllib2.build_opener(cookieProc)
    urllib2.install_opener(opener)

    # Single-argument parenthesised form prints the same text whether
    # ``print`` is a statement or a function.
    print("init finished successfully!!")
开发者ID:Aireed,项目名称:self-code,代码行数:35,代码来源:test_renren.py
示例3: __init__
def __init__(self, term, verbose=False):
    """Initialise the base parser and remember the import settings.

    Args:
        term: stored on ``self.term``.
        verbose: stored on ``self.verbose`` (defaults to False).
    """
    SGMLParser.__init__(self)
    self.term, self.verbose = term, verbose
    # Neither tag flag is set initially.
    self.inside_pre = self.inside_a = False
    # First element of Major.objects.all().
    # NOTE(review): indexing [0] presumably fails when there are no Major
    # rows -- confirm against the data model.
    self.major = Major.objects.all()[0]
开发者ID:tpetr,项目名称:schedr,代码行数:7,代码来源:importer.py
示例4: reset
def reset(self):
    """Reset the base parser state and start with an empty URL list.

    Returns:
        None when the base-class reset succeeds; 0 when it raised, in which
        case the error is printed instead of being propagated.
    """
    # Create the list before calling the base class so that ``self.urls``
    # exists even when the base reset fails and the error is swallowed below.
    self.urls = []
    try:
        SGMLParser.reset(self)
    except Exception as e:
        # Same output as the former ``print "reset111111111111",e``.
        print("reset111111111111 %s" % (e,))
        return 0
开发者ID:shuanzi,项目名称:test_python,代码行数:7,代码来源:urllister.py
示例5: __init__
def __init__(self, verbose=0):
    """Initialise the base parser and the fields filled in while parsing.

    Args:
        verbose: forwarded to ``SGMLParser.__init__``.
    """
    SGMLParser.__init__(self, verbose)
    self.savedata = None
    self.metatags = {}
    # Title and body both start as empty strings.
    self.title = self.body = ''
开发者ID:goschtl,项目名称:zope,代码行数:7,代码来源:utils.py
示例6: reset
def reset(self):
    """Reset the base parser and clear the meaning-collection state."""
    SGMLParser.reset(self)
    self.meanings = []
    self.currentMeaning = ''
    # Both of these flags start cleared ...
    self.addText = self.processingMeaning = False
    # ... while this one starts set.
    self.processMoreMeanings = True
开发者ID:PepeGuerrero,项目名称:LearningPython,代码行数:7,代码来源:rae.py
示例7: reset
def reset(self):
    """Reset the base parser and clear the majors-collection state."""
    SGMLParser.reset(self)
    self.majors = {}
    self.current_name = None
    self.data = ''
    # Both flags start cleared.
    self.inside_majors = self.capture = False
开发者ID:tpetr,项目名称:schedr,代码行数:7,代码来源:importer.py
示例8: __init__
def __init__(self, httpResponse, normalizeMarkup=True, verbose=0):
    """Initialise both base classes and the parser state, then pre-parse.

    Args:
        httpResponse: passed to ``abstractParser.__init__``, queried with
            ``getCharset()`` and handed to ``_regex_url_parse`` and
            ``_preParse``.
        normalizeMarkup: stored on ``self._normalizeMarkup``.
        verbose: forwarded to ``SGMLParser.__init__``.
    """
    abstractParser.__init__(self, httpResponse)
    SGMLParser.__init__(self, verbose)

    # Constants: tag names and attribute names.
    self._tagsContainingURLs = (
        'go', 'a', 'img', 'link', 'script', 'iframe', 'object',
        'embed', 'area', 'frame', 'applet', 'input', 'base',
        'div', 'layer', 'ilayer', 'bgsound', 'form',
    )
    self._urlAttrs = ('href', 'src', 'data', 'action')

    # Internal result containers, one fresh list each.
    self._tag_and_url = []
    self._parsed_URLs = []
    self._re_URLs = []
    self._forms = []
    self._commentsInDocument = []
    self._scriptsInDocument = []

    # Meta tags
    self._metaRedirs = []
    self._metaTags = []

    self._encoding = httpResponse.getCharset()
    self._normalizeMarkup = normalizeMarkup

    # All "inside <tag>" flags start cleared.
    self._insideForm = self._insideSelect = False
    self._insideTextarea = self._insideScript = False

    # Fill self._re_URLs list with url objects
    self._regex_url_parse(httpResponse)
    # Now we are ready to work
    self._preParse(httpResponse)
开发者ID:adambaldwin2,项目名称:test,代码行数:34,代码来源:sgmlParser.py
示例9: __init__
def __init__(self, url):
    """Initialise the base parser and the scraping state.

    Args:
        url: stored on ``self.url_name``.
    """
    SGMLParser.__init__(self)
    # NOTE(review): explicit base-class reset kept from the original;
    # it may be redundant after __init__ -- confirm before removing.
    SGMLParser.reset(self)

    self.vul_value = []
    self.url_value = ""

    # State flags, all initially None. ``is_tr`` used to be assigned twice;
    # it is now set once.
    self.is_table = None
    self.is_date = None
    self.is_tr = None
    self.is_span = None
    self.is_vul_name = None
    self.is_td = None
    self.is_name = None
    self.is_vul = None
    self.is_name_desc = None
    self.is_date_desc = None
    self.is_cvss_desc = None
    self.is_a = None
    self.is_cvss = None
    self.is_href_desc = None
    self.is_url = None
    self.is_vul_desc = None

    # Counters
    self.table_count = 0
    self.td_count = 0
    self.a_count = 0

    # Text fields, all initially empty
    self.cvss_name = ""
    self.vul_name = ""
    self.href_name = ""
    self.vul_type_name = ""
    self.date_name = ""
    self.cve_name = ""
    self.desc_name = ""

    self.url_name = url
开发者ID:forprevious,项目名称:auto_crawler,代码行数:35,代码来源:uscert_info.py
示例10: __init__
def __init__(self):
    """Initialise both base classes and zero every per-tag flag."""
    BaseModule.__init__(self)
    SGMLParser.__init__(self)

    # One integer flag per tag name, all starting at 0.
    self.is_b = self.is_i = self.is_em = self.is_font = 0
    self.is_span = self.is_a = self.is_img = self.is_br = 0
    self.is_p = self.is_div = self.is_h1 = self.is_h3 = 0
    self.is_ul = self.is_li = self.is_dl = self.is_dt = self.is_dd = 0
    self.is_table = self.is_tbody = self.is_tr = self.is_td = 0

    self.req = None
开发者ID:lixiaoyu-code,项目名称:jreading.com,代码行数:27,代码来源:base_module.py
示例11: reset
def reset(self):
    """Reset the base parser and restore this source's defaults."""
    SGMLParser.reset(self)
    self.name, self.url = (
        "Jokes2go.com",
        "http://www.jokes2go.com/cgi-perl/randjoke.cgi?type=j",
    )
    self.quote = []
    self.current_quote = ""
    # Indicates that the parser is currently processing the content inside
    # a <pre></pre> tag.
    self.inside_pre_element = False
开发者ID:Awkee,项目名称:cairo-dock-plug-ins-extras,代码行数:7,代码来源:JokestogoParser.py
示例12: reset
def reset(self):
    """Re-create the formatting state, then reset the base parser."""
    self.enum = 0
    self.enumstrs, self.enumbrks = ("", " * ", " + "), ("", " ", " ")
    # Two independent empty lists.
    self.pieces, self.refs = [], []
    SGMLParser.reset(self)
开发者ID:frugalware,项目名称:homepage-ng,代码行数:7,代码来源:post-receive-news.py
示例13: __init__
def __init__(self, input_file, mapping):
    """Rewind the input, store the arguments, then initialise the parser.

    Args:
        input_file: seekable file object; rewound with ``seek(0)`` and
            stored on ``self.input_file``.
        mapping: stored on ``self.mapping``.
    """
    input_file.seek(0)
    self.input_file, self.mapping = input_file, mapping
    # Offset and count both start at zero.
    self.current_offset = self.count = 0
    SGMLParser.__init__(self)
开发者ID:adamar,项目名称:wikidump,代码行数:7,代码来源:parser.py
示例14: __init__
def __init__(self, username, password):
    """Initialise the parser, store credentials, install a cookie opener.

    Args:
        username: stored on ``self.username``.
        password: stored on ``self.password``.

    Side effects:
        Installs a process-wide urllib2 opener backed by a new in-memory
        ``CookieJar``.
    """
    SGMLParser.__init__(self)
    self.username, self.password = username, password
    cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    urllib2.install_opener(urllib2.build_opener(cookie_handler))
开发者ID:Timeship,项目名称:Pycode,代码行数:7,代码来源:renren.py
示例15: reset
def reset(self):
    """Reset the base parser and zero all tracking counters/flags."""
    SGMLParser.reset(self)
    self.in_a_watchers = self.in_a_forks = 0
    self.in_div = self.in_fork_flag = 0
    # Existence check: anything that could possibly be missing needs an
    # existence check. (Original author wonders whether this could be
    # modularised for reuse.)
    self.exist_in_fork_flag = 0
开发者ID:YSHCI,项目名称:GithubParser,代码行数:7,代码来源:parseSourceV1_0.py
示例16: __init__
def __init__(self, base, username, password):
    """Initialise parser state, login form fields and a cookie-aware opener.

    Args:
        base: stored on ``self.base``.
        username: stored on ``self.username``.
        password: stored on ``self.password``.

    Side effects:
        Installs a process-wide urllib2 opener backed by a new in-memory
        ``CookieJar``. Any exception raised while creating the jar or the
        cookie processor propagates to the caller unchanged.
    """
    SGMLParser.__init__(self)

    # Parsing state
    self.h3 = False
    self.h3_is_ready = False
    self.div = False
    self.h3_and_div = False
    self.a = False
    self.depth = 0
    self.names = ""
    self.dic = {}

    # Login settings
    self.base = base
    self.fastloginfield = "username"
    self.username = username
    self.password = password
    self.quickforward = "yes"
    self.handlekey = "ls"

    # The former ``try / except: raise / else`` wrapper only re-raised, so
    # the same statements run here directly with identical error behaviour.
    cookie = cookielib.CookieJar()
    cookieProc = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(cookieProc)
    urllib2.install_opener(opener)
开发者ID:HDRorz,项目名称:python,代码行数:25,代码来源:DiscuzLogin.py
示例17: reset
def reset(self):
    """Reset the base parser and restore this source's defaults."""
    SGMLParser.reset(self)
    self.url = "http://www.100blagues.com/random"
    # List of quotes to be filled.
    self.quote = []
    self.current_quote = ""
    # First flag: parser is inside the <div class="left comment">...</div>
    # tag. Second flag: parser is inside
    # <div class="left comment">...<a href>...</a>...</div>.
    self.inside_div_element = self.inside_div_a_element = False
开发者ID:Awkee,项目名称:cairo-dock-plug-ins-extras,代码行数:7,代码来源:HundredblaguesParser.py
示例18: __init__
def __init__(self):
    """Create empty page-level state, then initialise the base SGML parser."""
    # Page URL, and base url for the <base href="..."> fix below
    self.url = self.base = None
    # Fix for <base href="..."> links
    self.base_href = False
    # Collected links and images (independent lists) and link positions
    self.links, self.images = [], []
    self.linkpos = {}
    # Keywords
    self.keywords = []
    # Description of page and title of page
    self.description = self.title = ''
    self.title_flag = True
    # anchor links flag
    self._anchors = True
    # For META robots tag
    self.can_index = self.can_follow = True
    # Current tag
    self._tag = ''
    SGMLParser.__init__(self)
    # Type; assigned only after the base initialiser has run
    self.typ = 0
开发者ID:pombredanne,项目名称:harvestman-crawler-2,代码行数:26,代码来源:pageparser.py
示例19: reset
def reset(self):
    """Reset the base parser and clear the collected URL state."""
    SGMLParser.reset(self)
    # Two independent empty lists.
    self.url, self.next_url = [], []
    self.is_td = self.is_just = False
    # Unlike the other flags, this one starts set.
    self.is_target = True
开发者ID:kymo,项目名称:CrawlerBBS,代码行数:7,代码来源:parser.py
示例20: reset
def reset(self):
    """Reset the base parser and restore this source's defaults."""
    SGMLParser.reset(self)
    self.url = "http://www.vitadimerda.it/aleatorie"
    # List of quotes to be filled.
    self.quote = []
    self.current_quote = ""
    # inside_div_element indicates if the parser is inside the <div></div>
    # tag; both flags start cleared.
    self.inside_div_element = self.inside_div_p_element = False
开发者ID:Awkee,项目名称:cairo-dock-plug-ins-extras,代码行数:7,代码来源:VitadimerdaParser.py
注:本文中的sgmllib.SGMLParser类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论