• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python sgmllib.SGMLParser类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sgmllib.SGMLParser的典型用法代码示例。如果您正苦于以下问题:Python SGMLParser类的具体用法?Python SGMLParser怎么用?Python SGMLParser使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了SGMLParser类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __init__

	def __init__(self):
		"""Initialise the SGMLParser base and clear this parser's own state."""
		SGMLParser.__init__(self)
		# Collected names; starts out empty.
		self.name = []
		# is_div / is_a start as empty strings (presumably tag-state markers).
		self.is_div = self.is_a = ""
		# Both counters start from zero.
		self.div_counter = self.a_counter = 0
开发者ID:mrobot2,项目名称:GitHubProjectParser,代码行数:7,代码来源:github.py


示例2: __init__

    def __init__(self, email, password):
        """Set up parser state, account details and a cookie-aware urllib2 opener.

        Args:
            email: stored on ``self.email``.
            password: stored on ``self.password``.

        Side effects visible in this method: constructs a ``Ghost`` instance
        and a ``MongoClient`` for 127.0.0.1:27017, installs a global urllib2
        opener built around an ``LWPCookieJar``, and prints a completion
        message.

        Raises:
            Whatever the cookie jar / cookie processor construction raises;
            nothing is caught here.
        """
        SGMLParser.__init__(self)

        # Parsing state.
        self.h3 = False
        self.h3_is_ready = False
        self.div = False
        self.h3_and_div = False
        self.a = False
        self.depth = 0
        self.names = ""
        self.dic = {}

        # Account and endpoint configuration.
        self.email = email
        self.password = password
        self.domain = 'renren.com'
        self.file = None
        self.friend_file = None
        self.ghost = Ghost()
        self.cookie = None
        self.group_url = "http://friend.renren.com/groupsdata"
        self.group_home = "http://friend.renren.com/managefriends"
        self.file_url = "renren_cookie.txt"

        self.mongodb = MongoClient("127.0.0.1", 27017)

        # The previous ``try / except: raise / else`` wrapper re-raised every
        # error unchanged, so the straight-line form behaves the same.
        self.cookie = cookielib.LWPCookieJar(self.file_url)
        cookieProc = urllib2.HTTPCookieProcessor(self.cookie)
        opener = urllib2.build_opener(cookieProc)
        urllib2.install_opener(opener)

        # Single-argument parenthesised print emits the same text on Python 2
        # and is also valid Python 3 syntax.
        print("init finished successfully!!")
开发者ID:Aireed,项目名称:self-code,代码行数:35,代码来源:test_renren.py


示例3: __init__

 def __init__(self, term, verbose=False):
     """Initialise the base parser, store the arguments and pick a Major.

     Args:
         term: stored on ``self.term``.
         verbose: stored on ``self.verbose`` (assigned after the base
             initialiser has run).
     """
     SGMLParser.__init__(self)
     self.inside_pre = self.inside_a = False
     self.term, self.verbose = term, verbose
     # NOTE(review): takes element 0 of Major.objects.all(); presumably
     # fails when that collection is empty -- confirm with callers.
     all_majors = Major.objects.all()
     self.major = all_majors[0]
开发者ID:tpetr,项目名称:schedr,代码行数:7,代码来源:importer.py


示例4: reset

 def reset(self):
     """Reset the base parser and clear the collected URL list.

     Returns:
         None on success; 0 when ``SGMLParser.reset`` raised. In that case
         the error is printed rather than propagated, as before.
     """
     # Initialise first so ``self.urls`` exists even if the base reset fails.
     self.urls = []
     try:
         SGMLParser.reset(self)
     except Exception as e:
         # ``except X as e`` is valid on Python 2.6+ and Python 3; the old
         # comma form is a syntax error on Python 3.
         print("reset111111111111 %s" % (e,))
         return 0
开发者ID:shuanzi,项目名称:test_python,代码行数:7,代码来源:urllister.py


示例5: __init__

    def __init__( self, verbose=0 ):
        """Create the parser with empty title, body, meta tags and save buffer.

        Args:
            verbose: passed straight through to ``SGMLParser.__init__``.
        """
        SGMLParser.__init__(self, verbose)
        # Text fields start as empty strings.
        self.title = self.body = ''
        self.metatags = {}
        self.savedata = None
开发者ID:goschtl,项目名称:zope,代码行数:7,代码来源:utils.py


示例6: reset

 def reset(self):
     """Reset the base parser and restore the meaning-collection defaults."""
     SGMLParser.reset(self)
     # Nothing collected yet.
     self.meanings = []
     self.currentMeaning = ''
     # Flags that start off.
     self.addText = self.processingMeaning = False
     # Flag that starts on.
     self.processMoreMeanings = True
开发者ID:PepeGuerrero,项目名称:LearningPython,代码行数:7,代码来源:rae.py


示例7: reset

 def reset(self):
     """Reset the base parser and clear the majors-scraping state."""
     SGMLParser.reset(self)
     self.majors = {}
     self.current_name = None
     self.data = ''
     # Both flags start as False.
     self.inside_majors = self.capture = False
开发者ID:tpetr,项目名称:schedr,代码行数:7,代码来源:importer.py


示例8: __init__

    def __init__(self, httpResponse, normalizeMarkup=True, verbose=0):
        """Initialise both parser bases, set up state and parse the response.

        Args:
            httpResponse: passed to ``abstractParser.__init__``,
                ``_regex_url_parse`` and ``_preParse``; its ``getCharset()``
                result is stored as ``self._encoding``.
            normalizeMarkup: stored on ``self._normalizeMarkup``.
            verbose: forwarded to ``SGMLParser.__init__``.
        """
        abstractParser.__init__(self, httpResponse)
        SGMLParser.__init__(self, verbose)

        self._normalizeMarkup = normalizeMarkup
        self._encoding = httpResponse.getCharset()

        # Constants: URL-carrying attributes and the tags they appear on.
        self._urlAttrs = ('href', 'src', 'data', 'action')
        self._tagsContainingURLs = (
            'go', 'a', 'img', 'link', 'script', 'iframe', 'object',
            'embed', 'area', 'frame', 'applet', 'input', 'base',
            'div', 'layer', 'ilayer', 'bgsound', 'form',
        )

        # Internal accumulators, all empty to begin with.
        self._tag_and_url = []
        self._parsed_URLs = []
        self._re_URLs = []
        self._forms = []
        self._commentsInDocument = []
        self._scriptsInDocument = []

        # Meta tags
        self._metaRedirs = []
        self._metaTags = []

        # "Currently inside <tag>" flags.
        self._insideForm = False
        self._insideSelect = False
        self._insideTextarea = False
        self._insideScript = False

        # Fill self._re_URLs list with url objects
        self._regex_url_parse(httpResponse)

        # All state is in place; now we are ready to work
        self._preParse(httpResponse)
开发者ID:adambaldwin2,项目名称:test,代码行数:34,代码来源:sgmlParser.py


示例9: __init__

 def __init__(self,url):
     """Initialise the parser and blank every piece of scraping state.

     Args:
         url: stored on ``self.url_name``.
     """
     SGMLParser.__init__(self)
     # NOTE(review): sgmllib documents reset() as being called implicitly at
     # instantiation, so this explicit base-class call looks redundant; kept
     # because the rest of the class is not visible here.
     SGMLParser.reset(self)

     self.vul_value = []
     self.url_value = ""

     # State flags, all starting as None. ``is_tr`` used to be assigned
     # twice in this list; the duplicate has been dropped.
     self.is_table = None
     self.is_date = None
     self.is_tr = None
     self.is_span = None
     self.is_vul_name = None
     self.is_td = None
     self.is_name = None
     self.is_vul = None
     self.is_name_desc = None
     self.is_date_desc = None
     self.is_cvss_desc = None
     self.is_a = None
     self.is_cvss = None
     self.is_href_desc = None
     self.is_url = None
     self.is_vul_desc = None

     # Counters start at zero, text fields start empty.
     self.table_count = 0
     self.cvss_name = ""
     self.vul_name = ""
     self.href_name = ""
     self.vul_type_name = ""
     self.date_name = ""
     self.td_count = 0
     self.cve_name = ""
     self.a_count = 0
     self.desc_name = ""
     self.url_name = url
开发者ID:forprevious,项目名称:auto_crawler,代码行数:35,代码来源:uscert_info.py


示例10: __init__

    def __init__(self):
        """Initialise both base classes and zero every ``is_<name>`` flag."""
        BaseModule.__init__(self)
        SGMLParser.__init__(self)

        # One integer attribute ``is_<name>`` per listed name, all set to 0.
        flag_names = (
            'b', 'dl', 'p', 'br', 'tr', 'td', 'a', 'span', 'font', 'li',
            'div', 'dt', 'ul', 'dd', 'table', 'em', 'tbody', 'img', 'h1',
            'h3', 'i',
        )
        for flag_name in flag_names:
            setattr(self, 'is_' + flag_name, 0)

        self.req = None
开发者ID:lixiaoyu-code,项目名称:jreading.com,代码行数:27,代码来源:base_module.py


示例11: reset

 def reset(self):
   """Reset the base parser and restore this source's name, URL and quote state."""
   SGMLParser.reset(self)
   self.name = "Jokes2go.com"
   self.url = "http://www.jokes2go.com/cgi-perl/randjoke.cgi?type=j"
   # indicates that the parser is currently processing the content inside
   # the <pre></pre> tag
   self.inside_pre_element = False
   self.quote = []
   self.current_quote = ""
开发者ID:Awkee,项目名称:cairo-dock-plug-ins-extras,代码行数:7,代码来源:JokestogoParser.py


示例12: reset

	def reset(self):
		"""Clear the collected pieces/refs and enumeration state, then reset the base parser."""
		self.pieces = []
		self.refs = []
		# Enumeration level plus the per-level prefix and continuation strings.
		self.enum = 0
		self.enumstrs = ("", "  * ", "	   + ")
		self.enumbrks = ("", "    ", "	     ")
		# The base-class reset runs last, after the attributes above are set.
		SGMLParser.reset(self)
开发者ID:frugalware,项目名称:homepage-ng,代码行数:7,代码来源:post-receive-news.py


示例13: __init__

 def __init__(self, input_file, mapping):
   """Rewind ``input_file``, store the arguments, zero the counters and init the base.

   Args:
     input_file: object with a ``seek`` method; rewound to offset 0 and
       stored on ``self.input_file``.
     mapping: stored on ``self.mapping``.
   """
   input_file.seek(0)
   self.input_file, self.mapping = input_file, mapping
   self.current_offset = self.count = 0
   # The base initialiser runs only after the attributes above exist.
   SGMLParser.__init__(self)
开发者ID:adamar,项目名称:wikidump,代码行数:7,代码来源:parser.py


示例14: __init__

 def __init__(self,username,password):
     """Store the credentials and install a cookie-aware global urllib2 opener.

     Args:
         username: stored on ``self.username``.
         password: stored on ``self.password``.
     """
     SGMLParser.__init__(self)
     self.username, self.password = username, password
     # Build an opener around a fresh in-memory cookie jar and install it
     # globally via urllib2.install_opener.
     cookie_jar = cookielib.CookieJar()
     cookie_handler = urllib2.HTTPCookieProcessor(cookie_jar)
     urllib2.install_opener(urllib2.build_opener(cookie_handler))
开发者ID:Timeship,项目名称:Pycode,代码行数:7,代码来源:renren.py


示例15: reset

 def reset(self):
     """Reset the base parser and zero all watcher/fork tracking counters."""
     SGMLParser.reset(self)
     self.in_a_watchers = self.in_a_forks = 0
     self.in_div = 0
     self.in_fork_flag = 0
     # Existence check: anything that could possibly be absent has to be
     # checked for presence. (Original author's aside: is there a way to
     # modularise this?)
     self.exist_in_fork_flag = 0
开发者ID:YSHCI,项目名称:GithubParser,代码行数:7,代码来源:parseSourceV1_0.py


示例16: __init__

    def __init__(self, base, username, password):
        """Set up parser state, login form fields and a cookie-aware opener.

        Args:
            base: stored on ``self.base``.
            username: stored on ``self.username``.
            password: stored on ``self.password``.

        Side effect visible in this method: installs a global urllib2 opener
        built around a new ``cookielib.CookieJar``.

        Raises:
            Whatever the cookie jar / cookie processor construction raises;
            nothing is caught here.
        """
        SGMLParser.__init__(self)

        # Parsing state.
        self.h3 = False
        self.h3_is_ready = False
        self.div = False
        self.h3_and_div = False
        self.a = False
        self.depth = 0
        self.names = ""
        self.dic = {}

        # Login parameters.
        self.base = base
        self.fastloginfield = "username"
        self.username = username
        self.password = password
        self.quickforward = "yes"
        self.handlekey = "ls"

        # The previous ``try / except: raise / else`` wrapper re-raised every
        # error unchanged, so the straight-line form behaves the same.
        cookie = cookielib.CookieJar()
        cookieProc = urllib2.HTTPCookieProcessor(cookie)
        opener = urllib2.build_opener(cookieProc)
        urllib2.install_opener(opener)
开发者ID:HDRorz,项目名称:python,代码行数:25,代码来源:DiscuzLogin.py


示例17: reset

 def reset(self):
   """Reset the base parser and restore this source's URL and quote state."""
   SGMLParser.reset(self)
   self.url = "http://www.100blagues.com/random"
   # list of quotes to be filled
   self.quote = []
   self.current_quote = ""
   # indicates if the parser is inside the <div class="left comment">...</div> tag
   self.inside_div_element = False
   # indicates if the parser is inside the
   # <div class="left comment">...<a href>...</a>...</div> tag
   self.inside_div_a_element = False
开发者ID:Awkee,项目名称:cairo-dock-plug-ins-extras,代码行数:7,代码来源:HundredblaguesParser.py


示例18: __init__

 def __init__(self):
     """Set the page-parsing defaults, then initialise the SGMLParser base."""
     # Page URL and harvested collections
     self.url = None
     self.links = []
     self.linkpos = {}
     self.images = []
     self.keywords = []        # Keywords
     self.description = ''     # Description of page
     self.title = ''           # Title of page
     self.title_flag = True
     # Fix for <base href="..."> links, plus the base url it refers to
     self.base_href = False
     self.base = None
     # anchor links flag
     self._anchors = True
     # For META robots tag
     self.can_index = self.can_follow = True
     # Current tag
     self._tag = ''
     # The base initialiser runs after the defaults above are in place
     SGMLParser.__init__(self)
     # Type
     self.typ = 0
开发者ID:pombredanne,项目名称:harvestman-crawler-2,代码行数:26,代码来源:pageparser.py


示例19: reset

 def reset(self):
     """Reset the base parser and restore the URL lists and cell flags."""
     SGMLParser.reset(self)
     # Two separate empty lists.
     self.url, self.next_url = [], []
     self.is_td = self.is_just = False
     # Unlike the other flags, this one starts as True.
     self.is_target = True
开发者ID:kymo,项目名称:CrawlerBBS,代码行数:7,代码来源:parser.py


示例20: reset

 def reset(self):
   """Reset the base parser and restore this source's URL and quote state."""
   SGMLParser.reset(self)
   self.url = "http://www.vitadimerda.it/aleatorie"
   # list of quotes to be filled
   self.quote = []
   self.current_quote = ""
   # indicates if the parser is inside the <div></div> tag
   self.inside_div_element = False
   self.inside_div_p_element = False
开发者ID:Awkee,项目名称:cairo-dock-plug-ins-extras,代码行数:7,代码来源:VitadimerdaParser.py



注:本文中的sgmllib.SGMLParser类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python ItemMan.ItemManager类代码示例发布时间:2022-05-27
下一篇:
Python HTMLParser.HTMLParser类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap