• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python re.finditer函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中re.finditer函数的典型用法代码示例。如果您正苦于以下问题:Python finditer函数的具体用法?Python finditer怎么用?Python finditer使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了finditer函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_pronoun_label_zh

def get_pronoun_label_zh(line):
	"""Classify a whitespace-tokenized Chinese line by pronoun usage.

	Counts first-person ('我', '我们', '我 的') and second-person ('你',
	'你们', '你 的') pronouns occurring as standalone tokens (line start,
	between spaces, or line end) and returns
	(label, f_count, s_count, f_zh, s_zh) where label is '1v', '2v' or
	'none'.  Ties are broken by whichever pronoun appears first.
	NOTE(review): f_zh/s_zh hold only the matches of the *last* pronoun
	variant tried, mirroring the original implementation.
	"""
	def tally(pronouns):
		# Returns (total count, all match start offsets, last variant's matches).
		count = 0
		positions = []
		last_matches = []
		for pro in pronouns:
			last_matches = []
			for pat in ('^' + pro + ' ', ' ' + pro + ' ', ' ' + pro + '$'):
				last_matches += re.findall(pat, line)
				positions += [m.span()[0] for m in re.finditer(pat, line)]
			count += len(last_matches)
		return count, positions, last_matches

	f_count, f_positions, f_zh = tally(['我', '我们', '我 的'])
	s_count, s_positions, s_zh = tally(['你', '你们', '你 的'])

	if f_count == 0 and s_count == 0:
		return ('none', 0, 0, [], [])
	if f_count == s_count:
		label = '1v' if min(f_positions) < min(s_positions) else '2v'
		return (label, f_count, s_count, f_zh, s_zh)
	if f_count > s_count:
		return ('1v', f_count, s_count, f_zh, s_zh)
	return ('2v', f_count, s_count, f_zh, s_zh)
开发者ID:raosudha89,项目名称:pronoun_restoration,代码行数:28,代码来源:to_vw_input_format.py


示例2: ParseMethodAnnotation

  def ParseMethodAnnotation(self, annotation):
    """Parse a method annotation string and record the flags it carries.

    Recognized annotation attributes:
      reservable = true          -> sets self._is_reservable
      delegate = true|false      -> sets self._is_delegate
      disableReflectMethod = ... -> sets self._disable_reflect_method
      preWrapperLines = {"..."}  -> stored in self._method_annotations under
                                    self.ANNOTATION_PRE_WRAPLINE
      postWrapperLines = {"..."} -> stored under self.ANNOTATION_POST_WRAPLINE
    """
    if annotation.find('reservable = true') >= 0:
      self._is_reservable = True

    # Raw strings throughout: sequences like '\s' in plain literals are
    # invalid string escapes (a SyntaxWarning on modern Python), even though
    # they happened to reach the regex engine unchanged before.
    delegate_re = re.compile(r'delegate\s*=\s*(?P<delegate>(true|false))')
    for match in re.finditer(delegate_re, annotation):
      delegate = match.group('delegate')
      if delegate == 'true':
        self._is_delegate = True
      elif delegate == 'false':
        self._is_delegate = False

    disable_reflect_method_re = re.compile(
        r'disableReflectMethod\s*=\s*(?P<disableReflectMethod>(true|false))')
    for match in re.finditer(disable_reflect_method_re, annotation):
      disable_reflect_method = match.group('disableReflectMethod')
      if disable_reflect_method == 'true':
        self._disable_reflect_method = True
      else:
        self._disable_reflect_method = False

    # The wrapper-line patterns previously split '(?P<name>' across two
    # adjacent string literals; joined here so the group syntax is readable.
    pre_wrapline_re = re.compile(
        r'preWrapperLines\s*=\s*\{\s*(?P<pre_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(pre_wrapline_re, annotation):
      pre_wrapline = self.FormatWrapperLine(match.group('pre_wrapline'))
      self._method_annotations[self.ANNOTATION_PRE_WRAPLINE] = pre_wrapline

    post_wrapline_re = re.compile(
        r'postWrapperLines\s*=\s*\{\s*(?P<post_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(post_wrapline_re, annotation):
      post_wrapline = self.FormatWrapperLine(match.group('post_wrapline'))
      self._method_annotations[self.ANNOTATION_POST_WRAPLINE] = post_wrapline
开发者ID:Avanznow,项目名称:crosswalk,代码行数:33,代码来源:java_method.py


示例3: scan_page

def scan_page(url, data=None):
    """Probe every GET/POST parameter of a target for reflected XSS.

    url/data are the request target and (optional) POST body.  Returns True
    if at least one parameter looked vulnerable, False otherwise.

    NOTE(review): Python 2 code (print statements, xrange, urllib.quote);
    depends on module-level GET/POST phase markers, LARGER_CHAR_POOL,
    SMALLER_CHAR_POOL, PREFIX_SUFFIX_LENGTH, XSS_PATTERNS and the helpers
    _retrieve_content()/_contains() defined elsewhere in the file.
    """
    retval, usable = False, False
    # Give valueless parameters ("a=&b=") a dummy value of 1 so they can be
    # tampered with below.
    url, data = re.sub(r"=(&|\Z)", "=1\g<1>", url) if url else url, re.sub(r"=(&|\Z)", "=1\g<1>", data) if data else data
    try:
        for phase in (GET, POST):
            current = url if phase is GET else (data or "")
            # Iterate over every name=value pair of the current phase.
            for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)(?P<value>[^&]+)", current):
                found, usable = False, True
                print "* scanning %s parameter '%s'" % (phase, match.group("parameter"))
                # Random prefix/suffix make the injected probe recognizable
                # in the response body.
                prefix, suffix = ("".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH)) for i in xrange(2))
                for pool in (LARGER_CHAR_POOL, SMALLER_CHAR_POOL):
                    if not found:
                        # Append the probe payload to the original value and
                        # re-request the page.
                        tampered = current.replace(match.group(0), "%s%s" % (match.group(0), urllib.quote("%s%s%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix, "".join(random.sample(pool, len(pool))), suffix))))
                        content = (_retrieve_content(tampered, data) if phase is GET else _retrieve_content(url, tampered)).replace("%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix, ), prefix) if False else (_retrieve_content(tampered, data) if phase is GET else _retrieve_content(url, tampered)).replace("%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix), prefix)
                        # Look for reflections of the probe and match them
                        # against the known vulnerable output contexts.
                        for sample in re.finditer("%s([^ ]+?)%s" % (prefix, suffix), content, re.I):
                            for regex, condition, info, content_removal_regex in XSS_PATTERNS:
                                context = re.search(regex % {"chars": re.escape(sample.group(0))}, re.sub(content_removal_regex or "", "", content), re.I)
                                if context and not found and sample.group(1).strip():
                                    if _contains(sample.group(1), condition):
                                        print " (i) %s parameter '%s' appears to be XSS vulnerable (%s)" % (phase, match.group("parameter"), info % dict((("filtering", "no" if all(char in sample.group(1) for char in LARGER_CHAR_POOL) else "some"),)))
                                        found = retval = True
                                    break
        if not usable:
            print " (x) no usable GET/POST parameters found"
    except KeyboardInterrupt:
        print "\r (x) Ctrl-C pressed"
    return retval
开发者ID:brock7,项目名称:scripts,代码行数:27,代码来源:dsxs.py


示例4: tableViewInHierarchy

def tableViewInHierarchy():
    """Search the key window's recursive view description for a UITableView.

    Returns the address string (e.g. '0x7f...') of the first table view
    found, or None if the hierarchy contains none.
    """
    viewDescription = fb.evaluateExpressionValue(
        "(id)[(id)[[UIApplication sharedApplication] keyWindow] recursiveDescription]"
    ).GetObjectDescription()

    # Fast path: a plain UITableView instance appears by class name.
    direct = re.search(r"UITableView: (0x[0-9a-fA-F]+);", viewDescription)
    if direct:
        return direct.group(1)

    # Next: a direct subclass that advertises its base class.
    subclass = re.search(r"(0x[0-9a-fA-F]+); baseClass = UITableView;", viewDescription)
    if subclass:
        return subclass.group(1)

    # SLOW fallback: ask the runtime about every pointer in the description.
    for candidate in re.findall(r"(0x[0-9a-fA-F]+)[;>]", viewDescription):
        if fb.evaluateBooleanExpression("[" + candidate + " isKindOfClass:(id)[UITableView class]]"):
            return candidate

    return None
开发者ID:kristiandelay,项目名称:chisel,代码行数:29,代码来源:FBPrintCommands.py


示例5: setupTranslations

def setupTranslations(type, locales, projectName, key):
  """Configure the Crowdin project's target-language list.

  type: 'chrome' or anything else (treated as Firefox/Gecko);
  locales: iterable of locale codes the project already ships;
  projectName/key: Crowdin project identifier and API key.

  Scrapes mozilla.org/AMO for browser-supported locales, intersects the
  union with the codes crowdin.net accepts, and POSTs the result to the
  Crowdin edit-project API.  Raises Exception on an unsuccessful response.
  NOTE(review): Python 2 code (urllib2, print statement); chromeLocales
  and mapLocale() are defined elsewhere in the module.
  """
  # Copy locales list, we don't want to change the parameter
  locales = set(locales)

  # Fill up with locales that we don't have but the browser supports
  if type == 'chrome':
    for locale in chromeLocales:
      locales.add(locale)
  else:
    firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
    for match in re.finditer(r'&amp;lang=([\w\-]+)"', firefoxLocales):
      locales.add(mapLocale(type, match.group(1)))
    langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
    for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
      if match.group(0).find('Install Language Pack') >= 0:
        match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
        if match2:
          locales.add(mapLocale(type, match2.group(1)))

  # Convert locale codes to the ones that Crowdin will understand
  locales = set(map(lambda locale: mapLocale(type, locale), locales))

  # Only keep locales the Crowdin server actually accepts.
  allowed = set()
  allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
  for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
    allowed.add(match.group(1))
  if not allowed.issuperset(locales):
    print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)

  locales = list(locales & allowed)
  locales.sort()
  params = urllib.urlencode([('languages[]', locale) for locale in locales])
  result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
  if result.find('<success') < 0:
    raise Exception('Server indicated that the operation was not successful\n' + result)
开发者ID:chinnurtb,项目名称:buildtools,代码行数:35,代码来源:localeTools.py


示例6: consistency_check

def consistency_check(text, word_pairs, err, msg, offset=0):
    """Build a consistency checker for the given word_pairs.

    For each pair whose two spellings both occur in *text*, flag every
    occurrence of the less frequent spelling (ties flag the first form)
    and suggest the more frequent one.  Returns a list of
    (start, end, err, message) tuples; *offset* shifts the positions.
    """
    # Collapse whitespace runs in the message template.
    msg = " ".join(msg.split())

    errors = []
    for w in word_pairs:
        hits_a = list(re.finditer(w[0], text))
        hits_b = list(re.finditer(w[1], text))
        if not (hits_a and hits_b):
            continue

        # Flag the minority spelling; on a tie the first form is flagged.
        if len(hits_a) > len(hits_b):
            minority, preferred = hits_b, w[0]
        else:
            minority, preferred = hits_a, w[1]

        errors.extend(
            (m.start() + offset, m.end() + offset, err,
             msg.format(m.group(0), preferred))
            for m in minority)

    return errors
开发者ID:ComSecNinja,项目名称:proselint,代码行数:28,代码来源:tools.py


示例7: get_sources

    def get_sources(self, video):
        """Scrape hoster links for *video* from its source page.

        Returns a list of hoster dicts (url, host, quality, ...) collected
        from embedded players and from plain links next to a play button.
        NOTE(review): Python 2 code (urlparse, str.decode('base-64'));
        relies on scraper framework members (self._http_get, self._caesar,
        self._get_links, QUALITIES, FORCE_NO_MATCH, scraper_utils) defined
        elsewhere.
        """
        source_url = self.get_url(video)
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)

            # A site banner marks low-quality rips; default to HIGH otherwise.
            match = re.search('This movie is of poor quality', html, re.I)
            if match:
                quality = QUALITIES.LOW
            else:
                quality = QUALITIES.HIGH

            # Embedded players: fetch each embed page and decode the
            # caesar/base64-obfuscated link blob it document.write()s.
            for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
                url = match.group(1)
                embed_html = self._http_get(url, cache_limit=.5)
                r = re.search('{\s*write\("([^"]+)', embed_html)
                if r:
                    plaintext = self._caesar(r.group(1), 13).decode('base-64')
                    if 'http' not in plaintext:
                        plaintext = self._caesar(r.group(1).decode('base-64'), 13).decode('base-64')
                else:
                    plaintext = embed_html
                hosters += self._get_links(plaintext)

            # Plain links rendered next to a play_video.gif icon.
            pattern = 'href="([^"]+)".*play_video.gif'
            for match in re.finditer(pattern, html, re.I):
                url = match.group(1)
                host = urlparse.urlparse(url).hostname
                hoster = {'multi-part': False, 'url': url, 'host': host, 'class': self, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False}
                hosters.append(hoster)
        return hosters
开发者ID:assli100,项目名称:kodi-openelec,代码行数:32,代码来源:afdah_scraper.py


示例8: _generate_entry_probe

        def _generate_entry_probe(self):
                """Generate the BPF C text for this probe's entry handler.

                Collects every argument referenced as $entry(name) in the
                expressions and the filter, plus an implicit "__latency"
                slot when $latency is used anywhere, then emits one
                pid-keyed BPF_HASH per saved argument followed by the entry
                probe function body from self._generate_entry().

                Raises ValueError if $entry() names a parameter missing
                from self.param_types.
                """
                self.args_to_probe = set()
                regex = r"\$entry\((\w+)\)"
                for expr in self.exprs:
                        for arg in re.finditer(regex, expr):
                                self.args_to_probe.add(arg.group(1))
                for arg in re.finditer(regex, self.filter):
                        self.args_to_probe.add(arg.group(1))
                if any(map(lambda expr: "$latency" in expr, self.exprs)) or \
                   "$latency" in self.filter:
                        self.args_to_probe.add("__latency")
                        self.param_types["__latency"] = "u64"    # nanoseconds
                for pname in self.args_to_probe:
                        if pname not in self.param_types:
                                # BUG FIX: the message previously interpolated
                                # the stale loop variable `arg` (a regex Match
                                # object, or unbound when no $entry() matched)
                                # instead of the offending parameter name.
                                raise ValueError("$entry(%s): no such param" \
                                                % pname)

                self.hashname_prefix = "%s_param_" % self.probe_hash_name
                text = ""
                for pname in self.args_to_probe:
                        # Each argument is stored in a separate hash that is
                        # keyed by pid.
                        text += "BPF_HASH(%s, u32, %s);\n" % \
                             (self.hashname_prefix + pname,
                              self.param_types[pname])
                text += self._generate_entry()
                return text
开发者ID:att-innovate,项目名称:bcc,代码行数:29,代码来源:argdist.py


示例9: list_show_page

 def list_show_page(self, url, page, seasons=False, episodes=False):
     """List season and/or episode items parsed from a show page.

     url/page: the show page address and its already-fetched HTML;
     seasons/episodes toggle which item kinds are produced.  Returns a
     list of dir/video items that passed self._filter().
     NOTE(review): Python 2 code (urlparse); the SERIES_*/EPISODE_* regex
     constants and the util helpers are defined elsewhere in the module.
     """
     result = []
     # Newer page layout: different markers and per-post anchor URLs.
     if "/p/epizody" in url or "/p/epiz%C3%B3dy" in url or "p/archiv" in url:
         if seasons:
             season_data = util.substr(page, SERIES_START2, SERIES_END2)
             for m in re.finditer(SERIES_ITER_RE2, season_data, re.DOTALL | re.IGNORECASE):
                 item = self.dir_item()
                 item['title'] = m.group('title')
                 item['url'] = url + '#post=%s' % (m.group('id'))
                 self._filter(result, item)
         if episodes:
             for m in re.finditer(EPISODE_ITER_RE2, page, re.DOTALL | re.IGNORECASE):
                 item = self.video_item()
                 item['title'] = "%s (%s)" % (m.group('title'), m.group('date'))
                 item['url'] = m.group('url')
                 self._filter(result, item)
     else:
         if seasons:
             season_data = util.substr(page, SERIES_START, SERIES_END)
             for m in re.finditer(SERIES_ITER_RE, season_data, re.DOTALL | re.IGNORECASE):
                 item = self.dir_item()
                 item['title'] = m.group('title')
                 item['url'] = 'http://' + urlparse.urlparse(url).netloc + '/ajax.json?' + m.group('url')
                 self._filter(result, item)
         if episodes:
             # NOTE(review): episodes_data is computed but the loop scans the
             # whole page instead — possibly intentional, possibly a latent
             # bug; confirm against the site layout before changing.
             episodes_data = util.substr(page, EPISODE_START, EPISODE_END)
             for m in re.finditer(EPISODE_ITER_RE, page, re.DOTALL | re.IGNORECASE):
                 item = self.video_item()
                 item['title'] = "%s. %s (%s)" % (m.group('episode'), m.group('title'), m.group('date'))
                 item['url'] = m.group('url')
                 self._filter(result, item)
     return result
开发者ID:Jakub2013,项目名称:plugin.video.joj.sk,代码行数:32,代码来源:joj.py


示例10: dotransform

def dotransform(request, response):
  """Maltego transform: extract To/From e-mail addresses from an SMTP capture.

  Reads the message file named by request.value, pulls the addresses out of
  'RCPT TO:' and 'MAIL FROM:' lines, and appends one EmailAddress entity
  (tagged 'mail_to'/'mail_from') per address to the response.

  NOTE(review): EmailAddress/Field come from the canari/sniffMyPackets
  framework imported elsewhere in this module.
  """
  emailaddr = []
  msgfile = request.value
  lookFor = ['To', 'From']
  tmpfolder = request.fields['sniffMyPackets.outputfld']

  with open(msgfile, mode='r') as msgfile:
    reader = msgfile.read()
    reader = str(reader)
    for x in lookFor:
      if x in reader:
        # BUG FIX: both patterns had been corrupted by a web e-mail
        # obfuscator ('[\w.-][email protected][\w.-]+'); restored to match
        # addresses of the form local@domain.
        for s in re.finditer(r'RCPT TO: <([\w.-]+@[\w.-]+)>', reader):
          to_addr = s.group(1), 'mail_to'
          emailaddr.append(to_addr)
        for t in re.finditer(r'MAIL FROM: <([\w.-]+@[\w.-]+)>', reader):
          from_addr = t.group(1), 'mail_from'
          emailaddr.append(from_addr)

  # Emit one entity per extracted (address, direction) pair.
  for addr, addrfield in emailaddr:
    e = EmailAddress(addr)
    e.linklabel = addrfield
    e += Field('filelocation', request.value, displayname='File Location', matchingrule='loose')
    e += Field('emailaddr', addrfield, displayname='Header Info')
    response += e
  return response
开发者ID:FomkaV,项目名称:wifi-arsenal,代码行数:28,代码来源:smtpaddress.py


示例11: list_archive_page

 def list_archive_page(self, show_page, showon=False, showoff=False):
     """Parse the archive page into currently-aired and ended show items.

     showon/showoff select which sections are parsed.  Both groups are
     sorted case-insensitively by title; aired shows come first in the
     returned list.
     """
     aired = []
     if showon:
         section = util.substr(show_page, VYSIELANE_START, NEVYSIELANE_START)
         for match in re.finditer(VYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
             entry = self.dir_item()
             entry['title'] = match.group('title')
             entry['plot'] = match.group('desc')
             entry['url'] = match.group('url') + "#season_episode"
             # An <itime> capture marks shows available for the last 7 days.
             entry['type'] = "showon7d" if match.group('itime') is not None else "showon"
             aired.append(entry)
     aired.sort(key=lambda entry: entry['title'].lower())

     ended = []
     if showoff:
         section = util.substr(show_page, NEVYSIELANE_START, NEVYSIELANE_END)
         for match in re.finditer(NEVYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
             entry = self.dir_item()
             entry['title'] = match.group('title')
             entry['url'] = match.group('url') + "#season_episode"
             entry['type'] = "showoff"
             ended.append(entry)
     ended.sort(key=lambda entry: entry['title'].lower())

     return aired + ended
开发者ID:Jakub2013,项目名称:plugin.video.joj.sk,代码行数:27,代码来源:joj.py


示例12: setupTranslations

def setupTranslations(localeConfig, projectName, key):
  """Configure the Crowdin project's target-language list.

  localeConfig: dict with 'name_format', 'locales' and 'target_platforms';
  projectName/key: Crowdin project identifier and API key.

  Scrapes mozilla.org/AMO for browser-supported locales, intersects the
  union with the codes crowdin.net accepts, and POSTs the result to the
  Crowdin edit-project API.  Raises Exception on an unsuccessful response.
  NOTE(review): Python 2 code (urllib2, print statement); chromeLocales
  and mapLocale() are defined elsewhere in the module.
  """
  # Make a new set from the locales list, mapping to Crowdin friendly format
  locales = {mapLocale(localeConfig['name_format'], locale)
             for locale in localeConfig['locales']}

  # Fill up with locales that we don't have but the browser supports
  if 'chrome' in localeConfig['target_platforms']:
    for locale in chromeLocales:
      locales.add(mapLocale('ISO-15897', locale))

  if 'gecko' in localeConfig['target_platforms']:
    firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
    for match in re.finditer(r'&amp;lang=([\w\-]+)"', firefoxLocales):
      locales.add(mapLocale('BCP-47', match.group(1)))
    langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
    for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
      if match.group(0).find('Install Language Pack') >= 0:
        match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
        if match2:
          locales.add(mapLocale('BCP-47', match2.group(1)))

  # Only keep locales the Crowdin server actually accepts.
  allowed = set()
  allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
  for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
    allowed.add(match.group(1))
  if not allowed.issuperset(locales):
    print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)

  locales = list(locales & allowed)
  locales.sort()
  params = urllib.urlencode([('languages[]', locale) for locale in locales])
  result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
  if result.find('<success') < 0:
    raise Exception('Server indicated that the operation was not successful\n' + result)
开发者ID:HoverHell,项目名称:abp_buildtools,代码行数:34,代码来源:localeTools.py


示例13: ExtendCurlys

	def ExtendCurlys(self, list_of_terms, target_body):
		"""
		Run FindWordsInBracketsAndCurlies first.
		Adds curly braces around every standalone occurrence of each term
		in target_body that is not already inside a [..] or {..} group.
		Side effects: sets self.ecoutput (the rewritten text) and
		self.curly_count (number of substitutions performed).
		"""
		# Pad with spaces so the boundary regex below can match terms at
		# the very start/end of the text; the padding is stripped at the end.
		self.target_body = ' ' + target_body + ' '

		# Spans of all existing [..] and {..} groups; matches starting
		# inside one of these spans are left untouched by repl().
		self.dbrackets = [m.span(0) for m in re.finditer(r"\[([\w \(\)\-,.]+)\]", self.target_body)]
		self.sbrackets = [m.span(0) for m in re.finditer(r"\{([\w \(\)\-,.]+)\}", self.target_body)]
		self.allbrackets = self.dbrackets + self.sbrackets

		def repl(matchobj):
			# Keep the match as-is if it begins inside a bracketed span.
			for span in self.allbrackets:
				if matchobj.start(0) in range(*span):
					return matchobj.group(0)

			# Otherwise wrap the term, keeping its boundary characters.
			self.curly_count += 1
			return (matchobj.group(1) + self.curly_term + matchobj.group(2))


		self.curly_count = 0
		for i, term in enumerate(list_of_terms):
			self.curly_term = '{' + term + '}'

			# NOTE(review): term is interpolated unescaped, so regex
			# metacharacters in a term would change the pattern — confirm
			# terms are plain words before relying on this.
			regex = re.compile(r"([^\{\w])%s([^\}\w])" %term, re.IGNORECASE)
			if i ==0:
				self.ecoutput = re.sub(regex, repl, self.target_body)
			else:
				self.ecoutput = re.sub(regex, repl, self.ecoutput)

		# Strip the padding spaces added above.
		self.ecoutput = self.ecoutput[1:-1]
开发者ID:law826,项目名称:radiology_diagnoser,代码行数:31,代码来源:importnlp.py


示例14: consistency_check

def consistency_check(text, word_pairs, err, msg, offset=0):
    """Build a consistency checker for the given word_pairs.

    For each pair whose two spellings both occur in *text*, flag every
    occurrence of the less frequent spelling (ties flag the first form)
    and suggest the majority one.  Returns a list of
    (start, end, err, message, replacement) tuples, with positions
    shifted by *offset*.
    """
    # Collapse whitespace runs in the message template.
    msg = " ".join(msg.split())

    errors = []
    for w in word_pairs:
        hits = [list(re.finditer(w[0], text)), list(re.finditer(w[1], text))]
        if not (hits[0] and hits[1]):
            continue

        # Bool index of the minority spelling; a tie selects index 0.
        loser = len(hits[0]) > len(hits[1])
        winner = w[~loser]  # ~False == -1 -> w[1]; ~True == -2 -> w[0]

        for m in hits[loser]:
            errors.append((
                m.start() + offset,
                m.end() + offset,
                err,
                msg.format(winner, m.group(0)),
                winner))

    return errors
开发者ID:CatherineH,项目名称:proselint,代码行数:25,代码来源:tools.py


示例15: stem_helper

def stem_helper(word, rem_umlaut = True):
	"""Stem a (Middle High) German word via R1/R2 suffix stripping.

	rem_umlaut: when True, remove umlauts from the word (delegates to the
	module-level remove_umlaut()) before the suffix-removal steps.
	Returns the stemmed word.
	"""

	# R1 is the region after the first consonant that follows a vowel;
	# R2 is the same construction applied inside R1.
	try:
		R1 = list(re.finditer(r"[aëeiouäöüâêîôûæœ][bdghfcjklmnspqrtvwz]",word))[0].start() + 2
	except IndexError:  # no vowel-consonant pair: region is empty
		R1 = len(word)

	try:
		R2 = list(re.finditer(r"[aëeiouäöüâêîôûæœ][bdghfcjklmnspqrtvwz]",word[R1:]))[0].start() + 2 + R1
	except IndexError:
		R2 = len(word)

	# Make sure the index of R1 is at least 3.
	if R1<3:
		try:
			R1 = list(re.finditer(r"[aëeiouäöüâêîôûæœ][bdghfcjklmnspqrtvwz]",word[1:]))[0].start() + 2
		except IndexError:
			R1 = len(word)

	if rem_umlaut:
		word = remove_umlaut(word)

	# Step 1: strip common inflectional endings inside R1.
	# NOTE(review): the 'eȥ(?=[klmrt])s' and '(?=[lr])n' alternatives look
	# garbled by the page extraction — a lookahead placed before the final
	# letter(s) can never match at $; lookbehinds were probably intended.
	# Left untouched pending a check against the original source.
	word = word[:R1] + re.sub(r'(wes|wen|est|ern|em|en|er|es|eȥ(?=[klmrt])s|(?=[lr])n|e)$',"",word[R1:])
	# Step 2: strip a second layer of endings inside R1.
	# BUG FIX: the pattern string had swallowed its own call arguments
	# ("...(?=.{3})st,word[R1:])$"), leaving the group unclosed in spirit
	# and the intended suffixes unmatched; the group is now closed properly.
	# NOTE(review): '(?=.{3})st$' still cannot match (the lookahead demands
	# three trailing characters where only 'st' remains); '(?<=.{3})st' was
	# likely intended — confirm before changing.
	word = word[:R1] + re.sub(r'(est|er|en|re|in|iu|(?=.{3})st)$',"",word[R1:])
	# Step 3: strip derivational endings inside R2.
	word = word[:R2] + re.sub(r'(lich?.?.|keit|inc|isch?.?.)$',"",word[R2:])

	return word
开发者ID:TylerKirby,项目名称:cltk,代码行数:35,代码来源:stem.py


示例16: get_media_url

    def get_media_url(self, host, media_id):
        """Resolve a cloudyvideos page into a direct stream URL.

        Re-submits the page's hidden form after a 2 s wait, then looks for
        the stream in a jwplayer "file:" assignment or inside packed
        (eval(function...)) javascript.  Returns the URL with User-Agent
        and Referer appended, or raises UrlResolver.ResolverError.

        NOTE(review): depends on resolver framework members (self.net,
        self.get_url, xbmc, jsunpack, common) defined elsewhere.
        """
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content
        form_values = {}
        stream_url = ''
        # Collect the hidden form fields needed to request the video page.
        for i in re.finditer('<input type="hidden" name="([^"]+)" value="([^"]+)', html):
            form_values[i.group(1)] = i.group(2)

        # The site rejects instant submissions; wait before POSTing.
        xbmc.sleep(2000)
        html = self.net.http_POST(web_url, form_data=form_values).content

        r = re.search("file\s*:\s*'([^']+)'", html)
        if r:
            stream_url = r.group(1)

        # Fall back to unpacking obfuscated javascript players.
        for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            match2 = re.search('<param\s+name="src"\s*value="([^"]+)', js_data)
            if match2:
                stream_url = match2.group(1)
            else:
                match2 = re.search('<embed.*?type="video.*?src="([^"]+)', js_data)
                if match2:
                    stream_url = match2.group(1)

        if stream_url:
            return stream_url + '|User-Agent=%s&Referer=%s' % (common.IE_USER_AGENT, web_url)

        raise UrlResolver.ResolverError('Unable to resolve cloudyvideos link. Filelink not found.')
开发者ID:Lynx187,项目名称:script.module.urlresolver,代码行数:29,代码来源:cloudyvideos.py


示例17: _boundary_of_alternatives_indices

def _boundary_of_alternatives_indices(pattern):
    """
    Determines the location of a set of alternatives in a glob pattern.
    Alternatives are defined by a matching set of non-bracketed parentheses.

    :param pattern: Glob pattern with wildcards.
    :return:        Indices of the innermost set of matching non-bracketed
                    parentheses in a tuple. The Index of a missing parenthesis
                    will be passed as None.
    """
    # Taking the leftmost closing parenthesis and the rightmost opening
    # parenthesis left of it ensures that the parentheses belong together and
    # the pattern is parsed correctly from the most nested section outwards.
    end_pos = next(
        (m.start() for m in re.finditer(r'\)', pattern)
         if not _position_is_bracketed(pattern, m.start())),
        None)

    start_pos = None
    for m in re.finditer(r'\(', pattern[:end_pos]):
        if not _position_is_bracketed(pattern, m.start()):
            start_pos = m.end()  # keep overwriting: we want the rightmost one

    return start_pos, end_pos
开发者ID:netman92,项目名称:coala,代码行数:26,代码来源:Globbing.py


示例18: parseHtmlForm

def parseHtmlForm(attr_str, html, input_names=None):
    """Find the first <form> whose tag contains *attr_str* and collect its inputs.

    Returns (action, inputs) for the first matching form — *inputs* maps
    input/textarea names to their values — or ({}, None) when no form
    matches.  If *input_names* is a dict, the form must additionally contain
    each given input with a matching value (exact string, member of a tuple,
    or regex object matched with re.match).

    NOTE(review): Python 2 code (basestring); parseHtmlTagAttrValue is
    defined elsewhere in this module.
    """
    for form in re.finditer(r"(?P<tag><form[^>]*%s[^>]*>)(?P<content>.*?)</?(form|body|html)[^>]*>" % attr_str, html, re.S | re.I):
        inputs = {}
        action = parseHtmlTagAttrValue("action", form.group('tag'))
        # Collect every named input/textarea inside the form body.
        for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('content'), re.S | re.I):
            name = parseHtmlTagAttrValue("name", inputtag.group(1))
            if name:
                value = parseHtmlTagAttrValue("value", inputtag.group(1))
                if value is None:
                    # textarea: fall back to the tag's inner text
                    inputs[name] = inputtag.group(3) or ''
                else:
                    inputs[name] = value

        if isinstance(input_names, dict):
            # check input attributes
            for key, val in input_names.items():
                if key in inputs:
                    if isinstance(val, basestring) and inputs[key] == val:
                        continue
                    elif isinstance(val, tuple) and inputs[key] in val:
                        continue
                    elif hasattr(val, "search") and re.match(val, inputs[key]):
                        continue
                    break # attribute value does not match
                else:
                    break # attribute name does not match
            else:
                return action, inputs # passed attribute check
        else:
            # no attribute check
            return action, inputs

    return {}, None # no matching form found
开发者ID:Dmanugm,项目名称:pyload,代码行数:33,代码来源:SimpleHoster.py


示例19: get_media_url

    def get_media_url(self, host, media_id):
        """Resolve a grifthost page into a direct stream URL.

        Fills the page's hidden form (plus the free-download button value),
        POSTs it back, then extracts the stream from packed
        (eval(function...)) javascript via a <param name="src"> or a
        jwplayer file: entry.  Returns the URL with User-Agent/Referer
        appended, or raises ResolverError.

        NOTE(review): depends on resolver framework members (self.net,
        self.get_url, jsunpack, common, urllib) defined elsewhere.
        """
        web_url = self.get_url(host, media_id)
        html = self.net.http_GET(web_url).content

        # Collect the hidden form fields needed to request the video page.
        data = {}
        for match in re.finditer('input type="hidden" name="([^"]+)" value="([^"]+)', html):
            key, value = match.groups()
            data[key] = value
        data['method_free'] = 'Proceed to Video'

        html = self.net.http_POST(web_url, form_data=data).content

        # Unpack obfuscated javascript players and look for the source URL.
        stream_url = ''
        for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
            js_data = jsunpack.unpack(match.group(1))
            match2 = re.search('<param\s+name="src"\s*value="([^"]+)', js_data)
            if match2:
                stream_url = match2.group(1)
            else:
                match2 = re.search('file\s*:\s*"([^"]+)', js_data)
                if match2:
                    stream_url = match2.group(1)

        if stream_url:
            return stream_url + '|' + urllib.urlencode({'User-Agent': common.IE_USER_AGENT, 'Referer': web_url})

        raise ResolverError('Unable to resolve grifthost link. Filelink not found.')
开发者ID:AMOboxTV,项目名称:AMOBox.LegoBuild,代码行数:27,代码来源:grifthost.py


示例20: google_scrap

 def google_scrap(self):
     """Parse self.the_page (a Google results page) into result dicts.

     Splits the page into <p> blocks, extracts each result's title and its
     "related:" URL, cleans both with textprocess.TextProcess, and fills
     self.html_titles (url -> title) and self.res_items
     (url -> [title, cleaned block text]).

     NOTE(review): Python 2 code (print statement); the textprocess module
     and the instance attributes are defined elsewhere.
     """
     #iter = re.finditer('''<!--sMSL-->([\s\S]*)<!--sMSR-->''',self.the_page)
     #for it in iter:
     #    self.res_html = it.group(1)
     # Collect each <p> result block from the page.
     iter = re.finditer('''<p>([\s\S]*?)</p>''',self.the_page)
     for it in iter:
         self.search_item.append(it.group(1))
     for i in range(len(self.search_item)):
         href = ""
         title = ""
         # Title is the anchor text; href comes from the "related:" link.
         iter = re.finditer('''<a.href="/url\?q=[^"]*">([\s\S]*?)</a>[\s\S]*?<a.href="/search\?q=related:([^"]*)&amp;hl=">''',self.search_item[i])
         #iter = re.finditer('''<a.href="[^"]*">([^"]*)</a>[\s\S]*?<a.href="/search?q=related:([^"]*)&amp;hl=">''',self.search_item[i])
         for it in iter:
             href = it.group(2)
             if href == '':
                 break
             title = it.group(1)
             textProcess = textprocess.TextProcess(title)
             textProcess.getTitle()
             textProcess.clearPoint()
             textProcess.clearSpace()
             self.html_titles[href] = textProcess.text
         # Blocks with no usable link are skipped entirely.
         if href == '':
             continue
         # Store the cleaned full block text alongside its title.
         textProcess = textprocess.TextProcess(self.search_item[i])
         textProcess.clearHtml()
         textProcess.clearPoint()
         textProcess.clearSpace()
         text = textProcess.text
         self.res_items[href] = [self.html_titles[href],text]
     print self.html_titles



注:本文中的re.finditer函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python re.fullmatch函数代码示例发布时间:2022-05-26
下一篇:
Python re.findall函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap