本文整理汇总了Python中re.finditer函数的典型用法代码示例。如果您正苦于以下问题:Python finditer函数的具体用法?Python finditer怎么用?Python finditer使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了finditer函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_pronoun_label_zh
def get_pronoun_label_zh(line):
    """Classify a whitespace-tokenized Chinese line by pronoun usage.

    Counts first-person ('1v') and second-person ('2v') pronoun tokens and
    returns (label, first_count, second_count, last_first_matches,
    last_second_matches).  On a tie the pronoun that appears earliest in the
    line decides the label; 'none' is returned when no pronoun occurs.
    """
    first_person = ['我', '我们', '我 的']
    second_person = ['你', '你们', '你 的']

    def _scan(pronoun):
        # A pronoun counts only as a whole token: at line start, between
        # spaces, or at line end.
        patterns = ['^' + pronoun + ' ', ' ' + pronoun + ' ', ' ' + pronoun + '$']
        hits = []
        starts = []
        for pat in patterns:
            hits += re.findall(pat, line)
            starts += [m.span()[0] for m in re.finditer(pat, line)]
        return hits, starts

    f_count = 0
    s_count = 0
    f_positions = []
    s_positions = []
    for pronoun in first_person:
        f_zh, starts = _scan(pronoun)
        f_count += len(f_zh)
        f_positions += starts
    for pronoun in second_person:
        s_zh, starts = _scan(pronoun)
        s_count += len(s_zh)
        s_positions += starts

    # NOTE: f_zh/s_zh intentionally hold only the LAST pronoun's matches,
    # mirroring the original loop-variable behavior relied on by callers.
    if f_count == 0 and s_count == 0:
        return ('none', 0, 0, [], [])
    if f_count == s_count:
        starts_with = '1v' if min(f_positions) < min(s_positions) else '2v'
        return (starts_with, f_count, s_count, f_zh, s_zh)
    if f_count > s_count:
        return ('1v', f_count, s_count, f_zh, s_zh)
    return ('2v', f_count, s_count, f_zh, s_zh)
开发者ID:raosudha89,项目名称:pronoun_restoration,代码行数:28,代码来源:to_vw_input_format.py
示例2: ParseMethodAnnotation
def ParseMethodAnnotation(self, annotation):
    """Parse a method annotation string and record recognized options on self.

    Recognized keys: reservable, delegate, disableReflectMethod,
    preWrapperLines, postWrapperLines.  Unrecognized content is ignored.

    FIX: all regex literals are now raw strings — the originals relied on
    invalid escape sequences ('\\s', '\\{' written without r-prefix), which
    raise SyntaxWarning/SyntaxError on modern Python.
    """
    # 'reservable = true' is matched as a plain substring, as in the
    # original annotation format.
    if annotation.find('reservable = true') >= 0:
        self._is_reservable = True

    # delegate = true|false  (regex admits only these two values, so a
    # direct equality test reproduces the original if/elif exactly)
    delegate_re = re.compile(r'delegate\s*=\s*(?P<delegate>(true|false))')
    for match in re.finditer(delegate_re, annotation):
        self._is_delegate = match.group('delegate') == 'true'

    # disableReflectMethod = true|false ('false' maps to False)
    disable_reflect_method_re = re.compile(
        r'disableReflectMethod\s*=\s*(?P<disableReflectMethod>(true|false))')
    for match in re.finditer(disable_reflect_method_re, annotation):
        self._disable_reflect_method = (
            match.group('disableReflectMethod') == 'true')

    # preWrapperLines = { "...", "..." }
    pre_wrapline_re = re.compile(
        r'preWrapperLines\s*=\s*\{\s*(?P<pre_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(pre_wrapline_re, annotation):
        pre_wrapline = self.FormatWrapperLine(match.group('pre_wrapline'))
        self._method_annotations[self.ANNOTATION_PRE_WRAPLINE] = pre_wrapline

    # postWrapperLines = { "...", "..." }
    post_wrapline_re = re.compile(
        r'postWrapperLines\s*=\s*\{\s*(?P<post_wrapline>(".*")(,\s*".*")*)\s*\}')
    for match in re.finditer(post_wrapline_re, annotation):
        post_wrapline = self.FormatWrapperLine(match.group('post_wrapline'))
        self._method_annotations[self.ANNOTATION_POST_WRAPLINE] = post_wrapline
开发者ID:Avanznow,项目名称:crosswalk,代码行数:33,代码来源:java_method.py
示例3: scan_page
def scan_page(url, data=None):
    """Scan every GET (and, when *data* is given, POST) parameter of *url*
    for reflected XSS and return True when at least one parameter appears
    vulnerable.

    Relies on module-level globals defined elsewhere in this file: GET, POST,
    PREFIX_SUFFIX_LENGTH, LARGER_CHAR_POOL, SMALLER_CHAR_POOL, XSS_PATTERNS,
    _retrieve_content, _contains.  Python 2 only (print statement, xrange).
    """
    retval, usable = False, False
    # Give empty parameters ("a=&b=") a dummy value of 1 so they get scanned too.
    url, data = re.sub(r"=(&|\Z)", "=1\g<1>", url) if url else url, re.sub(r"=(&|\Z)", "=1\g<1>", data) if data else data
    try:
        for phase in (GET, POST):
            current = url if phase is GET else (data or "")
            # Walk every name=value pair of the current phase's payload.
            for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)(?P<value>[^&]+)", current):
                found, usable = False, True
                print "* scanning %s parameter '%s'" % (phase, match.group("parameter"))
                # Random lowercase prefix/suffix mark the injected probe so it
                # can be located in the response.
                prefix, suffix = ("".join(random.sample(string.ascii_lowercase, PREFIX_SUFFIX_LENGTH)) for i in xrange(2))
                for pool in (LARGER_CHAR_POOL, SMALLER_CHAR_POOL):
                    if not found:
                        # Append the probe (optional quote + prefix + shuffled
                        # char pool + suffix) to the parameter's original value.
                        tampered = current.replace(match.group(0), "%s%s" % (match.group(0), urllib.quote("%s%s%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix, "".join(random.sample(pool, len(pool))), suffix))))
                        content = (_retrieve_content(tampered, data) if phase is GET else _retrieve_content(url, tampered)).replace("%s%s" % ("'" if pool == LARGER_CHAR_POOL else "", prefix), prefix)
                        # Find each reflection of the probe and test it against
                        # the known XSS output contexts.
                        for sample in re.finditer("%s([^ ]+?)%s" % (prefix, suffix), content, re.I):
                            for regex, condition, info, content_removal_regex in XSS_PATTERNS:
                                context = re.search(regex % {"chars": re.escape(sample.group(0))}, re.sub(content_removal_regex or "", "", content), re.I)
                                if context and not found and sample.group(1).strip():
                                    if _contains(sample.group(1), condition):
                                        print " (i) %s parameter '%s' appears to be XSS vulnerable (%s)" % (phase, match.group("parameter"), info % dict((("filtering", "no" if all(char in sample.group(1) for char in LARGER_CHAR_POOL) else "some"),)))
                                        found = retval = True
                                        break
        if not usable:
            print " (x) no usable GET/POST parameters found"
    except KeyboardInterrupt:
        print "\r (x) Ctrl-C pressed"
    return retval
开发者ID:brock7,项目名称:scripts,代码行数:27,代码来源:dsxs.py
示例4: tableViewInHierarchy
def tableViewInHierarchy():
    """Find a UITableView (or subclass) pointer in the key window's view
    hierarchy and return it as a hex-address string, or None when absent."""
    viewDescription = fb.evaluateExpressionValue(
        "(id)[(id)[[UIApplication sharedApplication] keyWindow] recursiveDescription]"
    ).GetObjectDescription()

    # Fast path: an exact UITableView instance appears in the description.
    hit = re.search(r"UITableView: (0x[0-9a-fA-F]+);", viewDescription)
    if hit:
        return hit.group(1)

    # Next: a direct subclass, which prints its base class explicitly.
    hit = re.search(r"(0x[0-9a-fA-F]+); baseClass = UITableView;", viewDescription)
    if hit:
        return hit.group(1)

    # SLOW fallback: ask the runtime about every pointer in the description.
    for candidate in re.findall(r"(0x[0-9a-fA-F]+)[;>]", viewDescription):
        if fb.evaluateBooleanExpression("[" + candidate + " isKindOfClass:(id)[UITableView class]]"):
            return candidate
    return None
开发者ID:kristiandelay,项目名称:chisel,代码行数:29,代码来源:FBPrintCommands.py
示例5: setupTranslations
def setupTranslations(type, locales, projectName, key):
    """Sync the locale list of a Crowdin project.

    Merges *locales* with the locales the target browser supports, maps them
    to Crowdin's own codes, intersects with the codes the Crowdin server
    allows, and pushes the result to the project's edit-project API endpoint.

    NOTE(review): the *type* parameter shadows the builtin; kept unchanged
    for interface compatibility.  Python 2 only (print statement, urllib2).
    Relies on module-level chromeLocales and mapLocale defined elsewhere.
    """
    # Copy locales list, we don't want to change the parameter
    locales = set(locales)
    # Fill up with locales that we don't have but the browser supports
    if type == 'chrome':
        for locale in chromeLocales:
            locales.add(locale)
    else:
        # Scrape Firefox-supported locales from mozilla.org, plus the
        # language packs listed on addons.mozilla.org.
        firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
        for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):
            locales.add(mapLocale(type, match.group(1)))
        langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
        for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
            if match.group(0).find('Install Language Pack') >= 0:
                match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
                if match2:
                    locales.add(mapLocale(type, match2.group(1)))
    # Convert locale codes to the ones that Crowdin will understand
    locales = set(map(lambda locale: mapLocale(type, locale), locales))
    # Keep only the locale codes the Crowdin server actually accepts.
    allowed = set()
    allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
    for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
        allowed.add(match.group(1))
    if not allowed.issuperset(locales):
        print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)
    locales = list(locales & allowed)
    locales.sort()
    # Push the final, sorted locale list to the project.
    params = urllib.urlencode([('languages[]', locale) for locale in locales])
    result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
    if result.find('<success') < 0:
        raise Exception('Server indicated that the operation was not successful\n' + result)
开发者ID:chinnurtb,项目名称:buildtools,代码行数:35,代码来源:localeTools.py
示例6: consistency_check
def consistency_check(text, word_pairs, err, msg, offset=0):
    """Check that *text* uses one spelling of each pair consistently.

    For each (a, b) pair occurring in both spellings, every occurrence of
    the minority spelling is flagged and the majority spelling recommended
    (ties flag occurrences of the first spelling).  Returns a list of
    (start, end, err, message) tuples, positions shifted by *offset*.
    """
    errors = []
    msg = " ".join(msg.split())  # collapse whitespace in the message template
    for pair in word_pairs:
        first_hits = list(re.finditer(pair[0], text))
        second_hits = list(re.finditer(pair[1], text))
        if not first_hits or not second_hits:
            continue
        if len(first_hits) > len(second_hits):
            flagged, preferred = second_hits, pair[0]
        else:
            flagged, preferred = first_hits, pair[1]
        for hit in flagged:
            errors.append((
                hit.start() + offset,
                hit.end() + offset,
                err,
                msg.format(hit.group(0), preferred)))
    return errors
开发者ID:ComSecNinja,项目名称:proselint,代码行数:28,代码来源:tools.py
示例7: get_sources
def get_sources(self, video):
    """Collect hoster dicts for *video* from the scraped site.

    Scrapes the video page for embed links (whose payload may be
    caesar-shifted and base-64 encoded, possibly twice) and for plain links
    marked with a play_video.gif image.  Python 2 only ('base-64' codec,
    urlparse module); relies on FORCE_NO_MATCH, QUALITIES, scraper_utils and
    several self._ helpers defined elsewhere.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # The page advertises low quality explicitly; default to HIGH otherwise.
        match = re.search('This movie is of poor quality', html, re.I)
        if match:
            quality = QUALITIES.LOW
        else:
            quality = QUALITIES.HIGH
        for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
            url = match.group(1)
            embed_html = self._http_get(url, cache_limit=.5)
            r = re.search('{\s*write\("([^"]+)', embed_html)
            if r:
                # Obfuscated payload: caesar shift by 13, then base-64 decode;
                # some pages are base-64 encoded once more before the shift.
                plaintext = self._caesar(r.group(1), 13).decode('base-64')
                if 'http' not in plaintext:
                    plaintext = self._caesar(r.group(1).decode('base-64'), 13).decode('base-64')
            else:
                plaintext = embed_html
            hosters += self._get_links(plaintext)
        # Plain (non-embed) links marked with a play_video.gif image.
        pattern = 'href="([^"]+)".*play_video.gif'
        for match in re.finditer(pattern, html, re.I):
            url = match.group(1)
            host = urlparse.urlparse(url).hostname
            hoster = {'multi-part': False, 'url': url, 'host': host, 'class': self, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False}
            hosters.append(hoster)
    return hosters
开发者ID:assli100,项目名称:kodi-openelec,代码行数:32,代码来源:afdah_scraper.py
示例8: _generate_entry_probe
def _generate_entry_probe(self):
    """Generate BPF program text for an entry probe that saves $entry(...) args.

    Scans self.exprs and self.filter for $entry(name) expressions; each
    referenced argument (plus __latency when $latency is used anywhere) is
    stored in a per-pid BPF hash at function entry so the exit probe can
    read it back.

    Returns the generated C text.
    Raises ValueError when an expression references a parameter missing
    from self.param_types.
    """
    self.args_to_probe = set()
    regex = r"\$entry\((\w+)\)"
    for expr in self.exprs:
        for match in re.finditer(regex, expr):
            self.args_to_probe.add(match.group(1))
    for match in re.finditer(regex, self.filter):
        self.args_to_probe.add(match.group(1))
    # $latency needs the entry timestamp saved as well.
    if any("$latency" in expr for expr in self.exprs) or \
            "$latency" in self.filter:
        self.args_to_probe.add("__latency")
        self.param_types["__latency"] = "u64"  # nanoseconds
    for pname in self.args_to_probe:
        if pname not in self.param_types:
            # BUG FIX: the original interpolated the stale regex-loop
            # variable ('arg', a match object) here instead of pname, so the
            # error named the wrong parameter (or raised NameError).
            raise ValueError("$entry(%s): no such param" % pname)
    self.hashname_prefix = "%s_param_" % self.probe_hash_name
    text = ""
    for pname in self.args_to_probe:
        # Each saved argument lives in its own hash keyed by pid.
        text += "BPF_HASH(%s, u32, %s);\n" % \
            (self.hashname_prefix + pname, self.param_types[pname])
    text += self._generate_entry()
    return text
开发者ID:att-innovate,项目名称:bcc,代码行数:29,代码来源:argdist.py
示例9: list_show_page
def list_show_page(self, url, page, seasons=False, episodes=False):
    """Parse a show page's HTML into season and/or episode items.

    Pages under /p/epizody, /p/epiz%C3%B3dy or p/archiv use the second set
    of module-level regex constants (...RE2); all other pages use the first
    set.  Every item is passed through self._filter before being collected.
    Python 2 only (urlparse module); regex constants and util come from
    elsewhere in the file.
    """
    result = []
    if "/p/epizody" in url or "/p/epiz%C3%B3dy" in url or "p/archiv" in url:
        if seasons:
            season_data = util.substr(page, SERIES_START2, SERIES_END2)
            for m in re.finditer(SERIES_ITER_RE2, season_data, re.DOTALL | re.IGNORECASE):
                item = self.dir_item()
                item['title'] = m.group('title')
                # Season id is carried in the URL fragment.
                item['url'] = url + '#post=%s' % (m.group('id'))
                self._filter(result, item)
        if episodes:
            for m in re.finditer(EPISODE_ITER_RE2, page, re.DOTALL | re.IGNORECASE):
                item = self.video_item()
                item['title'] = "%s (%s)" % (m.group('title'), m.group('date'))
                item['url'] = m.group('url')
                self._filter(result, item)
    else:
        if seasons:
            season_data = util.substr(page, SERIES_START, SERIES_END)
            for m in re.finditer(SERIES_ITER_RE, season_data, re.DOTALL | re.IGNORECASE):
                item = self.dir_item()
                item['title'] = m.group('title')
                # Seasons on this layout are fetched through the site's ajax endpoint.
                item['url'] = 'http://' + urlparse.urlparse(url).netloc + '/ajax.json?' + m.group('url')
                self._filter(result, item)
        if episodes:
            # NOTE(review): episodes_data is computed but the finditer below
            # scans the full page instead — looks like an upstream oversight;
            # confirm before changing.
            episodes_data = util.substr(page, EPISODE_START, EPISODE_END)
            for m in re.finditer(EPISODE_ITER_RE, page, re.DOTALL | re.IGNORECASE):
                item = self.video_item()
                item['title'] = "%s. %s (%s)" % (m.group('episode'), m.group('title'), m.group('date'))
                item['url'] = m.group('url')
                self._filter(result, item)
    return result
开发者ID:Jakub2013,项目名称:plugin.video.joj.sk,代码行数:32,代码来源:joj.py
示例10: dotransform
def dotransform(request, response):
    """Maltego transform: extract SMTP envelope addresses from a capture file.

    Reads the file named by request.value, pulls RCPT TO / MAIL FROM
    addresses, and appends one EmailAddress entity per address (labelled
    mail_to / mail_from) to *response*.
    """
    emailaddr = []
    msgfile = request.value
    lookFor = ['To', 'From']
    tmpfolder = request.fields['sniffMyPackets.outputfld']  # NOTE(review): unused below
    with open(msgfile, mode='r') as msgfile:
        reader = msgfile.read()
        reader = str(reader)
        for x in lookFor:
            if x in reader:
                # NOTE(review): the '[email protected]' fragment in both patterns
                # looks like the scraping site's e-mail obfuscation mangled the
                # original regex (probably '[\w.-]+@[\w.-]+'); confirm against
                # the upstream repository before relying on these matches.
                for s in re.finditer('RCPT TO: <([\w.-][email protected][\w.-]+)>', reader):
                    to_addr = s.group(1), 'mail_to'
                    emailaddr.append(to_addr)
                for t in re.finditer('MAIL FROM: <([\w.-][email protected][\w.-]+)>', reader):
                    from_addr = t.group(1), 'mail_from'
                    emailaddr.append(from_addr)
    # Build one Maltego entity per collected (address, direction) pair.
    for addr, addrfield in emailaddr:
        e = EmailAddress(addr)
        e.linklabel = addrfield
        e += Field('filelocation', request.value, displayname='File Location', matchingrule='loose')
        e += Field('emailaddr', addrfield, displayname='Header Info')
        response += e
    return response
开发者ID:FomkaV,项目名称:wifi-arsenal,代码行数:28,代码来源:smtpaddress.py
示例11: list_archive_page
def list_archive_page(self, show_page, showon=False, showoff=False):
    """Build a title-sorted listing of currently-aired and archived shows.

    *showon* selects the aired section, *showoff* the archived one; aired
    entries always precede archived entries in the returned list.
    """
    aired = []
    if showon:
        section = util.substr(show_page, VYSIELANE_START, NEVYSIELANE_START)
        for match in re.finditer(VYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
            entry = self.dir_item()
            entry['title'] = match.group('title')
            entry['plot'] = match.group('desc')
            entry['url'] = match.group('url') + "#season_episode"
            # An 'itime' marker distinguishes 7-day catch-up shows.
            entry['type'] = "showon7d" if match.group('itime') is not None else "showon"
            aired.append(entry)
        aired.sort(key=lambda entry: entry['title'].lower())
    archived = []
    if showoff:
        section = util.substr(show_page, NEVYSIELANE_START, NEVYSIELANE_END)
        for match in re.finditer(NEVYSIELANE_ITER_RE, section, re.DOTALL | re.IGNORECASE):
            entry = self.dir_item()
            entry['title'] = match.group('title')
            entry['url'] = match.group('url') + "#season_episode"
            entry['type'] = "showoff"
            archived.append(entry)
        archived.sort(key=lambda entry: entry['title'].lower())
    return aired + archived
开发者ID:Jakub2013,项目名称:plugin.video.joj.sk,代码行数:27,代码来源:joj.py
示例12: setupTranslations
def setupTranslations(localeConfig, projectName, key):
    """Sync the locale list of a Crowdin project from *localeConfig*.

    Merges the configured locales with those the configured target platforms
    ('chrome', 'gecko') support, filters them by what the Crowdin server
    allows, and pushes the result to the project's edit-project API endpoint.
    Python 2 only (print statement, urllib2); chromeLocales and mapLocale
    are defined elsewhere in the file.
    """
    # Make a new set from the locales list, mapping to Crowdin friendly format
    locales = {mapLocale(localeConfig['name_format'], locale)
               for locale in localeConfig['locales']}
    # Fill up with locales that we don't have but the browser supports
    if 'chrome' in localeConfig['target_platforms']:
        for locale in chromeLocales:
            locales.add(mapLocale('ISO-15897', locale))
    if 'gecko' in localeConfig['target_platforms']:
        # Scrape Firefox-supported locales from mozilla.org, plus the
        # language packs listed on addons.mozilla.org.
        firefoxLocales = urllib2.urlopen('http://www.mozilla.org/en-US/firefox/all.html').read()
        for match in re.finditer(r'&lang=([\w\-]+)"', firefoxLocales):
            locales.add(mapLocale('BCP-47', match.group(1)))
        langPacks = urllib2.urlopen('https://addons.mozilla.org/en-US/firefox/language-tools/').read()
        for match in re.finditer(r'<tr>.*?</tr>', langPacks, re.S):
            if match.group(0).find('Install Language Pack') >= 0:
                match2 = re.search(r'lang="([\w\-]+)"', match.group(0))
                if match2:
                    locales.add(mapLocale('BCP-47', match2.group(1)))
    # Keep only the locale codes the Crowdin server actually accepts.
    allowed = set()
    allowedLocales = urllib2.urlopen('http://crowdin.net/page/language-codes').read()
    for match in re.finditer(r'<tr>\s*<td\b[^<>]*>([\w\-]+)</td>', allowedLocales, re.S):
        allowed.add(match.group(1))
    if not allowed.issuperset(locales):
        print 'Warning, following locales aren\'t allowed by server: ' + ', '.join(locales - allowed)
    locales = list(locales & allowed)
    locales.sort()
    # Push the final, sorted locale list to the project.
    params = urllib.urlencode([('languages[]', locale) for locale in locales])
    result = urllib2.urlopen('http://api.crowdin.net/api/project/%s/edit-project?key=%s' % (projectName, key), params).read()
    if result.find('<success') < 0:
        raise Exception('Server indicated that the operation was not successful\n' + result)
开发者ID:HoverHell,项目名称:abp_buildtools,代码行数:34,代码来源:localeTools.py
示例13: ExtendCurlys
def ExtendCurlys(self, list_of_terms, target_body):
    """
    Run FindWordsInBracketsAndCurlies first.
    Wraps each occurrence of every term in curly braces, skipping
    occurrences that already sit inside [...] or {...} spans of the
    original body.  Results land in self.ecoutput / self.curly_count.
    """
    self.target_body = ' ' + target_body + ' '
    # Spans that are already bracketed or curly-wrapped are off-limits.
    # They are computed once, against the ORIGINAL padded body; later
    # substitutions shift offsets, which mirrors the original behavior.
    self.dbrackets = [m.span(0) for m in re.finditer(r"\[([\w \(\)\-,.]+)\]", self.target_body)]
    self.sbrackets = [m.span(0) for m in re.finditer(r"\{([\w \(\)\-,.]+)\}", self.target_body)]
    self.allbrackets = self.dbrackets + self.sbrackets

    def _wrap(match):
        # Leave the hit untouched when it starts inside a protected span.
        if any(lo <= match.start(0) < hi for lo, hi in self.allbrackets):
            return match.group(0)
        self.curly_count += 1
        return match.group(1) + self.curly_term + match.group(2)

    self.curly_count = 0
    text = self.target_body
    for term in list_of_terms:
        self.curly_term = '{' + term + '}'
        pattern = re.compile(r"([^\{\w])%s([^\}\w])" % term, re.IGNORECASE)
        text = pattern.sub(_wrap, text)
        self.ecoutput = text
    # Strip the sentinel spaces added at both ends.
    self.ecoutput = self.ecoutput[1:-1]
开发者ID:law826,项目名称:radiology_diagnoser,代码行数:31,代码来源:importnlp.py
示例14: consistency_check
def consistency_check(text, word_pairs, err, msg, offset=0):
    """Check that *text* uses one spelling of each pair consistently.

    For each (a, b) pair occurring in both spellings, every occurrence of
    the minority spelling is flagged with the majority spelling recommended
    (ties flag the first spelling's occurrences).  Returns 5-tuples:
    (start, end, err, message, preferred_spelling).
    """
    errors = []
    msg = " ".join(msg.split())  # collapse whitespace in the message template
    for pair in word_pairs:
        hits_a = list(re.finditer(pair[0], text))
        hits_b = list(re.finditer(pair[1], text))
        if not hits_a or not hits_b:
            continue
        if len(hits_a) > len(hits_b):
            flagged, preferred = hits_b, pair[0]
        else:
            flagged, preferred = hits_a, pair[1]
        for hit in flagged:
            errors.append((
                hit.start() + offset,
                hit.end() + offset,
                err,
                msg.format(preferred, hit.group(0)),
                preferred))
    return errors
开发者ID:CatherineH,项目名称:proselint,代码行数:25,代码来源:tools.py
示例15: stem_helper
def stem_helper(word, rem_umlaut=True):
    """Strip Middle High German inflectional/derivational suffixes from *word*.

    rem_umlaut: also remove umlauts (via module-level remove_umlaut) before
    suffix stripping.

    Snowball-style scheme: R1/R2 regions start after the first
    vowel-consonant pair (R1 is forced to index >= 3), then three
    suffix-stripping passes run on the R1/R1/R2 slices.

    FIXES over the scraped original:
    * second-pass pattern had a paste error — ',word[R1:]' had been sucked
      into the string literal, leaving a dead alternative;
    * the look-aheads '(?=.{3})st$', 'eȥ(?=[klmrt])s' and '(?=[lr])n' could
      never match (the look-ahead contradicts the literal that follows);
      reconstructed as 'eȥ' plus look-behinds, matching the stemmer's
      intent (strip 's' after k/l/m/r/t, 'n' after l/r, 'st' after >=3
      characters) — confirm against the upstream CLTK source;
    * bare 'except:' narrowed to 'except IndexError:' (the only exception
      list(...)[0] raises here).
    """
    vowel_consonant = r"[aëeiouäöüâêîôûæœ][bdghfcjklmnspqrtvwz]"
    # R1 is defined as the region after the first consonant followed by a vowel
    try:
        R1 = list(re.finditer(vowel_consonant, word))[0].start() + 2
    except IndexError:
        R1 = len(word)
    # R2 is defined as the region within R1 after the first consonant followed by a vowel
    try:
        R2 = list(re.finditer(vowel_consonant, word[R1:]))[0].start() + 2 + R1
    except IndexError:
        R2 = len(word)
    # Make sure the index of R1 is at least 3.
    if R1 < 3:
        try:
            R1 = list(re.finditer(vowel_consonant, word[1:]))[0].start() + 2
        except IndexError:
            R1 = len(word)
    if rem_umlaut:
        word = remove_umlaut(word)
    # Pass 1 and 2 strip inflectional endings inside R1; pass 3 strips
    # derivational endings inside R2.
    word = word[:R1] + re.sub(
        r'(wes|wen|est|ern|em|en|er|es|eȥ|(?<=[klmrt])s|(?<=[lr])n|e)$', "", word[R1:])
    word = word[:R1] + re.sub(r'(est|er|en|re|in|iu|(?<=.{3})st)$', "", word[R1:])
    word = word[:R2] + re.sub(r'(lich?.?.|keit|inc|isch?.?.)$', "", word[R2:])
    return word
开发者ID:TylerKirby,项目名称:cltk,代码行数:35,代码来源:stem.py
示例16: get_media_url
def get_media_url(self, host, media_id):
    """Resolve a cloudyvideos page to a direct stream URL.

    Submits the page's hidden form after a short delay, then looks for the
    file link in the plain player config or inside packed (eval'd) JS.
    Raises UrlResolver.ResolverError when no file link is found.
    Relies on xbmc, jsunpack, common and UrlResolver from elsewhere.
    """
    web_url = self.get_url(host, media_id)
    html = self.net.http_GET(web_url).content
    form_values = {}
    stream_url = ''
    # Collect the hidden form fields needed to request the player page.
    for i in re.finditer('<input type="hidden" name="([^"]+)" value="([^"]+)', html):
        form_values[i.group(1)] = i.group(2)
    xbmc.sleep(2000)  # presumably a site-imposed wait before submitting — confirm
    html = self.net.http_POST(web_url, form_data=form_values).content
    # Plain player config: file : '<url>'
    r = re.search("file\s*:\s*'([^']+)'", html)
    if r:
        stream_url = r.group(1)
    # Packed JS: unpack each eval(function...) blob and look for
    # <param name="src"> first, then <embed ... src=...>.
    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        match2 = re.search('<param\s+name="src"\s*value="([^"]+)', js_data)
        if match2:
            stream_url = match2.group(1)
        else:
            match2 = re.search('<embed.*?type="video.*?src="([^"]+)', js_data)
            if match2:
                stream_url = match2.group(1)
    if stream_url:
        # Append the headers Kodi should send when fetching the stream.
        return stream_url + '|User-Agent=%s&Referer=%s' % (common.IE_USER_AGENT, web_url)
    raise UrlResolver.ResolverError('Unable to resolve cloudyvideos link. Filelink not found.')
开发者ID:Lynx187,项目名称:script.module.urlresolver,代码行数:29,代码来源:cloudyvideos.py
示例17: _boundary_of_alternatives_indices
def _boundary_of_alternatives_indices(pattern):
    """
    Determines the location of a set of alternatives in a glob pattern.
    Alternatives are defined by a matching set of non-bracketed parentheses.

    :param pattern: Glob pattern with wildcards.
    :return: Indices of the innermost set of matching non-bracketed
             parentheses in a tuple. The Index of a missing parenthesis
             will be passed as None.
    """
    # The leftmost non-bracketed ')' together with the rightmost
    # non-bracketed '(' before it delimit the most deeply nested set, so the
    # pattern is parsed from the innermost section outwards.
    end_pos = next(
        (m.start() for m in re.finditer('\\)', pattern)
         if not _position_is_bracketed(pattern, m.start())),
        None)
    start_pos = None
    for m in re.finditer('\\(', pattern[:end_pos]):
        if not _position_is_bracketed(pattern, m.start()):
            start_pos = m.end()  # no break: keep the rightmost one
    return start_pos, end_pos
开发者ID:netman92,项目名称:coala,代码行数:26,代码来源:Globbing.py
示例18: parseHtmlForm
def parseHtmlForm(attr_str, html, input_names=None):
    """Find the first <form> whose opening tag contains *attr_str* and parse it.

    :param attr_str: raw text that must appear inside the <form ...> tag.
    :param html: page HTML to search.
    :param input_names: optional dict of required input name -> expected
        value (string for equality, tuple for membership, object with a
        'search' attribute for a regex match); forms failing the check are
        skipped.
    :return: (action, inputs_dict) of the first matching form, or
        ({}, None) when no form matches.  Python 2 only (basestring).
    """
    for form in re.finditer(r"(?P<tag><form[^>]*%s[^>]*>)(?P<content>.*?)</?(form|body|html)[^>]*>" % attr_str, html, re.S | re.I):
        inputs = {}
        action = parseHtmlTagAttrValue("action", form.group('tag'))
        # Collect every <input>/<textarea> name/value inside the form body.
        for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('content'), re.S | re.I):
            name = parseHtmlTagAttrValue("name", inputtag.group(1))
            if name:
                value = parseHtmlTagAttrValue("value", inputtag.group(1))
                if value is None:
                    # No value attribute (textarea): use the tag's text content.
                    inputs[name] = inputtag.group(3) or ''
                else:
                    inputs[name] = value
        if isinstance(input_names, dict):
            # check input attributes
            for key, val in input_names.items():
                if key in inputs:
                    if isinstance(val, basestring) and inputs[key] == val:
                        continue
                    elif isinstance(val, tuple) and inputs[key] in val:
                        continue
                    elif hasattr(val, "search") and re.match(val, inputs[key]):
                        continue
                    break  # attribute value does not match
                else:
                    break  # attribute name does not match
            else:
                # for/else: the loop completed without break, so every
                # required attribute matched.
                return action, inputs  # passed attribute check
        else:
            # no attribute check
            return action, inputs
    return {}, None  # no matching form found
开发者ID:Dmanugm,项目名称:pyload,代码行数:33,代码来源:SimpleHoster.py
示例19: get_media_url
def get_media_url(self, host, media_id):
    """Resolve a grifthost page to a direct stream URL.

    Submits the 'Proceed to Video' free-download form, then looks for the
    file link inside packed (eval'd) JS.  Raises ResolverError when no file
    link is found.  Relies on jsunpack, common and ResolverError from
    elsewhere in the module.
    """
    web_url = self.get_url(host, media_id)
    html = self.net.http_GET(web_url).content
    data = {}
    # Hidden form fields required by the site's free-download flow.
    for match in re.finditer('input type="hidden" name="([^"]+)" value="([^"]+)', html):
        key, value = match.groups()
        data[key] = value
    data['method_free'] = 'Proceed to Video'
    html = self.net.http_POST(web_url, form_data=data).content
    stream_url = ''
    # Unpack each eval(function...) blob and look for <param name="src">
    # first, then a file : "<url>" player config.
    for match in re.finditer('(eval\(function.*?)</script>', html, re.DOTALL):
        js_data = jsunpack.unpack(match.group(1))
        match2 = re.search('<param\s+name="src"\s*value="([^"]+)', js_data)
        if match2:
            stream_url = match2.group(1)
        else:
            match2 = re.search('file\s*:\s*"([^"]+)', js_data)
            if match2:
                stream_url = match2.group(1)
    if stream_url:
        # Append the headers Kodi should send when fetching the stream.
        return stream_url + '|' + urllib.urlencode({'User-Agent': common.IE_USER_AGENT, 'Referer': web_url})
    raise ResolverError('Unable to resolve grifthost link. Filelink not found.')
开发者ID:AMOboxTV,项目名称:AMOBox.LegoBuild,代码行数:27,代码来源:grifthost.py
示例20: google_scrap
def google_scrap(self):
    """Parse self.the_page (a Google results page) into self.res_items.

    Splits the page into <p> snippets, extracts each result's title and its
    'related:' href, cleans both through textprocess.TextProcess, and stores
    {href: [cleaned_title, cleaned_snippet]} in self.res_items (titles also
    kept in self.html_titles).  Python 2 only (print statement).
    """
    #iter = re.finditer('''<!--sMSL-->([\s\S]*)<!--sMSR-->''',self.the_page)
    #for it in iter:
    #    self.res_html = it.group(1)
    # Each search hit lives in its own <p> block.
    iter = re.finditer('''<p>([\s\S]*?)</p>''', self.the_page)  # NOTE(review): shadows builtin 'iter'
    for it in iter:
        self.search_item.append(it.group(1))
    for i in range(len(self.search_item)):
        href = ""
        title = ""
        # Title is the result link's text; href comes from the adjacent
        # 'related:' search link of the same hit.
        iter = re.finditer('''<a.href="/url\?q=[^"]*">([\s\S]*?)</a>[\s\S]*?<a.href="/search\?q=related:([^"]*)&hl=">''', self.search_item[i])
        #iter = re.finditer('''<a.href="[^"]*">([^"]*)</a>[\s\S]*?<a.href="/search?q=related:([^"]*)&hl=">''',self.search_item[i])
        for it in iter:
            href = it.group(2)
            if href == '':
                break
            title = it.group(1)
            textProcess = textprocess.TextProcess(title)
            textProcess.getTitle()
            textProcess.clearPoint()
            textProcess.clearSpace()
            self.html_titles[href] = textProcess.text
        # Skip snippets that yielded no usable href.
        if href == '':
            continue
        # Clean the whole snippet for the result body text.
        textProcess = textprocess.TextProcess(self.search_item[i])
        textProcess.clearHtml()
        textProcess.clearPoint()
        textProcess.clearSpace()
        text = textProcess.text
        self.res_items[href] = [self.html_titles[href], text]
    print self.html_titles
开发者ID:SkTim,项目名称:GScraper,代码行数:31,代码来源:htmlscraper.py
注:本文中的re.finditer函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论