本文整理汇总了Python中re2.findall函数的典型用法代码示例。如果您正苦于以下问题：Python findall函数的具体用法？Python findall怎么用？Python findall使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了findall函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __findTagAttributes
def __findTagAttributes(tag):
    """Extract attribute (name, value) pairs from a single tag string.

    Three passes are made over ``tag``: one for double-quoted values, one for
    single-quoted values and a permissive one where the quotes are optional.
    Every pattern has two alternatives; the first one only consumes the
    leading "<name " part and therefore contributes an ``('', '')`` tuple.

    Returns a list of 2-tuples: permissive matches first, then the
    single-quoted ones, then the double-quoted ones. Duplicates and the
    empty tuples are not filtered out.
    """
    # Raw strings: the regex escape for a word character must reach the regex
    # engine untouched instead of being parsed as an (invalid) string escape.
    att_double = re.findall(r'<\w*[ ]| *(.*?)[ ]*=[ ]*"(.*?)"[ +|>]', tag)
    att_single = re.findall(r"<\w*[ ]| *(.*?)[ ]*=[ ]*'(.*?)'[ +|>]", tag)
    att_none = re.findall(r'''<\w*[ ]| *(.*?)[ ]*=[ ]*["|']?(.*?)["|']?[ +|>]''', tag)
    return att_none + att_single + att_double
开发者ID:psuedoelastic,项目名称:wapiti,代码行数:7,代码来源:lswww.py
示例2: _unpack
def _unpack(self, buf):
    """Extract into a list irc messages of a tcp streams.

    Server messages (lines starting with ":") are parsed into self._sc and
    client messages into self._cc; copies are appended to self._messages.

    @buf: tcp stream data
    @return: False when the buffer cannot be read, otherwise None.
    """
    try:
        f = cStringIO.StringIO(buf)
        lines = f.readlines()
    except Exception:
        log.error("Failed reading tcp stream buffer")
        return False

    # Nothing is recorded until a client NICK/USER command has been seen
    # in this buffer.
    logirc = False
    for element in lines:
        if element.startswith(":"):
            command = "([a-zA-Z]+|[0-9]{3})"
            params = "(\x20.+)"
            # The prefix character class had been mangled by an e-mail
            # obfuscation placeholder; "!" and "@" are restored here.
            irc_server_msg = re.findall(r"(^:[\w+.{}!@|()]+\x20)" + command + params, element)
            if irc_server_msg:
                self._sc["prefix"] = convert_to_printable(irc_server_msg[0][0].strip())
                self._sc["command"] = convert_to_printable(irc_server_msg[0][1].strip())
                self._sc["params"] = convert_to_printable(irc_server_msg[0][2].strip())
                self._sc["type"] = "server"
                if logirc:
                    self._messages.append(dict(self._sc))
        else:
            # The line terminator class is LF or CR; the previous "\0x0d"
            # spelled NUL followed by the literal characters "x0d".
            irc_client_msg = re.findall(r"([a-zA-Z]+\x20)(.+[\x0a\x0d])", element)
            if irc_client_msg and irc_client_msg[0][0].strip() in self.__methods_client:
                self._cc["command"] = convert_to_printable(irc_client_msg[0][0].strip())
                if self._cc["command"] in ["NICK", "USER"]:
                    logirc = True
                self._cc["params"] = convert_to_printable(irc_client_msg[0][1].strip())
                self._cc["type"] = "client"
                if logirc:
                    self._messages.append(dict(self._cc))
开发者ID:CIRCL,项目名称:cuckoo-modified,代码行数:34,代码来源:irc.py
示例3: run
def run(self):
    """Run extract of printable strings.

    Only tasks whose category is "file" are processed; any other category
    yields an empty list.

    @return: list of printable strings.
    @raise CuckooProcessingError: if the sample is missing or unreadable.
    """
    self.key = "strings"
    strings = []

    if self.task["category"] == "file":
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

        try:
            # The context manager closes the handle even if read() fails.
            with open(self.file_path, "rb") as sample:
                data = sample.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if nulltermonly:
            # Runs of printable ASCII followed by NUL; the second pattern
            # matches the same characters interleaved with NUL bytes.
            apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
            upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
        else:
            apat = "[\x20-\x7e]{" + str(minchars) + ",}"
            upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

        strings = re.findall(apat, data)
        for ws in re.findall(upat, data):
            strings.append(str(ws.decode("utf-16le")))

    return strings
开发者ID:Abdullah-Mughal,项目名称:cuckoo-modified-1,代码行数:31,代码来源:strings.py
示例4: do_strings
def do_strings(self):
    """Write the printable strings of the memory dump to "<memfile>.strings".

    Does nothing unless self.voptions.basic.dostrings is set. The output
    file holds one extracted string per line.

    @raise CuckooProcessingError: if the memory dump cannot be read.
    """
    if self.voptions.basic.dostrings:
        try:
            # The context manager closes the handle even if read() fails.
            with open(self.memfile, "rb") as dump:
                data = dump.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.voptions.basic.get("strings_nullterminated_only", True)
        minchars = self.voptions.basic.get("strings_minchars", 5)

        if nulltermonly:
            apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
            upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
        else:
            apat = "[\x20-\x7e]{" + str(minchars) + ",}"
            upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

        strings = re.findall(apat, data)
        for ws in re.findall(upat, data):
            strings.append(str(ws.decode("utf-16le")))
        # Drop the reference to the dump before writing the result out.
        data = None

        with open(self.memfile + ".strings", "w") as out:
            out.write("\n".join(strings))
开发者ID:453483289,项目名称:cuckoo-modified,代码行数:25,代码来源:memory.py
示例5: test_re_findall
def test_re_findall(self):
self.assertEqual(re.findall(":+", "abc"), [])
self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
(":", ":"),
(":", "::")])
开发者ID:PeterScott,项目名称:pyre2,代码行数:7,代码来源:test_re.py
示例6: handle_data
def handle_data(self, data):
    """Collect links from script content.

    Only active while self.inscript is set. Links reported by lamejs are
    added to self.liens, followed by every quoted string in ``data`` that is
    not in self.common_js_strings and contains one of the allowed
    extensions.
    """
    if not self.inscript:
        return

    allowed_ext = (".php", ".asp", ".xml", ".js", ".json", ".jsp")
    self.liens.extend(lamejs.lamejs(data).getLinks())

    candidates = re.findall(r'"([A-Za-z0-9_=#&%\.\+\?/-]*)"', data)
    candidates += re.findall(r"'([A-Za-z0-9_=#&%\.\+\?/-]*)'", data)

    for jstr in candidates:
        if jstr in self.common_js_strings:
            continue
        # Record each candidate once: ".js" is a substring of ".json" and
        # ".jsp", so testing every extension separately added duplicates.
        if any(ext in jstr for ext in allowed_ext):
            self.liens.append(jstr)
开发者ID:psuedoelastic,项目名称:wapiti,代码行数:11,代码来源:lswww.py
示例7: extract_urls
def extract_urls(msg, html=False):
    """Return the set of URLs found in ``msg``, without trailing slashes.

    With ``html`` set, "=3D" is first turned back into "=" and every
    substring listed in REPLACE is removed before RE_URL_HTML is applied;
    otherwise RE_URL_PLAIN is applied to the message as given. The raw
    match list is pretty-printed before it is normalised.
    """
    if not html:
        found = re.findall(RE_URL_PLAIN, msg)
    else:
        cleaned = msg.replace("=3D", '=')
        for junk in REPLACE:
            cleaned = cleaned.replace(junk, '')
        found = re.findall(RE_URL_HTML, cleaned)

    pprint(found)

    return set(str(raw.decode()).rstrip("/") for raw in found)
开发者ID:aeppert,项目名称:py-cifsdk,代码行数:15,代码来源:urls.py
示例8: get_schedule_line_groups
def get_schedule_line_groups(classified_event):
    """Return groups of consecutive lines that look like a timed schedule.

    The tokenized event text is scanned line by line. Starting from each
    position, a run of lines that contain a "time to time" range is kept if
    it is at least one line long, and then a run of lines that contain any
    single time is kept if it is at least three lines long. A line longer
    than 80 characters always ends a run. Only groups in which some line
    has a time with explicit minutes are returned.
    """
    # (?!20[01][05])
    time = r'\b[012]?\d[:.,h]?(?:[0-5][05])?(?:am|pm)?\b'
    time_with_minutes = r'\b[012]?\d[:.,h]?(?:[0-5][05])(?:am|pm)?\b'
    time_to_time = r'%s ?(?:to|do|до|til|till|alle|a|-|–|[^\w,.]) ?%s' % (time, time)

    def has_time_range(row):
        return bool(re.findall(time_to_time, row))

    def has_single_time(row):
        # TODO(lambert): Somehow track "1)" that might show up here? :(
        return any(found not in ('1.', '2.') for found in re.findall(time, row))

    # A trailing empty line guarantees that a run reaching the end of the
    # text is still terminated by a non-schedule line.
    rows = classified_event.processed_text.get_tokenized_text().split('\n')
    rows.append('')
    total = len(rows)
    runs = []

    def scan(start, looks_scheduled, min_length):
        # Advance while rows qualify; keep the run if it is long enough.
        # Returns the index of the row that ended the run (or ``total``).
        pos = start
        while pos < total:
            row = rows[pos]
            if len(row) > 80 or not looks_scheduled(row):
                if pos - start >= min_length:
                    runs.append(rows[start:pos])
                break
            pos += 1
        return pos

    cursor = 0
    while cursor < total:
        cursor = scan(cursor, has_time_range, 1)
        cursor = scan(cursor, has_single_time, 3)
        cursor += 1

    return [run for run in runs if any(re.search(time_with_minutes, row) for row in run)]
开发者ID:mikelambert,项目名称:dancedeets-monorepo,代码行数:48,代码来源:event_structure.py
示例9: on_call
def on_call(self, call, process):
    """Mark calls that write ransom-note-like text to .txt/.htm/.html files.

    Calls from whitelisted process names are ignored. The lower-cased
    buffer must be at least 128 characters long and produce more than one
    match of the combined indicator patterns.
    """
    if process["process_name"].lower() in self.whitelistprocs:
        return

    arguments = call["arguments"]
    buff = arguments["buffer"].lower()
    if len(buff) < 128:
        return
    if not arguments["filepath"].endswith((".txt", ".htm", ".html")):
        return

    if len(re.findall("|".join(indicators), buff)) > 1:
        self.mark_call()
开发者ID:RicoVZ,项目名称:community,代码行数:7,代码来源:ransomware_message.py
示例10: handleEvent
def handleEvent(self, event):
    """Report error-message patterns found in spidered content of the target.

    Events not produced by "sfp_spider", and content from URLs that do not
    belong to the target, are ignored. Each pattern group is reported at
    most once per source URL, as an "ERROR_MESSAGE" event.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    # We only want web content from the target
    if srcModuleName != "sfp_spider":
        return None

    eventSource = event.sourceEvent.data
    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventSource not in self.results:
        self.results[eventSource] = list()

    # We only want web content for pages on the target site
    if not self.getTarget().matches(self.sf.urlFQDN(eventSource)):
        self.sf.debug("Not collecting web content information for external sites.")
        return None

    for group_name, expressions in regexps.items():
        if group_name in self.results[eventSource]:
            continue

        for expression in expressions:
            compiled = re.compile(expression, re.IGNORECASE)
            if not compiled.findall(eventData):
                continue
            # An earlier expression of this group may already have fired.
            if group_name in self.results[eventSource]:
                continue
            self.sf.info("Matched " + group_name + " in content from " + eventSource)
            self.results[eventSource].append(group_name)
            evt = SpiderFootEvent("ERROR_MESSAGE", group_name,
                                  self.__name__, event.sourceEvent)
            self.notifyListeners(evt)

    return None
开发者ID:ITh4cker,项目名称:spiderfoot,代码行数:35,代码来源:sfp_errors.py
示例11: on_complete
def on_complete(self):
    """Extract URL-like IOCs from self.iocs and record them in self.data.

    Every distinct rebuilt IOC is appended to self.data as {"ioc": value},
    in first-seen order.

    Returns True if any candidate produced at least one match.
    """
    url_patterns = [
        r'(https?:\/\/)?([\da-z\.-]+)\.([0-9a-z\.]{2,6})(:\d{1,5})?([\/\w\.-]*)\/?',
    ]
    unique_iocs = list()
    found_any = False

    for candidate in self.iocs:
        for pattern in url_patterns:
            hits = re.findall(pattern, candidate)
            if not hits:
                continue
            found_any = True
            for groups in hits:
                # The period between the second and third group is matched
                # outside of any group, so it is re-inserted by hand.
                rebuilt = "".join(
                    part + "." if position == 2 else part
                    for position, part in enumerate(groups, 1)
                )
                if rebuilt not in unique_iocs:
                    unique_iocs.append(rebuilt)

    for ioc in unique_iocs:
        self.data.append({"ioc": ioc})

    return found_any
开发者ID:zpriddy,项目名称:cuckoo-modified,代码行数:32,代码来源:encrypted_ioc.py
示例12: find_competitor_list
def find_competitor_list(search_text):
    """Return the numbered list in ``search_text`` if it looks like a list of competitors.

    A numbered list is a block of at least four consecutive lines starting
    with 1, 2 and then further numbers. It is rejected when it contains
    many times of day or event keywords, or when the text has a
    WRONG_NUMBERED_LIST token. Otherwise it is accepted when it is long
    (more than 10 lines), when enough lines have a "name - ..." / "name (...)"
    shape or mention a dance-style keyword, or when lines average fewer
    than three words.

    Returns the matched list text, or None.
    """
    processed_text = grammar_matcher.StringProcessor(search_text)
    results_match = re.search(r'\n0*1[^\d].+\n^0*2[^\d].+\n(?:^\d+.+\n){2,}', processed_text.text, re.MULTILINE)
    if not results_match:
        return None

    numbered_list = results_match.group(0)
    num_lines = numbered_list.count('\n')

    if len(re.findall(r'\d ?[.:h] ?\d\d|\bam\b|\bpm\b', numbered_list)) > num_lines / 4:
        return None  # good list of times! workshops, etc! performance/shows/club-set times!

    processed_numbered_list = grammar_matcher.StringProcessor(numbered_list, processed_text.match_on_word_boundaries)
    event_keywords = processed_numbered_list.get_tokens(rules.EVENT)
    if len(event_keywords) > num_lines / 8:
        return None
    if processed_text.has_token(keywords.WRONG_NUMBERED_LIST):
        return None

    if num_lines > 10:
        return numbered_list

    lines = numbered_list.split('\n')
    qualified_lines = len([x for x in lines if re.search(r'[^\d\W].*[-(]', x)])
    if qualified_lines > num_lines / 2:
        return numbered_list

    # Raw string for the last pattern: its regex escape is not a valid
    # string escape. The loop variable no longer shadows the builtin type().
    for style_pattern in ('crew', 'pop|boog', 'lock', r'b\W?(?:boy|girl)'):
        qualified_lines = len([x for x in lines if re.search(style_pattern, x)])
        if qualified_lines > num_lines / 8:
            return numbered_list

    if processed_text.match_on_word_boundaries == regex_keywords.WORD_BOUNDARIES:  # maybe separate on kana vs kanji?
        avg_words = 1.0 * sum(len(x.split(' ')) for x in lines) / num_lines
        if avg_words < 3:
            return numbered_list

    return None
开发者ID:mikelambert,项目名称:dancedeets-monorepo,代码行数:30,代码来源:event_structure.py
示例13: parsenamedacts
def parsenamedacts(pattern, intext):
    """Encode the act name in every "ref-namedact-<name>" reference.

    Each distinct string matched by ``pattern`` in ``intext`` is passed
    through encode_act() and substituted wherever it follows the
    "ref-namedact-" marker. Returns the rewritten text.
    """
    marker = r'ref-namedact-'
    outtext = intext
    for act_name in set(re.findall(pattern, intext)):
        outtext = outtext.replace(marker + act_name, marker + encode_act(act_name))
    return outtext
开发者ID:aih,项目名称:uscites,代码行数:8,代码来源:autoparser.py
示例14: on_call
def on_call(self, call, process):
    """Track the API calls used by this signature to recognise Locky.

    - GetVolumeNameForVolumeMountPointW: remember the volume name and the
      first 16 upper-cased hex digits of its MD5 in self.hashes.
    - CryptHashData: if the buffer holds all self.keywords, parse it as a
      query string and record the process id / affiliate id when its "id"
      is a known hash; if the buffer is a known volume name hashed right
      after the volume lookup, arm self.checkEvent; otherwise collect
      .onion / .tor2web payment URLs from it.
    - NtOpenEvent (only while armed and directly after CryptHashData): an
      event named "Global\\<hash>" or "Local\\<hash>" sets self.found.
    - InternetCrackUrlA: collect ".php" URLs cracked by the found process.
    """
    api = call["api"]

    if self.checkEvent and self.lastapi == "CryptHashData":
        if api == "NtOpenEvent":
            # A missing argument is treated like an empty event name.
            event = (self.get_argument(call, "EventName") or "").split("\\")
            if len(event) == 2:
                if event[1] in self.hashes and event[0] in ["Global", "Local"]:
                    self.found = True

    if api == "GetVolumeNameForVolumeMountPointW":
        if call["status"]:
            name = self.get_argument(call, "VolumeName")
            if name and len(name) > 10:
                # Strip the first ten characters and the last one.
                name = name[10:-1]
                if name not in self.volumes:
                    self.volumes.add(name)
                    md5 = hashlib.md5(name).hexdigest()[:16].upper()
                    self.hashes.add(md5)

    elif api == "CryptHashData":
        if self.hashes:
            buf = self.get_argument(call, "Buffer")
            if buf and all(word in buf for word in self.keywords):
                # Try/Except handles when this behavior changes in the future.
                # args is pre-set so the check below never sees an unbound name.
                args = None
                try:
                    args = parse_qs(urlparse("/?" + buf).query,
                                    keep_blank_values=True)
                except Exception:
                    self.sigchanged = True
                    self.severity = 1
                    self.description = "Potential Locky ransomware behavioral characteristics observed. (See Note)"
                    self.data.append({"Note": "Unexpected behavior observed for Locky. Please "
                                              "report this sample to https://github.com/spende"
                                              "rsandbox/community-modified/issues"})
                if args and "id" in args:
                    if args["id"][0] in self.hashes:
                        self.found = process["process_id"]
                    if "affid" in args:
                        tmp = {"Affid": args["affid"][0]}
                        if tmp not in self.data:
                            self.data.append(tmp)

            elif buf in self.volumes and self.lastapi == "GetVolumeNameForVolumeMountPointW":
                # Must be stored on the instance: the guard at the top of
                # this method reads self.checkEvent on later calls.
                self.checkEvent = True

            elif buf:
                # An empty or missing buffer has nothing to scan.
                check = re.findall(r"\s((?:https?://)?\w+(?:\.onion|\.tor2web)[/.](?:\w+\/)?)",
                                   buf, re.I)
                for payment in check:
                    self.payment.add(payment)

    elif api == "InternetCrackUrlA":
        if self.found and process["process_id"] == self.found:
            url = self.get_argument(call, "Url")
            if url and url.endswith(".php"):
                self.c2s.add(url)
开发者ID:Magicked,项目名称:community-modified,代码行数:58,代码来源:locky_apis.py
示例15: on_complete
def on_complete(self):
    """Mark screenshots whose OCR text matches the indicators more than once.

    Screenshots without an "ocr" entry are skipped. Returns whatever
    self.has_marks() reports after all screenshots were inspected.
    """
    for screenshot in self.get_results("screenshots", []):
        if "ocr" not in screenshot:
            continue
        ocr = screenshot["ocr"].lower()
        hits = re.findall("|".join(indicators), ocr)
        if len(hits) > 1:
            self.mark_ioc("message", ocr)

    return self.has_marks()
开发者ID:RicoVZ,项目名称:community,代码行数:9,代码来源:ransomware_message.py
示例16: on_call
def on_call(self, call, process):
    """Remember raw-disk handles that receive ransom-note-like data.

    Only NtWriteFile calls are inspected. The handle name must be
    "\\??\\physicaldrive0" or start with "\\device\\harddisk" (compared
    case-insensitively), the lower-cased buffer must be at least 128
    characters long and match the combined indicators more than once.
    Each handle name is stored in self.ransomfile only once.
    """
    if call["api"] != "NtWriteFile":
        return

    buff = self.get_raw_argument(call, "Buffer").lower()
    filepath = self.get_raw_argument(call, "HandleName")
    patterns = "|".join(self.indicators)

    target = filepath.lower()
    is_raw_disk = target == "\\??\\physicaldrive0" or target.startswith("\\device\\harddisk")
    if not is_raw_disk or len(buff) < 128:
        return

    if len(re.findall(patterns, buff)) > 1 and filepath not in self.ransomfile:
        self.ransomfile.append(filepath)
开发者ID:jgajek,项目名称:community-modified,代码行数:10,代码来源:ransomware_message.py
示例17: do_strings
def do_strings(self):
    """Write the printable strings of the memory dump to "<memfile>.strings".

    Does nothing unless self.voptions.basic.dostrings is set. When
    self.voptions.basic.zipstrings is also set, the strings file is replaced
    by "<memfile>.strings.zip".

    @raise CuckooProcessingError: if the dump cannot be read or the zip
        archive cannot be created.
    """
    # NOTE(review): strings_path is computed but never returned in the code
    # visible here -- confirm whether callers expect it as the return value.
    strings_path = None
    if self.voptions.basic.dostrings:
        try:
            with open(self.memfile, "r") as dump:
                data = dump.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.voptions.basic.get("strings_nullterminated_only", True)
        minchars = self.voptions.basic.get("strings_minchars", 5)

        if nulltermonly:
            apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
            upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
        else:
            # Without the null-terminated restriction neither pattern may
            # require a trailing NUL (the ASCII one used to).
            apat = "[\x20-\x7e]{" + str(minchars) + ",}"
            upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

        strings = re.findall(apat, data)
        strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
        # Drop the reference to the dump before writing the result out.
        data = None

        # Both branches write next to the memory dump; one of them used to
        # reference an undefined name for the output path.
        strings_path = self.memfile + ".strings"
        with open(strings_path, "w") as out:
            out.write("\n".join(strings))

        if self.voptions.basic.zipstrings:
            try:
                with zipfile.ZipFile("%s.zip" % (strings_path), "w", allowZip64=True) as archive:
                    archive.write(strings_path, os.path.basename(strings_path), zipfile.ZIP_DEFLATED)
                # Only the compressed copy is kept.
                os.remove(strings_path)
                strings_path = "%s.zip" % (strings_path)
            except Exception as e:
                raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)
开发者ID:kevross33,项目名称:cuckoo-modified,代码行数:41,代码来源:memory.py
示例18: is_workshop
def is_workshop(classified_event):
    """Decide whether the event looks like a dance class or workshop.

    Title and body text are checked (after deleting WRONG_CLASS matches)
    for class, dance, dancer and wrong-style tokens, and the rules below
    are tried in order.

    Returns a ``(matched, reason)`` tuple; ``reason`` names the rule that
    fired, or is 'nothing' when none did.
    """
    trimmed_title = classified_event.processed_title.delete_with_rule(rules.WRONG_CLASS)
    if classified_event.processed_text.get_tokens(dance_keywords.ROMANCE):
        has_class_title = trimmed_title.get_tokens(rules.ROMANCE_EXTENDED_CLASS_ONLY)
    else:
        has_class_title = trimmed_title.get_tokens(dance_keywords.CLASS_ONLY)
    has_good_dance_class_title = trimmed_title.has_token(rules.GOOD_DANCE_CLASS)

    has_non_dance_event_title = classified_event.processed_title.has_token(keywords.BAD_COMPETITION_TITLE_ONLY)
    has_good_dance_title = trimmed_title.has_token(rules.GOOD_DANCE)
    has_extended_good_crew_title = trimmed_title.has_token(rules.MANUAL_DANCER[grammar.STRONG_WEAK])

    has_wrong_style_title = classified_event.processed_title.has_token(all_styles.DANCE_WRONG_STYLE_TITLE)

    final_title = classified_event.processed_title.get_tokenized_text()
    # Raw string: the regex escape is not a valid string escape.
    lee_lee_hiphop = 'lee lee' in final_title and re.findall(r'hip\W?hop', final_title)

    trimmed_text = classified_event.processed_text.delete_with_rule(rules.WRONG_CLASS)
    has_good_dance_class = trimmed_text.has_token(rules.GOOD_DANCE_CLASS)
    has_good_dance = classified_event.processed_text.has_token(rules.GOOD_DANCE)
    has_wrong_style = classified_event.processed_text.has_token(all_styles.DANCE_WRONG_STYLE_TITLE)

    has_good_crew = classified_event.processed_text.has_token(rules.MANUAL_DANCER[grammar.STRONG])

    if has_class_title and (has_good_dance_title or has_extended_good_crew_title) and not has_wrong_style_title:
        return (
            True, 'has class with strong class-title: %s %s' % (has_class_title, (has_good_dance_title or has_extended_good_crew_title))
        )
    elif classified_event.is_dance_event(
    ) and has_good_dance_title and has_extended_good_crew_title and not has_wrong_style_title and not has_non_dance_event_title:
        return (True, 'has class with strong style-title: %s %s' % (has_good_dance_title, has_extended_good_crew_title))
    elif classified_event.is_dance_event() and lee_lee_hiphop and not has_wrong_style_title and not has_non_dance_event_title:
        return (True, 'has class with strong style-title: %s %s' % (has_good_dance_title, has_extended_good_crew_title))
    elif has_class_title and not has_wrong_style and (has_good_dance or has_good_crew):
        return (True, 'has class title: %s, that contains strong description %s, %s' % (has_class_title, has_good_dance, has_good_crew))
    elif has_good_dance_class_title:
        return (True, 'has good dance class title: %s' % has_good_dance_class_title)
    elif has_good_dance_class and not has_wrong_style_title:
        return (True, 'has good dance class: %s' % has_good_dance_class)
    return (False, 'nothing')
开发者ID:mikelambert,项目名称:dancedeets-monorepo,代码行数:51,代码来源:classifier.py
示例19: handleEvent
def handleEvent(self, event):
    """Find e-mail addresses in the event data and notify listeners.

    Each distinct address is reported once per event, as "EMAILADDR", or as
    "AFFILIATE_EMAILADDR" when neither its domain nor the address itself
    matches the target, or when the incoming event type starts with
    "AFFILIATE_". Addresses shorter than four characters or containing "%"
    are skipped. Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    # The "+@" between local part and domain had been replaced by an
    # e-mail obfuscation placeholder; it is restored here.
    pat = re.compile(r"([\%a-zA-Z\.0-9_\-\+]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
    matches = re.findall(pat, eventData)
    myres = list()
    for match in matches:
        evttype = "EMAILADDR"
        if len(match) < 4:
            self.sf.debug("Likely invalid address: " + match)
            continue

        # Handle messed up encodings
        if "%" in match:
            self.sf.debug("Skipped address: " + match)
            continue

        # Get the domain and strip potential ending .
        mailDom = match.lower().split('@')[1].strip('.')
        if not self.getTarget().matches(mailDom) and not self.getTarget().matches(match):
            self.sf.debug("External domain, so possible affiliate e-mail")
            evttype = "AFFILIATE_EMAILADDR"

        if eventName.startswith("AFFILIATE_"):
            evttype = "AFFILIATE_EMAILADDR"

        self.sf.info("Found e-mail address: " + match)
        mail = match.strip('.')
        # Byte strings are decoded to text with undecodable bytes replaced;
        # text strings are used as they are.
        if isinstance(mail, bytes):
            mail = mail.decode('utf-8', 'replace')

        if mail in myres:
            self.sf.debug("Already found from this source.")
            continue
        myres.append(mail)

        evt = SpiderFootEvent(evttype, mail, self.__name__, event)
        if event.moduleDataSource:
            evt.moduleDataSource = event.moduleDataSource
        else:
            evt.moduleDataSource = "Unknown"
        self.notifyListeners(evt)

    return None
开发者ID:smicallef,项目名称:spiderfoot,代码行数:50,代码来源:sfp_email.py
示例20: get_tags
def get_tags(src, tags='page,title,revision,text'):
    """Map tag names to their namespace-qualified "{namespace}tag" form.

    The namespace is taken from the first xmlns="..." attribute found in the
    first two lines of ``src``. ``src`` is always rewound to position 0,
    even when no namespace is found.

    @param src: file-like object providing readline() and seek().
    @param tags: comma-separated tag names.
    @return: dict mapping each name in ``tags`` to its qualified form.
    @raise IndexError: if the first two lines contain no xmlns attribute.
    """
    # find namespace (eg: http://www.mediawiki.org/xml/export-0.3/)
    try:
        root = src.readline() + src.readline()
        ns = re.findall(r'xmlns="([^"]*)', root)[0]
        # The u'' literal already yields text on Python 2 and 3, so no
        # explicit unicode() conversion (Python 2 only) is needed.
        tag_prefix = u'{%s}' % (ns,)
        tag = dict((t, tag_prefix + t) for t in tags.split(','))
    finally:
        src.seek(0)

    return tag
开发者ID:davkal,项目名称:wiki-network,代码行数:15,代码来源:__init__.py
注：本文中的re2.findall函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论