本文整理汇总了 Python 中 re2.sub 函数的典型用法代码示例。如果您正苦于以下问题:Python sub 函数的具体用法?Python sub 怎么用?Python sub 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 sub 函数的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。
示例1: parsers
def parsers(uscfiles, findreplace):
    """Run every input section through the citation-markup pipeline.

    Each item of ``uscfiles`` is first handed to ``subfile`` together with
    ``findreplace``. Multi-section references are then rewritten into
    ``<a ... class="sec">`` links by ``parsesections``, leftover ``ref-``
    markers are stripped, named-act references are handed to
    ``parsenamedacts`` and finally every remaining ``@`` is removed.

    Returns a list holding one processed string per input item, in order.
    """
    # NOTE(review): the '[email protected]' fragments in the patterns below
    # look like e-mail-obfuscation residue from the page this listing was
    # copied from. They are reproduced verbatim -- confirm against upstream.

    # [^<] keeps a group that already contains a link from being matched again.
    multi_section = r'@@@\s[Ss]ections?\s([^<]*?)@@@@@(.*?)@@'
    named_act = r'/ref-namedact-(.*?)/'

    processed = []
    for section in uscfiles:
        text = subfile(section, findreplace)

        # Replace multiple-section references with links.
        link_rule = [
            r'%s' % u'(\d+\w*(?:\(\w+\))*[-|–]?\d*)([, @])',
            r'<a href="/laws/target/%s/\1" class="sec">\1</a>\2',
        ]
        text = parsesections(multi_section, link_rule, text)
        text = re.sub(r'@@ref-.*[email protected]', r'', text)

        text = parsenamedacts(named_act, text)
        text = re.sub(r'@[email protected]', r'ref-title-this', text)
        text = re.sub(r'@@ref-.*[email protected]', r'', text)

        # Drop whatever '@' markers are still left.
        processed.append(text.replace('@', ''))
    return processed
开发者ID:aih,项目名称:uscites,代码行数:31,代码来源:autoparser.py
示例2: handle_techniques
def handle_techniques(line, **opts):
    """Apply a fixed sequence of de-obfuscation rewrites to one line of text.

    Args:
        line: the string to rewrite.
        **opts: must provide the keys ``vb_vars``, ``enc_func_name`` and
            ``decrypt_func``. ``enc_func_name`` is interpolated into two of
            the regular expressions below and ``decrypt_func`` is used as the
            replacement for both.

    Returns:
        The rewritten line.
    """
    # Not read again in the visible part of this function.
    vb_vars = opts["vb_vars"]
    enc_func_name = opts["enc_func_name"]
    decrypt_func = opts["decrypt_func"]

    # NOTE(review): this helper is never called in the visible code and its
    # body looks truncated in this listing -- confirm against upstream.
    def var_substitute(m):
        var = m.group(1)

    # Triple every double quote so quoted strings can be matched as
    # """...""" by the patterns that follow.
    line = line.replace('"', '"""')
    # Quoted runs of upper-case hex digits are handed to decode_hex.
    line = re.sub(r'"""([A-F0-9]{2,})"""', decode_hex, line)
    # Quoted runs of word/+/=// characters are handed to decode_base64.
    line = re.sub(r'"""([\w_+=/]{2,})"""', decode_base64, line)
    # Chr?(Asc?(x)) collapses to x.
    line = re.sub(r'(?i)Chr[A-Z$]\(Asc[A-Z$](.+?)\)\)', r"\1", line)
    # Asc?("""c...""") becomes the ordinal of the first character.
    line = re.sub(r'(?i)Asc[A-Z$]\("""(\w)\w*"""\)', lambda m: str(ord(m.group(1))), line)
    # Runs of Chr(n) joined by '&' are handed to decode_chr.
    line = re.sub(r'(?i)((?:Chr[A-Z$]?\(\d+\)\s*&?\s*)+)', decode_chr, line)
    # Two call shapes of the encoding function are handed to decrypt_func.
    line = re.sub(r'(?i)\b%s\s*\(\w+\("""(.+?)"""\),\s*\w+\("""(.+?)"""' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)\b%s\((?:""")?(.+?)(?:""")?,\s*(?:""")?(.+?)(?:""")?\)' % enc_func_name, decrypt_func, line)
    line = re.sub(r'(?i)StrReverse\(.+?"""(.+?)"""\)', decode_reverse, line)
    # Quoted strings joined with '&' are handed to concatenate.
    line = re.sub(r'""".+?"""\s+&+\s+""".+?""".+', concatenate, line)
    # Unwrap nested Chr(Asc(...)) until a pass changes nothing.
    while "Chr(Asc(" in line:
        lastline = line
        line = re.sub(r'(?i)Chr\(Asc\((.+?)\)\)', r"\1",line)
        if line == lastline:
            break
    # Remove quotes before regexing against them.
    line = line.replace('""" + """','')
    line = line.replace('"""','')
    # Remove a few concat patterns. There's a bug with some obfuscation
    # techniques.
    line = line.replace(" + ", "")
    line = line.replace(" & ","")
    return line
开发者ID:CIRCL,项目名称:cuckoo-modified,代码行数:32,代码来源:vbadeobf.py
示例3: clean
def clean(t):
    """
    Normalize numbers and discard punctuation that can be ambiguous.

    Every run of digits, dots and commas that ends in a digit becomes the
    token ``<NUM>``; any character outside the allowed set (ASCII letters,
    digits, space and ``,.;:<>-'/?!$%``) is dropped; ``--`` becomes a space.

    Returns the cleaned string.
    """
    # Raw strings keep the regex escapes out of Python's own escape handling
    # (non-raw '\d' / '\-' are invalid string escapes and warn on recent
    # Python versions). The patterns themselves are unchanged.
    t = re.sub(r'[.,\d]*\d', '<NUM>', t)
    t = re.sub(r"[^a-zA-Z0-9,.;:<>\-'\/?!$% ]", '', t)
    t = t.replace('--', ' ')  # sometimes starts a sentence... trouble
    return t
开发者ID:katherinehuwu,项目名称:Word_Fit,代码行数:8,代码来源:sbd.py
示例4: test_bug_449000
def test_bug_449000(self):
# Test for sub() on escaped characters
self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
开发者ID:PeterScott,项目名称:pyre2,代码行数:10,代码来源:test_re.py
示例5: termcenter
def termcenter():
    """Print stdin and any positional strings centred in the terminal.

    Standard input is processed first, then each positional argument as a
    single line. For every source the padding is derived from its widest
    line, measured after removing SGR colour sequences and ``$...$`` spans.
    """
    cli = argparse.ArgumentParser(description='Center stuff on terminals')
    cli.add_argument('string', nargs='*', type=str)
    options = cli.parse_args()

    def visible_length(raw):
        # Colour escapes and $...$ spans do not count towards the width.
        return len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', raw))

    for source in [sys.stdin] + options.string:
        if isinstance(source, str):
            lines = [source]
        else:
            lines = source.readlines()
        if not lines:
            continue
        widest = max(visible_length(entry) for entry in lines)
        margin = ' ' * int((os.get_terminal_size()[0] - widest) / 2)
        for entry in lines:
            print(margin + re.sub(r'\$.*\$|\n', '', entry))
开发者ID:jaseg,项目名称:ponysay,代码行数:12,代码来源:__init__.py
示例6: get_features
def get_features(frag, model):
    """
    Build the feature dict for one candidate boundary.

    ... w1. (sb?) w2 ...
    Features, listed roughly in order of importance:
    (1) w1: word that includes a period
    (2) w2: the next word, if it exists
    (3) w1length: number of alphabetic characters in w1
    (4) w2cap: true if w2 is capitalized
    (5) both: w1 and w2
    (6) w1abbr: log count of w1 in training without a final period
    (7) w2lower: log count of w2 in training as lowercased
    (8) w1w2upper: w1 and w2 is capitalized

    ``frag`` must expose ``tokenized`` and ``next``; ``model`` must expose
    the lookups ``non_abbrs`` and ``lower_words``. All feature values are
    strings. Returns the feature dict.
    """
    words1 = clean(frag.tokenized).split()
    w1 = words1[-1] if words1 else ''

    w2 = ''
    if frag.next:
        words2 = clean(frag.next.tokenized).split()
        if words2:
            w2 = words2[0]

    # Keep only the part after the first hyphen of w1 and the part before
    # the first hyphen of w2.
    c1 = re.sub(r'(^.+?\-)', '', w1)
    c2 = re.sub(r'(\-.+?)$', '', w2)

    feats = {}
    feats['w1'] = c1
    feats['w2'] = c2
    feats['both'] = c1 + '_' + c2

    if c1.replace('.', '').isalpha():
        feats['w1length'] = str(min(10, len(re.sub(r'\W', '', c1))))
        try:
            feats['w1abbr'] = str(int(math.log(1 + model.non_abbrs[c1[:-1]])))
        except Exception:
            # Best-effort lookup: fall back to log(1) == 0. Exception rather
            # than a bare except, so KeyboardInterrupt/SystemExit propagate.
            feats['w1abbr'] = str(int(math.log(1)))

    if c2.replace('.', '').isalpha():
        feats['w2cap'] = str(c2[0].isupper())
        try:
            feats['w2lower'] = str(int(math.log(1 + model.lower_words[c2.lower()])))
        except Exception:
            feats['w2lower'] = str(int(math.log(1)))
        feats['w1w2upper'] = c1 + '_' + str(c2[0].isupper())

    return feats
开发者ID:katherinehuwu,项目名称:Word_Fit,代码行数:48,代码来源:sbd.py
示例7: on_call
def on_call(self, call, process):
    """Record Dyre-related indicators seen in one API call.

    Depending on ``call["api"]`` this sets ``self.cryptoapis``, adds to
    ``self.networkapis`` or sets ``self.syncapis``. ``process`` is not used.

    Always returns None.
    """
    # Legacy, modern Dyre doesn't have hardcoded hashes in
    # CryptHashData anymore
    iocs = (
        "J7dnlDvybciDvu8d46D\\x00",
        "qwererthwebfsdvjaf+\\x00",
    )
    pipes = (
        "\\??\\pipe\\3obdw5e5w4",
        "\\??\\pipe\\g2fabg5713",
    )
    api = call["api"]
    if api == "CryptHashData":
        buf = self.get_argument(call, "Buffer")
        if buf in iocs:
            self.cryptoapis = True
        # Strip textual \xNN escapes before comparing with the computer name.
        tmp = re.sub(r"\\x[0-9A-Fa-f]{2}", "", buf)
        if self.compname in tmp:
            # The name is data, not a pattern: escape it so characters such
            # as '(', '.' or '+' in a computer name are matched literally.
            if re.match("^" + re.escape(self.compname) + "[0-9 ]+$", tmp):
                self.cryptoapis = True
    elif api == "HttpOpenRequestA":
        buf = self.get_argument(call, "Path")
        if len(buf) > 10:
            self.networkapis.add(buf)
    elif api == "NtCreateNamedPipeFile":
        buf = self.get_argument(call, "PipeName")
        if buf in pipes:
            self.syncapis = True
    return None
开发者ID:453483289,项目名称:community-modified,代码行数:31,代码来源:dyre_apis.py
示例8: process_entry
def process_entry(self, entry):
    """Convert one parsed feed entry into the dict describing a feed post.

    ``entry`` must contain ``published``, ``link`` and ``summary``;
    ``title`` and ``tags`` are optional. ``entry['published']`` is rewritten
    in place when it ends in ``UT``.

    Returns a dict with the keys ``type``, ``created``, ``link``, ``title``,
    ``text`` and ``tags``.
    """
    out = {'type': 'feed'}

    # UGnich - mooduck
    # A trailing "UT" is completed to "UTC" before the date is parsed.
    if entry['published'].endswith('UT'):
        entry['published'] = '%sC' % entry['published']

    tz = timezone(settings.timezone)
    published = dateutil.parser.parse(entry['published'])
    try:
        out['created'] = published.astimezone(tz)
    except ValueError:
        # Conversion failed (presumably a naive datetime): keep the parsed
        # value as is. The original stored it under entry['created'], which
        # left the returned dict without a 'created' key.
        out['created'] = published

    out['link'] = entry['link']

    if 'title' in entry:
        # unescape() handles named entities; numeric &#NNN; references are
        # decoded here.
        out['title'] = re.sub(r'&#(?P<c>\d+);',
                              lambda c: unichr(int(c.group('c'))),
                              unescape(entry['title']))
    else:
        out['title'] = ''

    out['text'] = self.process_text(entry['summary'])

    if 'tags' in entry:
        out['tags'] = [t['label'] or t['term'] for t in entry['tags']]
    else:
        out['tags'] = []

    return out
开发者ID:isqua-test,项目名称:point-core,代码行数:25,代码来源:feedproc.py
示例9: _files
def _files(files):
    """Apply the request's attachment deletions and uploads to ``files``.

    Paths named in the ``del-attach`` argument that are present in ``files``
    are removed via ``remove_attach``. Up to ten names from the ``attach``
    argument are sanitised, stored via ``make_attach`` in a per-upload
    folder and their paths appended.

    Returns the resulting list; a falsy ``files`` is replaced by a new list.
    """
    if not files:
        files = []

    digest = md5(str(datetime.now())).hexdigest()
    login = env.user.login
    dest = '%s/%s/%s/%s' % (login[0], login, digest[:2], digest[2:4])

    to_delete = env.request.args('del-attach', [])
    if not isinstance(to_delete, (list, tuple)):
        to_delete = [to_delete]
    for stale in to_delete:
        if stale in files:
            remove_attach(stale)
            files.remove(stale)

    names = env.request.args('attach', [])
    uploads = env.request.files('attach')
    if not isinstance(names, (list, tuple)):
        # A single upload arrives unwrapped; wrap both values together so
        # they stay index-aligned.
        names = [names]
        uploads = [uploads]

    for index, name in enumerate(names[:10]):
        if isinstance(name, str):
            name = name.decode('utf-8')
        name = re.sub(r'[^\w\.]+', '-', unidecode(name))
        folder = "%s/%s/" % (dest, randint(1000, 9999))
        make_attach(uploads[index], folder, name, remove=True)
        files.append(os.path.join(folder, name))

    return files
开发者ID:radjah,项目名称:point-www,代码行数:32,代码来源:blog.py
示例10: __reWildcard
def __reWildcard(regexp, string):
    """Wildcard-based regular expression system.

    ``*`` in ``regexp`` stands for any (possibly empty) run of characters;
    every other character must match literally. Consecutive ``*`` are
    treated as one.

    Returns True when ``string`` matches the whole pattern, else False.
    """
    regexp = re.sub(r"\*+", "*", regexp)
    if "*" not in regexp:
        return regexp == string

    # With at least one '*' there are always two or more blocks; the first
    # and last are the literal prefix/suffix (empty when the pattern starts
    # or ends with '*').
    blocks = regexp.split("*")
    prefix, suffix = blocks[0], blocks[-1]

    # Prefix and suffix must not overlap: "ab*ab" must not match "ab".
    if len(string) < len(prefix) + len(suffix):
        return False
    if not string.startswith(prefix) or not string.endswith(suffix):
        return False

    # Middle blocks have to appear, in order, strictly between prefix and
    # suffix. Searching the whole string would let "a*a*c" match "ac".
    remainder = string[len(prefix):len(string) - len(suffix)]
    position = 0
    for block in blocks[1:-1]:
        if not block:
            continue
        found = remainder.find(block, position)
        if found == -1:
            return False
        position = found + len(block)
    return True
开发者ID:psuedoelastic,项目名称:wapiti,代码行数:35,代码来源:lswww.py
示例11: render_pony
def render_pony(name, text, balloonstyle, width=80, center=False, centertext=False):
    """Render the pony called ``name`` as text, optionally with a balloon.

    Args:
        name: passed to ``load_pony``; the result is treated as a mutable
            list of lines.
        text: balloon text. When falsy the link placeholders are replaced
            by empty strings.
        balloonstyle: style passed to ``render_balloon``; its last two items
            are used as the left and right balloon-link strings.
        width: maximum number of visible characters kept per line.
        center: when true, indent the pony so it is centred in ``width``.
        centertext: when true, balloon text is padded with ``str.center``
            instead of ``str.ljust``.

    Returns:
        One string: every line cut to ``width`` and followed by a colour
        reset and a newline.
    """
    pony = load_pony(name)
    link_l = link_r = ''
    if text:
        link_l, link_r = balloonstyle[-2:]

    # The first $balloonN$ line is replaced by the rendered balloon; N, when
    # present, is the minimum balloon width.
    for i, line in enumerate(pony):
        match = re.search(r'\$balloon([0-9]*)\$', line)
        if match:
            minwidth = int(match.group(1) or '0')
            pony[i:i+1] = render_balloon(text, balloonstyle, minwidth=minwidth, maxwidth=int(width/2), pad=str.center if centertext else str.ljust)
            break

    # Drop everything from the first '$$$' line through the second one.
    # list.index raises ValueError when a marker is missing; in that case
    # the pony is left untouched.
    try:
        first = pony.index('$$$')
        second = pony[first+1:].index('$$$')
    except ValueError:
        pass
    else:
        pony[first:] = pony[first+1+second+1:]

    pony = [line.replace('$\\$', link_l).replace('$/$', link_r) for line in pony]

    indent = ''
    if center:
        ponywidth = max([len(re.sub(r'\x1B\[[0-9;]+m|\$.*\$', '', line)) for line in pony])
        indent = ' '*int((width-ponywidth)/2)

    # Keeps at most `width` characters per line; colour escapes before a
    # character do not count towards the limit.
    wre = re.compile('((\x1B\\[[0-9;]+m)*.){0,%s}' % width)
    # SGR 39;49 resets foreground/background. The sequence needs its ESC
    # introducer, which was missing from the literal in this listing.
    reset = '\x1B[39;49m\n'
    return indent+(reset+indent).join([wre.search(line).group() for line in pony])+reset
开发者ID:jaseg,项目名称:ponysay,代码行数:25,代码来源:__init__.py
示例12: replace_placeholders
def replace_placeholders(string, item, match):
    """Fill the ``{{...}}`` placeholders of ``string`` from ``item``.

    Comments contribute ``body``/``link_title``; anything else contributes
    ``selftext``/``title``. ``{{match-N}}`` becomes a ``\\N`` group reference
    and, when ``match`` is truthy, the whole string is expanded through
    ``match.expand``.

    Returns the substituted string.
    """
    is_comment = isinstance(item, praw.objects.Comment)

    # Substitution order matters: inserted text is itself subject to the
    # replacements that come after it.
    body = item.body if is_comment else item.selftext
    string = string.replace('{{body}}', body)
    string = string.replace('{{domain}}', getattr(item, 'domain', ''))
    string = string.replace('{{permalink}}', get_permalink(item))
    string = string.replace('{{subreddit}}', item.subreddit.display_name)

    title = item.link_title if is_comment else item.title
    string = string.replace('{{title}}', title)
    string = string.replace('{{url}}', getattr(item, 'url', ''))

    author_name = item.author.name if item.author else '[deleted]'
    string = string.replace('{{user}}', author_name)

    # Turn every {{match-N}} into the group reference \N.
    string = re.sub(r'\{\{match-(\d+)\}\}', r'\\\1', string)
    if match:
        string = match.expand(string)
    return string
开发者ID:plbogen,项目名称:AutoModerator,代码行数:25,代码来源:automoderator.py
示例13: processText
def processText(text):
    '''
    Yield rough sentence fragments of ``text``.

    BEL (``\\x07``) and carriage-return characters are removed first. A
    break is placed at every period that is followed, after at most two
    other characters, by a capital letter. Each fragment runs up to and
    including its period; the next one starts two characters after it. The
    last fragment yielded starts at the final break itself (or at 0 when
    there is none).
    '''
    text = re.sub(r'\x07|\r', '', text)

    fragment_start = 0
    last_break = 0
    for hit in re.finditer(r'\..?.?([A-Z])', text):
        last_break = hit.start()
        yield text[fragment_start:last_break + 1]
        fragment_start = last_break + 2
    yield text[last_break:]
开发者ID:bylin,项目名称:text-mining,代码行数:28,代码来源:interactionFinder.py
示例14: html2md
def html2md(s):
    """Convert HTML to Markdown text without line wrapping.

    Links and images whose label is empty (``[](url)`` / ``![](url)``) are
    reduced to the bare URL surrounded by single spaces.
    """
    converter = HTML2Text()
    converter.body_width = 0
    markdown = converter.handle(s)

    def bare_url(found):
        return " %s " % found.group('url')

    return re.sub(r'\!?\[\]\((?P<url>.+?)\)', bare_url, markdown)
开发者ID:isqua-test,项目名称:point-core,代码行数:8,代码来源:feedproc.py
示例15: replace
def replace(self, m):
    """Rebuild a URL from the named groups of match ``m``.

    The groups ``scheme``, ``pass``, ``authority``, ``undef`` and ``query``
    are copied unchanged; every ``:`` in ``fragment`` is written as ``%3a``.
    """
    unchanged = tuple(
        m.group(part)
        for part in ("scheme", "pass", "authority", "undef", "query")
    )
    fragment = re.sub(r":", "%3a", m.group("fragment"))
    return "%s%s%s%s%s%s" % (unchanged + (fragment,))
开发者ID:isqua-test,项目名称:point-core,代码行数:9,代码来源:md.py
示例16: parsesections
def parsesections(pattern, pattern_replace, section):
    """Rewrite every multi-section reference found in ``section``.

    ``pattern`` must define two groups. For each match, the text from the
    start of group 1 to the end of group 2 is run through
    ``re.sub(pattern_replace[0], pattern_replace[1] % group2, ...)`` and the
    single character right after group 2 is dropped. The search restarts on
    the updated text until ``pattern`` no longer matches.

    Returns the rewritten text.
    """
    while True:
        found = re.search(pattern, section)
        if not found:
            return section
        begin = found.start(1)
        finish = found.end(2)
        template = pattern_replace[1] % found.group(2)
        rewritten = re.sub(pattern_replace[0], template, section[begin:finish])
        section = section[:begin] + rewritten + section[finish + 1:]
开发者ID:aih,项目名称:uscites,代码行数:9,代码来源:autoparser.py
示例17: ulogin
def ulogin():
    """Handle login/registration through the uLogin service.

    On POST the request's ``token`` is exchanged at ulogin.ru for profile
    data. If that identity authenticates an existing user, the response is a
    redirect to the referer; otherwise the profile is stored in the session
    under ``reg_info``. On any other method the stored ``reg_info`` is
    reused.

    Returns a redirect ``Response`` or the rendered login/registration page.
    Raises ``AlreadyAuthorized`` when the current user already has an id.
    """
    if env.user.id:
        raise AlreadyAuthorized
    sess = Session()
    if env.request.method == "POST":
        url = "http://ulogin.ru/token.php?token=%s&host=%s" % (env.request.args("token"), settings.domain)
        try:
            resp = urllib2.urlopen(url)
            # Pre-fill every expected field with None so later lookups do
            # not raise KeyError for fields missing from the response.
            data = dict.fromkeys(ULOGIN_FIELDS)
            data.update(json.loads(resp.read()))
            resp.close()
        except urllib2.URLError:
            return render("/auth/login.html", fields=ULOGIN_FIELDS, errors=["ulogin-fail"])
        try:
            env.user.authenticate_ulogin(data["network"], data["uid"])
            if env.user.id:
                return Response(redirect=referer())
        except NotAuthorized:
            # Unknown identity: fall through to the registration form.
            pass
        # NOTE(review): data["nickname"] is None when the response lacks
        # the field, which would make .strip() fail -- confirm upstream.
        login = data["nickname"].strip(u" -+.")
        if login:
            # Collapse runs of '.', '_', '-' and '+' into a single '-'.
            login = re.sub(r"[\._\-\+]+", "-", login)
        info = {
            "login": login,
            "network": data["network"],
            "uid": data["uid"],
            "name": ("%s %s" % (data["first_name"], data["last_name"])).strip(),
            "email": data["email"],
            "avatar": data["photo_big"],
            "birthdate": data["bdate"],
            # "2" maps to True, "1" to False, anything else to None.
            "gender": True if data["sex"] == "2" else False if data["sex"] == "1" else None,
            # "city, country" when both are set, otherwise whichever is set.
            "location": "%s, %s" % (data["city"], data["country"])
                        if data["city"] and data["country"]
                        else data["city"] or data["country"],
            "_nickname": data["nickname"],
            "_name": ("%s %s" % (data["first_name"], data["last_name"])).strip(),
            "_profile": data["profile"],
        }
        sess["reg_info"] = info
        sess.save()
    else:
        info = sess["reg_info"]
        # Without a stored identity there is nothing to register.
        if not info or not "network" in info or not "uid" in info:
            return Response(redirect="%s://%s/register" % (env.request.protocol, settings.domain))
    # Default birthdate when none can be parsed: 365 * 16 + 4 days ago.
    info["birthdate"] = parse_date(info["birthdate"]) or datetime.now() - timedelta(days=365 * 16 + 4)
    return render("/auth/register_ulogin.html", info=info)
开发者ID:artss,项目名称:point-www-new,代码行数:56,代码来源:auth.py
示例18: run
def run(self,results):
    """Run Moloch to import pcap.

    Reads the module options, derives tag values from ``results`` and
    builds the moloch-capture command line.

    @param results: results dict; ``results["info"]["id"]`` is required,
        the ``machine``, ``options`` and ``custom`` entries are optional.
    @return: nothing
    """
    self.key = "moloch"
    self.alerthash ={}
    self.fileshash ={}
    self.MOLOCH_CAPTURE_BIN = self.options.get("capture", None)
    self.MOLOCH_CAPTURE_CONF = self.options.get("captureconf",None)
    self.CUCKOO_INSTANCE_TAG = self.options.get("node",None)
    self.MOLOCH_USER = self.options.get("user",None)
    self.MOLOCH_PASSWORD = self.options.get("pass",None)
    self.MOLOCH_REALM = self.options.get("realm",None)
    self.MOLOCH_AUTH = self.options.get("auth","digest")
    self.pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    self.MOLOCH_URL = self.options.get("base",None)
    self.task_id = results["info"]["id"]
    self.custom = None
    # Tag values have every non-word character replaced by "_".
    if results["info"].has_key("machine") and results["info"]["machine"].has_key("name"):
        self.machine_name = re.sub(r"[\W]","_",str(results["info"]["machine"]["name"]))
    else:
        self.machine_name = "Unknown"
    if results["info"].has_key("options") and results["info"]["options"].has_key("setgw"):
        self.gateway = re.sub(r"[\W]","_",str(results["info"]["options"]["setgw"]))
    else:
        self.gateway = "Default"
    # NOTE(review): this tests for "options" but reads "custom" -- confirm
    # that requiring "options" here is intended.
    if results["info"].has_key("options") and results["info"].has_key("custom"):
        self.custom = re.sub(r"[\W]","_",str(results["info"]["custom"]))
    # NOTE(review): both paths default to None when the option is missing;
    # os.path.exists(None) is not guarded here.
    if not os.path.exists(self.MOLOCH_CAPTURE_BIN):
        log.warning("Unable to Run moloch-capture: BIN File %s Does Not Exist" % (self.MOLOCH_CAPTURE_BIN))
        return
    if not os.path.exists(self.MOLOCH_CAPTURE_CONF):
        log.warning("Unable to Run moloch-capture Conf File %s Does Not Exist" % (self.MOLOCH_CAPTURE_CONF))
        return
    try:
        cmd = "%s -c %s -r %s -n %s -t %s:%s -t cuckoo_jtype:%s -t cuckoo_machine:%s -t cuckoo_gw:%s" % (self.MOLOCH_CAPTURE_BIN,self.MOLOCH_CAPTURE_CONF,self.pcap_path,self.CUCKOO_INSTANCE_TAG,self.CUCKOO_INSTANCE_TAG,self.task_id,self.task["category"],self.machine_name,self.gateway)
        if self.custom:
            cmd = cmd + " -t custom:%s" % (self.custom)
    except Exception,e:
        log.warning("Unable to Build Basic Moloch CMD: %s" % e)
    # NOTE(review): cmd is built but never used in the visible code; the
    # listing appears to be truncated here.
开发者ID:swackhamer,项目名称:cuckoo-modified,代码行数:43,代码来源:moloch.py
示例19: remove_evil_links
def remove_evil_links(pdf_data):
    """Strip it-ebooks links from the given PDF data and return the result.

    The data is hex-encoded, every match of the module-level ``pattern`` is
    deleted, the hex form of ``www.it-ebooks.info`` is deleted, and the
    remainder is decoded back from hex.
    """
    hex_payload = pdf_data.encode("hex")

    # Matches of `pattern` are removed from the hex text (the annotation
    # elements behind the clickable it-ebooks.info links).
    print('Removing evil links')
    stripped = re2.sub(pattern, "", hex_payload)

    # Then the link targets those annotations pointed at.
    link_hex = "www.it-ebooks.info".encode("hex")
    stripped = stripped.replace(link_hex, "")
    print('Done')

    return stripped.decode("hex")
开发者ID:movb,项目名称:it-ebooks-unwatermark,代码行数:11,代码来源:unwatermark.py
示例20: build_options
def build_options(self):
    """Generate analysis options.

    Copies task, machine and configuration values into a new dict. For the
    "service" package a default ``service-dll-of-interest`` option is
    appended to ``self.task.options`` when none is present. For the "file"
    category the file name, file type and (for PE files, when pefile is
    available) a comma-separated list of export names are added.

    @return: options dict.
    """
    task = self.task
    options = {}

    options["id"] = task.id
    options["ip"] = self.machine.resultserver_ip
    options["port"] = self.machine.resultserver_port
    options["category"] = task.category
    options["target"] = task.target
    options["package"] = task.package

    if task.package == "service":
        if "service-dll-of-interest" not in task.options:
            default_dll = "service-dll-of-interest=c:\\windows\\system32\\nwsapagent.dll"
            if task.options == "":
                task.options = default_dll
            else:
                task.options += "," + default_dll

    options["options"] = task.options
    options["enforce_timeout"] = task.enforce_timeout
    options["clock"] = task.clock
    options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

    # A missing or zero timeout falls back to the configured default.
    if not task.timeout:
        options["timeout"] = self.cfg.timeouts.default
    else:
        options["timeout"] = task.timeout

    if task.category == "file":
        options["file_name"] = File(task.target).get_name()
        options["file_type"] = File(task.target).get_type()
        # if it's a PE file, collect export information to use in more smartly determining the right
        # package to use
        options["exports"] = ""
        if HAVE_PEFILE and ("PE32" in options["file_type"] or "MS-DOS executable" in options["file_type"]):
            try:
                pe = pefile.PE(task.target)
                if hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                    # NOTE(review): the character class below looks garbled
                    # by e-mail obfuscation in this listing; it is kept
                    # verbatim -- confirm against upstream.
                    exports = [
                        re.sub(r'[^[email protected]]', '', exported_symbol.name)
                        for exported_symbol in pe.DIRECTORY_ENTRY_EXPORT.symbols
                    ]
                    options["exports"] = ",".join(exports)
            except Exception:
                # Export collection is best-effort: a file that cannot be
                # parsed just leaves "exports" empty. Exception instead of a
                # bare except lets KeyboardInterrupt/SystemExit propagate.
                pass

    return options
开发者ID:CIRCL,项目名称:cuckoo-modified,代码行数:52,代码来源:scheduler.py
注:本文中的 re2.sub 函数示例由纯净天空整理自 GitHub、MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。
请发表评论