本文整理汇总了Python中regex.escape函数的典型用法代码示例。如果您正苦于以下问题:Python escape函数的具体用法?Python escape怎么用?Python escape使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了escape函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __init__
def __init__(self, keywords, fuzzy_min_len=None):
    """Build the keyword table and compile one regex matching any keyword.

    :param keywords: iterable of ``(id, keyword)`` pairs. Keywords are
        stripped and lower-cased; the first id seen for a keyword wins and
        later duplicates are reported on stdout.
    :param fuzzy_min_len: optional collection stored sorted on
        ``self.fuzzy_min_len`` (consumed by ``get_allow_distance``, which is
        defined outside this block).
    """
    self.fuzzy_min_len = sorted([] if fuzzy_min_len is None else fuzzy_min_len)

    self.keywords = {}
    for keyword_id, raw_keyword in keywords:
        normalized = raw_keyword.strip().lower()
        if normalized in self.keywords:
            print("ERROR: found duplicate keyword '{0}'".format(normalized))
            continue
        self.keywords[normalized] = keyword_id
    print("Number of unique keywords ID to be search: {0}"
          .format(len(self.keywords)))

    alternatives = []
    for keyword in self.keywords:
        distance = self.get_allow_distance(keyword)
        escaped = re.escape(keyword)
        if distance:
            # Fuzzy alternative: allow up to `distance` errors for this keyword.
            escaped = r'(?:{0}){{e<={1}}}'.format(escaped, distance)
        alternatives.append(escaped)

    # All alternatives are wrapped in one word-bounded, non-capturing group.
    self.re_keywords = re.compile(r'\b(?:{0})\b'.format('|'.join(alternatives)))
开发者ID:soodoku,项目名称:search-names,代码行数:28,代码来源:searchengines.py
示例2: process_index_title_change_in_history
def process_index_title_change_in_history(indx, **kwargs):
    """
    Update all history entries which reference 'old' to 'new'.

    :param indx: the index being renamed; only ``indx.is_commentary()`` is
        consulted here.
    :param kwargs: must provide ``old`` (previous title) and ``new``
        (replacement title).

    Four kinds of history record are rewritten in turn (``ref``,
    ``new.refs``, ``new.ref`` and ``title``); in each, only the first
    occurrence of the old title is replaced.
    """
    escaped_old = re.escape(kwargs["old"])
    if indx.is_commentary():
        pattern = r'{} on '.format(escaped_old)
        title_pattern = r'(^{}$)|({} on)'.format(escaped_old, escaped_old)
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        # Commentator titles are data, not regex source: escape each one so a
        # title containing '.', '(' etc. cannot widen or break the pattern.
        commentator_alternation = "|".join(re.escape(c) for c in commentators)
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(escaped_old, commentator_alternation, escaped_old)
        title_pattern = r'(^{}$)|(^({}) on {})'.format(escaped_old, commentator_alternation, escaped_old)

    def _rename(value):
        # Replace only the first occurrence, as the title leads the ref.
        return value.replace(kwargs["old"], kwargs["new"], 1)

    for h in HistorySet({"ref": {"$regex": pattern}}):
        h.ref = _rename(h.ref)
        h.save()

    for h in HistorySet({"new.refs": {"$regex": pattern}}):
        h.new["refs"] = [_rename(r) for r in h.new["refs"]]
        h.save()

    for h in HistorySet({"new.ref": {"$regex": pattern}}):
        h.new["ref"] = _rename(h.new["ref"])
        h.save()

    for h in HistorySet({"title": {"$regex": title_pattern}}):
        h.title = _rename(h.title)
        h.save()
开发者ID:rivkahcarl,项目名称:Sefaria-Project,代码行数:31,代码来源:history.py
示例3: dep_counts
def dep_counts(name):
    """Count records that depend on the index title ``name``.

    Returns a dict mapping a human-readable label to the ``count()`` of the
    matching version, history, note and link sets. Titles are matched exactly
    or by regex; refs are matched by the three patterns built below.
    """
    escaped = re.escape(name)
    ref_patterns = {
        'alone': r'^{} \d'.format(escaped),
        'commentor': r'{} on'.format(escaped),
        'commentee': r'on {} \d'.format(escaped)
    }
    commentee_title_pattern = r'on {}'.format(escaped)

    def by_title_regex(pattern):
        # Fresh query dict per call so no two sets share a query object.
        return {"title": {"$regex": pattern}}

    counts = {}
    counts['version title exact match'] = text.VersionSet({"title": name}).count()
    counts['version title match commentor'] = text.VersionSet(by_title_regex(ref_patterns["commentor"])).count()
    counts['version title match commentee'] = text.VersionSet(by_title_regex(commentee_title_pattern)).count()
    counts['history title exact match'] = history.HistorySet({"title": name}).count()
    counts['history title match commentor'] = history.HistorySet(by_title_regex(ref_patterns["commentor"])).count()
    counts['history title match commentee'] = history.HistorySet(by_title_regex(commentee_title_pattern)).count()

    for label, pattern in ref_patterns.items():
        counts['note match ' + label] = note.NoteSet({"ref": {"$regex": pattern}}).count()
        counts['link match ' + label] = link.LinkSet({"refs": {"$regex": pattern}}).count()
        counts['history refs match ' + label] = history.HistorySet({"ref": {"$regex": pattern}}).count()
        counts['history new refs match ' + label] = history.HistorySet({"new.refs": {"$regex": pattern}}).count()
    return counts
开发者ID:JonMosenkis,项目名称:Sefaria-Project,代码行数:27,代码来源:count_index_dependencies.py
示例4: process_index_title_change_in_history
def process_index_title_change_in_history(indx, **kwargs):
    """
    Update all history entries which reference 'old' to 'new'.

    :param indx: the index being renamed; its current ``title`` is swapped
        for the old title inside the dependency regex.
    :param kwargs: must provide ``old`` (previous title) and ``new``
        (replacement title).

    Progress is reported on stdout before each pass. In every matched
    record only the first occurrence of the old title is replaced.
    """
    # The docstring must be the first statement to be a docstring at all;
    # previously it followed the print and was a discarded string expression.
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Cascading History {} to {}".format(kwargs['old'], kwargs['new']))
    from sefaria.model.text import prepare_index_regex_for_dependency_process

    # The helper builds the regex for the index's *current* title; rewrite it
    # so that it matches the old title that stored records still carry.
    pattern = prepare_index_regex_for_dependency_process(indx)
    pattern = pattern.replace(re.escape(indx.title), re.escape(kwargs["old"]))
    title_pattern = u'(^{}$)'.format(re.escape(kwargs["old"]))

    def _rename(value):
        return value.replace(kwargs["old"], kwargs["new"], 1)

    text_hist = HistorySet({"ref": {"$regex": pattern}})
    print("Cascading Text History {} to {}".format(kwargs['old'], kwargs['new']))
    for h in text_hist:
        h.ref = _rename(h.ref)
        h.save()

    link_hist = HistorySet({"new.refs": {"$regex": pattern}})
    print("Cascading Link History {} to {}".format(kwargs['old'], kwargs['new']))
    for h in link_hist:
        h.new["refs"] = [_rename(r) for r in h.new["refs"]]
        h.save()

    note_hist = HistorySet({"new.ref": {"$regex": pattern}})
    print("Cascading Note History {} to {}".format(kwargs['old'], kwargs['new']))
    for h in note_hist:
        h.new["ref"] = _rename(h.new["ref"])
        h.save()

    title_hist = HistorySet({"title": {"$regex": title_pattern}})
    print("Cascading Index History {} to {}".format(kwargs['old'], kwargs['new']))
    for h in title_hist:
        h.title = _rename(h.title)
        h.save()
开发者ID:pzp1997,项目名称:Sefaria-Project,代码行数:33,代码来源:history.py
示例5: inline_one
def inline_one(start: str, end: str, nest=Nesting.FRAME, sub=None, display=Display.INLINE):
    """Create an inline element delimited by ``start`` and ``end``.

    Both delimiters are regex-escaped and substituted into the
    ``Patterns.single_group`` template; the compiled pattern is handed to
    ``inline`` together with the first character of each delimiter as the
    escape list. ``nest``, ``sub`` and ``display`` are passed through as-is.
    """
    template = Patterns.single_group.value
    compiled = re.compile(template.format(re.escape(start), re.escape(end)))
    escape_chars = [start[0], end[0]]
    return inline(compiled, escape=escape_chars,
                  nest=nest, display=display, sub=sub)
开发者ID:vshesh,项目名称:glue,代码行数:7,代码来源:elements.py
示例6: process_index_delete_in_links
def process_index_delete_in_links(indx, **kwargs):
    """Delete every link whose refs match the title of ``indx``.

    :param indx: the index being deleted; ``indx.title`` and
        ``indx.is_commentary()`` are read.
    :param kwargs: accepted for signature compatibility; unused here.

    For a commentary, refs starting with "<title> on " are matched. For
    anything else, refs starting with "<title> <digit>" or with
    "<commentator> on <title> <digit>" are matched.
    """
    escaped_title = re.escape(indx.title)
    if indx.is_commentary():
        pattern = u'^{} on '.format(escaped_title)
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        # This pattern drives a delete: escape commentator titles so regex
        # metacharacters in a title cannot make it match unrelated links.
        commentator_alternation = "|".join(re.escape(c) for c in commentators)
        # Same pattern text as before, spelled so it parses on Python 2 and 3.
        pattern = u"(^{} \\d)|^({}) on {} \\d".format(escaped_title, commentator_alternation, escaped_title)
    LinkSet({"refs": {"$regex": pattern}}).delete()
开发者ID:Elisha4700,项目名称:Sefaria-Project,代码行数:7,代码来源:link.py
示例7: guess_split
def guess_split(majiribun, reading):
    """Map each kanji of ``majiribun`` to its share of ``reading``.

    Every character accepted by ``kanji_re`` becomes a hiragana capture
    group; every other character must appear literally. The reading is
    matched twice, with greedy and with non-greedy groups.

    Returns a dict of kanji -> reading when both matches agree, and None
    when the reading does not match or the two splits differ (ambiguous).
    """
    kanji_chars = []
    greedy_parts = []
    lazy_parts = []
    for ch in majiribun:
        if kanji_re.match(ch):
            kanji_chars.append(ch)
            greedy_parts.append(r"(\p{Hiragana}+)")
            lazy_parts.append(r"(\p{Hiragana}+?)")
        else:
            literal = re.escape(ch)
            greedy_parts.append(literal)
            lazy_parts.append(literal)

    greedy_match = re.match(''.join(greedy_parts) + '$', reading)
    if not greedy_match:
        return None

    greedy_readings = greedy_match.groups()
    lazy_readings = re.match(''.join(lazy_parts) + '$', reading).groups()
    if greedy_readings != lazy_readings:
        # Ambiguous!
        return None

    # One capture group was added per kanji, so the sequences line up.
    return dict(zip(kanji_chars, greedy_readings))
开发者ID:leoboiko,项目名称:yomisplit,代码行数:25,代码来源:__init__.py
示例8: dep_counts
def dep_counts(name):
    """Count records that depend on the index title ``name``.

    Returns a dict mapping a human-readable label to the ``count()`` of the
    matching version, history, note and link sets.
    """
    commentators = model.IndexSet({"categories.0": "Commentary"}).distinct("title")
    escaped_name = re.escape(name)
    # Commentator titles come from the database; escape them so regex
    # metacharacters in a title cannot distort the alternation.
    commentator_alternation = "|".join(re.escape(c) for c in commentators)
    ref_patterns = {
        'alone': r'^{} \d'.format(escaped_name),
        'commentor': r'{} on'.format(escaped_name),
        'commentee': r'^({}) on {} \d'.format(commentator_alternation, escaped_name)
    }
    # Title matching uses the same pattern as the 'commentee' ref pattern.
    commentee_title_pattern = ref_patterns['commentee']
    ret = {
        'version title exact match': model.VersionSet({"title": name}).count(),
        'version title match commentor': model.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'version title match commentee': model.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
        'history title exact match': model.HistorySet({"title": name}).count(),
        'history title match commentor': model.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
        'history title match commentee': model.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
    }

    for pname, pattern in ref_patterns.items():
        ret.update({
            'note match ' + pname: model.NoteSet({"ref": {"$regex": pattern}}).count(),
            'link match ' + pname: model.LinkSet({"refs": {"$regex": pattern}}).count(),
            'history refs match ' + pname: model.HistorySet({"ref": {"$regex": pattern}}).count(),
            'history new refs match ' + pname: model.HistorySet({"new.refs": {"$regex": pattern}}).count()
        })
    return ret
开发者ID:digideskio,项目名称:Sefaria-Project,代码行数:28,代码来源:text_test.py
示例9: expand_parens
def expand_parens(string, parens="()", include_spaces=False, substitute_string=''):
    """Expand one parenthesised optional part into two cleaned variants.

    :param string: text to expand.
    :param parens: two-character string giving the opening and closing
        delimiter.
    :param include_spaces: when true, the delimited part may contain
        whitespace and need not be attached to a neighbouring character.
    :param substitute_string: marker that is stripped when it directly
        follows the opener or directly precedes the closer.
    :returns: ``[without, within]``, both passed through ``clean_str``, or
        ``[string]`` unchanged when no delimited part is found.
    """
    left, right = parens[0], parens[1]
    left_re = re.escape(left)
    right_re = re.escape(right)
    marker_re = re.escape(substitute_string)

    normalized = re.sub(left_re + marker_re, left, string)
    normalized = re.sub(marker_re + right_re, right, normalized)

    if include_spaces:
        candidates = [r'(^.*)' + left_re + r'(.+)' + right_re + r'(.*$)']
    else:
        # The group must touch a non-space character on its left or right.
        candidates = [
            r'(^.*\S)' + left_re + r'(\S+)' + right_re + r'(.*$)',
            r'(^.*)' + left_re + r'(\S+)' + right_re + r'(\S.*$)',
        ]

    for candidate in candidates:
        found = re.search(candidate, normalized)
        if found:
            before, inside, after = found.group(1), found.group(2), found.group(3)
            return [clean_str(before + after), clean_str(before + inside + after)]
    return [string]
开发者ID:longnow,项目名称:panlex-tools,代码行数:28,代码来源:string_manipulation.py
示例10: __init__
def __init__(self, charset: Union[Dict[str, Sequence[int]], Sequence[str], str]) -> None:
    """
    Set up the grapheme <-> label mappings and their matching regexes.

    A dict ``charset`` is used directly as the grapheme -> label-sequence
    table. Any other ``charset`` (a string or a list of strings) is sorted
    and each element receives a single automatically assigned label.

    Automatic labels start at 1 because 0 is the blank label in a CTC
    output layer; user-supplied dicts should be 1-indexed as well.

    Args:
        charset (unicode, list, dict): Input character set.
    """
    if not isinstance(charset, dict):
        charset = {grapheme: [label] for label, grapheme in enumerate(sorted(charset), start=1)}
    self.c2l = charset

    # Label sequences are stored as strings of code points, since the regex
    # engine only operates on strings.
    self.l2c = {}  # type: Dict[str, str]
    for grapheme, labels in self.c2l.items():
        self.l2c[''.join(map(chr, labels))] = grapheme

    def longest_first_alternation(keys):
        # Longer keys go first so a key is tried before any of its prefixes.
        ordered = sorted(keys, key=len, reverse=True)
        return r'|'.join(regex.escape(key) for key in ordered)

    self.c2l_regex = regex.compile(longest_first_alternation(self.c2l.keys()))
    self.l2c_regex = regex.compile(longest_first_alternation(self.l2c.keys()))
开发者ID:mittagessen,项目名称:kraken,代码行数:30,代码来源:codec.py
示例11: _match_by_edit_distance
def _match_by_edit_distance(full_text, text_to_match):
    """Find the span of ``full_text`` that best corresponds to ``text_to_match``.

    Candidate spans are collected at occurrences of the first token (or,
    failing that, the first character) of ``text_to_match``, and the one
    with the lowest ``edit_distance`` to ``text_to_match`` is chosen; its end
    is then adjusted to line up with the last character of ``text_to_match``.

    Returns the adjusted span, or ``text_to_match`` itself when no candidate
    could be found. Exits the process with status 1 if building the initial
    candidate list raises.
    """
    # Turn bracket placeholder tokens back into literal brackets.
    text_to_match = text_to_match.replace("-LRB-", "(").replace("-RRB-", ")")
    text_to_match = text_to_match.replace("-LCB-", "{").replace("-RCB-", "}")
    # A trailing literal "[\]\)]" is collapsed to a single ")".
    text_to_match = re.sub(r'\[\\\]\\\)\]$', ')', text_to_match)

    try:
        # The anchor is everything up to the first space (or the whole text).
        end_point = (text_to_match.index(" ") if " " in text_to_match else len(text_to_match))
        # Each candidate is a window of len(text_to_match) + 1 characters
        # starting at a case-insensitive occurrence of the anchor.
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in
                             re.finditer(re.escape(text_to_match[0:end_point]), full_text, re.U | re.I)]
    except:
        # NOTE(review): bare except also intercepts KeyboardInterrupt and
        # SystemExit; the inputs are dumped and the process terminates.
        import sys
        print(full_text)
        print()
        print(text_to_match)
        sys.exit(1)

    # Fallback 1: anchor on the first character only (case-sensitive).
    if len(potential_matches) == 0:
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in
                             re.finditer(re.escape(text_to_match[0]), full_text, re.U)]
    # Fallback 2: swap "(" for "[" in the target and retry on the first character.
    if len(potential_matches) == 0:
        text_to_match = text_to_match.replace("(", "[")
        potential_matches = [full_text[m.start():(m.start() + len(text_to_match) + 1)] for m in
                             re.finditer(re.escape(text_to_match[0]), full_text, re.U)]

    # Cut each over-long candidate after its last occurrence of the target's
    # final character; other candidates are kept unchanged.
    potential_matches = [(p[0:p.rindex(text_to_match[-1])+1]
                          if text_to_match[-1] in p and len(p) > len(text_to_match)
                          else p)
                         for p in potential_matches]

    if len(potential_matches) == 0:
        # No idea why this would ever happen, but it does
        return text_to_match

    # Pick the candidate with the smallest edit distance; because of "<=",
    # a later candidate wins a tie.
    match_with_lowest_edit_distance = ""
    lowest_edit_distance = -1  # -1 means "no candidate scored yet"
    for match in potential_matches:
        e_d = edit_distance(match, text_to_match)
        if lowest_edit_distance == -1 or e_d <= lowest_edit_distance:
            lowest_edit_distance = e_d
            match_with_lowest_edit_distance = match

    result = match_with_lowest_edit_distance.strip()
    if text_to_match[-1] in result:
        # Drop trailing characters until the result ends like the target.
        while result[-1] != text_to_match[-1]:
            result = result[0:-1]
    elif text_to_match[-1] == '"' and re.search(r'["”\u201d]', result):
        # Target ends with a straight quote: accept a curly closing quote too.
        while result[-1] not in ['"', '”', "\u201d"]:
            result = result[0:-1]
    elif text_to_match[-1] not in [']', '}', ')'] and text_to_match[-2:] != "..":
        # Grow the result one character at a time from full_text (after the
        # first occurrence of the current result) until it ends like the target.
        # NOTE(review): appears to raise IndexError if the end of full_text is
        # reached before that character turns up - confirm.
        while result[-1] != text_to_match[-1]:
            result += full_text[full_text.index(result) + len(result)][-1]
    return result
开发者ID:EducationalTestingService,项目名称:match,代码行数:54,代码来源:Match.py
示例12: process_index_title_change_in_links
def process_index_title_change_in_links(indx, **kwargs):
    """Rewrite link refs that mention the old index title to use the new one.

    :param indx: the index being renamed; only ``indx.is_commentary()`` is
        consulted here.
    :param kwargs: must provide ``old`` (previous title) and ``new``
        (replacement title).

    Links are selected by regex on ``refs``. Within a selected link, only
    the refs that themselves match are changed, and only the first
    occurrence of the old title in each is replaced.
    """
    escaped_old = re.escape(kwargs["old"])
    if indx.is_commentary():
        pattern = r'^{} on '.format(escaped_old)
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        # Escape commentator titles: they are literal text, not regex source.
        commentator_alternation = "|".join(re.escape(c) for c in commentators)
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(escaped_old, commentator_alternation, escaped_old)

    ref_matcher = re.compile(pattern)
    for lnk in LinkSet({"refs": {"$regex": pattern}}):
        lnk.refs = [
            r.replace(kwargs["old"], kwargs["new"], 1) if ref_matcher.search(r) else r
            for r in lnk.refs
        ]
        lnk.save()
开发者ID:rivkahcarl,项目名称:Sefaria-Project,代码行数:11,代码来源:link.py
示例13: process_index_title_change_in_links
def process_index_title_change_in_links(indx, **kwargs):
    """Rewrite link refs from the old index title (``kwargs['old']``) to the new one.

    The ref regex of the index's current title is retargeted at the old
    title and used both to select links and to decide which refs of a
    link to rewrite. A link that raises ``InputError`` on save is logged
    and deleted.
    """
    print("Cascading Links {} to {}".format(kwargs['old'], kwargs['new']))
    current_title_regex = text.Ref(indx.title).regex()
    pattern = current_title_regex.replace(re.escape(indx.title), re.escape(kwargs["old"]))

    for lnk in LinkSet({"refs": {"$regex": pattern}}):
        updated_refs = []
        for ref in lnk.refs:
            if re.search(pattern, ref):
                ref = ref.replace(kwargs["old"], kwargs["new"], 1)
            updated_refs.append(ref)
        lnk.refs = updated_refs
        try:
            lnk.save()
        except InputError:  # todo: this belongs in a better place - perhaps in abstract
            logger.warning("Deleting link that failed to save: {} - {}".format(lnk.refs[0], lnk.refs[1]))
            lnk.delete()
开发者ID:pzp1997,项目名称:Sefaria-Project,代码行数:12,代码来源:link.py
示例14: process_index_title_change_in_notes
def process_index_title_change_in_notes(indx, **kwargs):
    """Rewrite note refs from the old index title (``kwargs['old']``) to the new one.

    Notes are selected with the index's ref regex retargeted at the old
    title. A note whose update or save raises is logged and deleted.
    """
    print("Cascading Notes {} to {}".format(kwargs['old'], kwargs['new']))
    current_title_regex = Ref(indx.title).regex()
    pattern = current_title_regex.replace(re.escape(indx.title), re.escape(kwargs["old"]))

    for stale_note in NoteSet({"ref": {"$regex": pattern}}):
        try:
            stale_note.ref = stale_note.ref.replace(kwargs["old"], kwargs["new"], 1)
            stale_note.save()
        except Exception:
            logger.warning("Deleting note that failed to save: {}".format(stale_note.ref))
            stale_note.delete()
开发者ID:arielallon,项目名称:Sefaria-Project,代码行数:12,代码来源:note.py
示例15: __init__
def __init__(self, leading_allow=None, trailing_allow=None):
    """
    Build the token pattern and pass it to the parent filter.

    :param list leading_allow: The leading punctuation characters to allow.
    :param list trailing_allow: The trailing punctuation characters to allow.
    """
    leading_pattern = ""
    if leading_allow:
        leading_pattern = r"[%s]*" % regex.escape("".join(leading_allow))

    if not trailing_allow:
        # No trailing punctuation is allowed.
        super(OuterPunctuationFilter, self).__init__(r"%s[^\W_](?:$|.*[^\W_])" % leading_pattern)
        return

    trailing_pattern = r"[%s]" % regex.escape("".join(trailing_allow))
    super(OuterPunctuationFilter, self).__init__(
        r"%s[^\W_]+(?:$|.*[^\W_]%s*|%s*)" % (leading_pattern, trailing_pattern, trailing_pattern)
    )
开发者ID:cjrh,项目名称:caterpillar,代码行数:13,代码来源:filter.py
示例16: process_index_title_change_in_links
def process_index_title_change_in_links(indx, **kwargs):
    """Rewrite link refs from the old index title (``kwargs['old']``) to the new one.

    Each ref regex of the index's current title is retargeted at the old
    title. Links matching any of them are fetched with one ``$or`` query,
    and every ref matching the combined alternation has the first
    occurrence of the old title replaced. A link that raises
    ``InputError`` on save is logged and deleted.
    """
    print("Cascading Links {} to {}".format(kwargs['old'], kwargs['new']))

    patterns = []
    for current_pattern in text.Ref(indx.title).regex(as_list=True):
        patterns.append(current_pattern.replace(re.escape(indx.title), re.escape(kwargs["old"])))

    queries = [{'refs': {'$regex': p}} for p in patterns]
    any_pattern = u'|'.join(patterns)

    for lnk in LinkSet({"$or": queries}):
        updated_refs = []
        for ref in lnk.refs:
            if re.search(any_pattern, ref):
                ref = ref.replace(kwargs["old"], kwargs["new"], 1)
            updated_refs.append(ref)
        lnk.refs = updated_refs
        try:
            lnk.save()
        except InputError:  # todo: this belongs in a better place - perhaps in abstract
            logger.warning("Deleting link that failed to save: {} - {}".format(lnk.refs[0], lnk.refs[1]))
            lnk.delete()
开发者ID:arielallon,项目名称:Sefaria-Project,代码行数:13,代码来源:link.py
示例17: process_index_title_change_in_notes
def process_index_title_change_in_notes(indx, **kwargs):
    """Rewrite note refs that mention the old index title to use the new one.

    :param indx: the index being renamed; only ``indx.is_commentary()`` is
        consulted here.
    :param kwargs: must provide ``old`` (previous title) and ``new``
        (replacement title).

    Updating is best-effort: a note that fails to update or save is logged
    with its traceback and skipped, and the remaining notes are processed.
    """
    # Function-scope import so this block does not rely on a module-level logger.
    import logging

    escaped_old = re.escape(kwargs["old"])
    if indx.is_commentary():
        pattern = r'{} on '.format(escaped_old)
    else:
        commentators = IndexSet({"categories.0": "Commentary"}).distinct("title")
        # Escape commentator titles: they are literal text, not regex source.
        commentator_alternation = "|".join(re.escape(c) for c in commentators)
        pattern = r"(^{} \d)|(^({}) on {} \d)".format(escaped_old, commentator_alternation, escaped_old)

    notes = NoteSet({"ref": {"$regex": pattern}})
    for n in notes:
        try:
            n.ref = n.ref.replace(kwargs["old"], kwargs["new"], 1)
            n.save()
        except Exception:
            # Still swallowed on purpose (one bad note must not abort the
            # cascade), but no longer silently.
            logging.getLogger(__name__).exception(
                "Failed to update note ref: %s", getattr(n, "ref", None))
开发者ID:rivkahcarl,项目名称:Sefaria-Project,代码行数:13,代码来源:note.py
示例18: process_index_title_change_in_links
def process_index_title_change_in_links(indx, **kwargs):
    """Rewrite link refs that mention the old index title to use the new one.

    :param indx: the index being renamed; only ``indx.is_commentary()`` is
        consulted here.
    :param kwargs: must provide ``old`` (previous title) and ``new``
        (replacement title).

    Within a selected link, only refs that themselves match the pattern are
    changed, and only the first occurrence of the old title is replaced. A
    link that raises ``InputError`` on save is logged and deleted.
    """
    escaped_old = re.escape(kwargs["old"])
    if indx.is_commentary():
        pattern = r'^{} on '.format(escaped_old)
    else:
        commentators = text.IndexSet({"categories.0": "Commentary"}).distinct("title")
        # Escape commentator titles: they are literal text, not regex source.
        commentator_alternation = "|".join(re.escape(c) for c in commentators)
        # Same pattern text as before, spelled so it parses on Python 2 and 3.
        pattern = u"(^{} \\d)|(^({}) on {} \\d)".format(escaped_old, commentator_alternation, escaped_old)

    ref_matcher = re.compile(pattern)
    for lnk in LinkSet({"refs": {"$regex": pattern}}):
        lnk.refs = [
            r.replace(kwargs["old"], kwargs["new"], 1) if ref_matcher.search(r) else r
            for r in lnk.refs
        ]
        try:
            lnk.save()
        except InputError:  # todo: this belongs in a better place - perhaps in abstract
            logger.warning("Deleting link that failed to save: {} {}".format(lnk.refs[0], lnk.refs[1]))
            lnk.delete()
开发者ID:Elisha4700,项目名称:Sefaria-Project,代码行数:15,代码来源:link.py
示例19: match
def match(pat=None, pat_args=None):
    """Build a compiled regular expression that matches a dictionary entry.

    ``pat`` is an entry template in which ``$name`` marks a variable. Each
    variable becomes an optional group: the expansion from
    ``pat_args['name']`` when one is given, otherwise the generic group
    produced by ``var_group``. Inside an expansion, the variable's own
    ``$name`` is replaced by ``var_group(name)``.

    Args:
        pat: (Optional) The format of the entry. Defaults to ``default_pat``.
        pat_args: (Optional) The expansions for variables. Defaults to
            ``default_pat_args`` when ``pat`` is also unspecified, otherwise
            to ``{}``.

    Returns:
        The compiled pattern, anchored at both ends, with every literal
        space in the template matching one or more whitespace characters.
    """
    if pat is None:
        pat = default_pat
        if pat_args is None:
            pat_args = default_pat_args
    if not pat_args:
        pat_args = {}

    def escape_keeping_dollars(template):
        # Escape regex specials but leave '$' bare so variables stay visible.
        return regex.escape(template, True).replace(r'\$', '$')

    args = {}
    for field in pat_args:
        expansion = escape_keeping_dollars(pat_args[field])
        while True:
            found = regex.search(var_matcher(field), expansion)
            if found is None:
                break
            # Start one character before group 1 so the '$' sigil is replaced too.
            span = (found.start(1) - 1, found.end(1))
            expansion = workers.slice_replace(expansion, span, var_group(field))
        args[field] = expansion

    pat = '^' + escape_keeping_dollars(pat) + '$'
    while True:
        found = var_match.search(pat)
        if found is None:
            break
        span = (found.start(1) - 1, found.end(1))
        field = found.group(1)
        replacement = '({})?'.format(args.get(field, var_group(field)))
        pat = workers.slice_replace(pat, span, replacement)

    pat = pat.replace(' ', r'\s+')
    return regex.compile(pat)
开发者ID:zyxw59,项目名称:soundchanger,代码行数:48,代码来源:entry_format.py
示例20: constructErrorRegex
def constructErrorRegex(Error, Context):
    """Compile a fuzzy regex that locates ``Error`` between its two contexts.

    ``Context[0]`` (left-stripped) and ``Context[1]`` (right-stripped) are
    escaped and matched approximately on either side of a named group
    ``errorMatch``. A lookahead requires the right context to begin within
    a window of ``int(len(Error) * (1 / 2))`` to ``ceil(len(Error) * 1.8)``
    characters after the left context.

    Returns the pattern compiled with ``regex.BESTMATCH | regex.V1``.
    """
    left = regex.escape(Context[0].lstrip())
    right = regex.escape(Context[1].rstrip())

    shortest = int(len(Error) * (1 / 2))
    longest = math.ceil(len(Error) * 1.8)
    window = r"{" + str(shortest) + r"," + str(longest) + r"}"

    pieces = [
        # Left context, matched approximately.
        r"(?:(?:", left, r"){1s+1i+1d<=7})",
        # Lookahead: the right context must start within the window.
        r"(?=.", window, r"(?:(?:", right, r"){1s+1i+1d<=6}))",
        # The error itself: word-like chunks, each followed by the right
        # context (lookahead) or by one or two non-word characters.
        r"(?P<errorMatch>(?:\w++[\-\']?\w*+)(?:(?=(?:", right, r"){1s+1i+1d<=6})|\W{1,2}))+?",
        # Right context, matched approximately.
        r"(?:(?:", right, r"){1s+1i+2d<=6})",
    ]
    return regex.compile("".join(pieces), regex.BESTMATCH | regex.V1)
开发者ID:jcavalieri8619,项目名称:OCRerror_correct,代码行数:16,代码来源:ParallelOCRalign_Global.py
注:本文中的regex.escape函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论