本文整理汇总了Python中regex.split函数的典型用法代码示例。如果您正苦于以下问题:Python split函数的具体用法?Python split怎么用?Python split使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了split函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: rev_ip
def rev_ip(ip, delimiter=None):
    """Return the reverse-DNS name (…in-addr.arpa. / …ip6.arpa.) for an IP/CIDR.

    ip: an address or "address/prefix" string; it is first normalized by the
    module-level expand_ip() helper.
    delimiter: when given, prefixes that are not octet/nibble aligned are
    encoded by appending "<delimiter><prefix>" to the boundary label.
    Returns the reverse name as a string, or False when none can be built.

    BUG FIX: on Python 3 the original raised TypeError — it sliced with a
    float ((prefix / 4) * 2) and indexed/sliced a lazy `filter` object.
    Integer arithmetic (//) and an explicit list() are used instead.
    """
    revip = False
    eip = expand_ip(ip)
    prefix = False
    if '/' in eip:
        eip, prefix = regex.split('/', eip)[0:2]
    elif is_ip4.search(eip):
        prefix = '32'
    elif is_ip6.search(eip):
        prefix = '128'
    if prefix:
        prefix = int(prefix)
        if is_ip4.search(eip):
            if prefix in (8, 16, 24, 32):
                # Octet-aligned: keep prefix/8 octets, reversed.
                revip = '.'.join(eip.split('.')[0:prefix // 8][::-1]) + '.in-addr.arpa.'
            elif delimiter:
                octs = eip.split('.')[::-1]
                keep = 3 - prefix // 8  # index of the partially-covered octet
                octs[keep] = octs[keep] + delimiter + str(prefix)
                revip = '.'.join(octs[keep:]) + '.in-addr.arpa.'
        elif is_ip6.search(eip):
            # One label per hex nibble of the expanded, colon-free address.
            nibs = list(filter(None, regex.split('(.)', regex.sub(':', '', eip))))
            if prefix in range(4, 129, 4):
                # Nibble-aligned: "a.b.c..." keeps 2 chars per nibble incl. dots.
                revip = '.'.join(nibs)[0:(prefix // 4) * 2][::-1].strip('.') + '.ip6.arpa.'
            elif delimiter:
                nibs = nibs[::-1]
                keep = 31 - prefix // 4  # index of the partially-covered nibble
                nibs[keep] = nibs[keep] + delimiter + str(prefix)
                revip = '.'.join(nibs[keep:]) + '.ip6.arpa.'
    return revip
开发者ID:cbuijs,项目名称:unbound-dns-filter,代码行数:32,代码来源:unbound-dns-filter.py
示例2: main
def main():
    """Spell-check the bundled error-list file and print the corrected text."""
    language = "english"
    package = "corpus"
    source = "eng/myBigErrorsList.txt"
    data = WP.readBook(WP, package, source)
    # Keep newline runs as their own tokens so line structure is preserved.
    #tokens = regex.split("(\n+)", data.lower())
    tokens = regex.split("(\n+)", data)
    speller = NG(language)
    corrected = ""
    seen = 0
    for token in tokens:
        if "\n" in token:
            corrected += "\n"
            continue
        for candidate in regex.split("\W+", token):
            if not len(candidate):
                continue
            seen += 1
            print("correct(%r) => %r" % (candidate, speller.correct(candidate.lower())))
            corrected += str(speller.correct(candidate) + str(" "))
    print("######## Original Txt ########")
    print(data)
    print("######## Txt After Correction ########")
    print(corrected)
    print("################")
开发者ID:seekerFactory,项目名称:spelled,代码行数:25,代码来源:filecorct.py
示例3: getNames
def getNames(evalLines, locs1, locs2, x):
    """
    Calls appropriate helper functions to accurately parse name information.

    evalLines: indices of lines already consumed (appended to when a name is
    taken from a following line); locs1: indices whose rank marker is on one
    line; locs2: indices whose marker continues from the previous line;
    x: the raw text lines.  Returns (evalLines, entries) where each entry is
    a dict with keys loc/name/nameFlag/rank/email/tel/fax/title, filled in
    by the emtefa() helper.
    """
    locs = sorted(locs1 + locs2)
    entries = []
    keys = ['loc', 'name', 'nameFlag', 'rank', 'email', 'tel', 'fax', 'title']
    for i in locs:
        D = {key: None for key in keys}
        m = None
        D['loc'] = i
        if i in locs1:
            m = regex.split(r'•\s*(.*?):\s*', x[i])  # split line with rank
            D['rank'] = m[1].strip()
        elif i in locs2:
            # BUG FIX: the keyword argument is `flags`, not `flag` — the regex
            # module silently accepted `flag` as a named-list kwarg, so the
            # UNICODE flag was never applied.
            m = regex.split(r'•\s*(.*?):\s*',
                            ' '.join([x[i - 1].strip(), x[i].strip()]),
                            flags=regex.UNICODE)
            D['rank'] = m[1].strip()
        if len(m[2].strip()) == 0:  # there is only a rank on this line
            # Assume the name is on the next line, potentially with other info.
            evalLines.append(i + 1)
            emtefa(x[i + 1], D, x[i + 2])
        else:  # name is on the same line
            emtefa(m[2].strip(), D, x[i + 1])
        entries.append(D)
    return (evalLines, entries)
开发者ID:aserlich,项目名称:frenchBur,代码行数:26,代码来源:frencBur2011.py
示例4: parse_semag
def parse_semag(self, str, mass):
    """Parse a Semag (Sefer Mitzvot Gadol) citation string into resolved refs.

    `str` is the raw Hebrew citation text; `mass` carries error state
    (ErrorFile / error_flag).  Returns a list of references resolved through
    self._tracker, or None after reporting a parse error.
    NOTE(review): Python 2 code (print statements); `str` shadows the builtin
    but is kept for interface compatibility.
    """
    # split = re.split('\s', str.strip())
    # Book keywords: optional vav prefix + 'asin' / 'sham' / 'lavin'.
    reg_book = re.compile(u'ו?(עשין|שם|לאוין)')
    split = re.split(reg_book, str.strip())
    # str_list = filter(None, split)
    str_list = filter(None, [item.strip() for item in split])  # drop empty pieces
    resolveds = []
    # it = iter(str_list)
    derabanan_flag = False  # True when the next token was consumed as a derabanan marker
    book = None  # current book node, looked up via self._table
    for i, word in enumerate(str_list):
        if derabanan_flag:
            # The previous iteration consumed this token as a derabanan
            # marker — resolve section 1 of the chosen book and move on.
            derabanan_flag = False
            resolved = self._tracker.resolve(book, [1])
            resolveds.append(resolved)
            continue
        elif re.search(reg_book, word):
            # Token names a book (or 'sham' = same book as before).
            # book = word
            # if book == u'שם':
            #     book = None
            # elif book == u'לאוין':
            #     book = u'Sefer Mitzvot Gadol, Volume One'
            try:
                if word != u'שם':
                    # Peek at the next token for a derabanan marker.
                    derabanan = filter(None, [item.strip() for item in re.split(u'(מד"ס|מ?דרבנן)',str_list[i+1].strip())])
            except IndexError:
                # Book name with no following location token.
                # mass.ErrorFile.write('error smg, no place in book notation')
                mass.error_flag = 'error smg, no place in book notation'
                print 'error smg, no place in book notation'
                return
            if word == u'עשין' and len(derabanan) > 1 and (derabanan[0] != u"סימן"):
                # 'asin' followed by a derabanan marker: the volume is chosen
                # by the letter (aleph-heh) embedded in the marker.
                book = re.search(u'[א-ה]',derabanan[1])
                # print book.group(0)
                book = self._table[book.group(0)]
                derabanan_flag = True
            elif re.match(reg_book, word):
                book = self._table[word]
            else:
                mass.ErrorFile.write("error smg, don't recognize book name")
                print "error smg, don't recognize book name", book
                return
        else:
            # Token is a whitespace-separated list of mitzva numerals.
            mitzva = re.split('\s', word)
            for m in mitzva:
                # if m == u'סימן':
                #     continue
                if m == u'שם':
                    m = None  # 'sham' — same location as before
                elif getGematriaVav(m):
                    m = getGematriaVav(m)  # numeric value of the Hebrew numeral
                else:
                    m = None
                resolved = self._tracker.resolve(book, [m])
                resolveds.append(resolved)
    if not resolveds:
        # Nothing was resolved — fall back to the bare book reference.
        resolved = self._tracker.resolve(book, [None])
        resolveds.append(resolved)
    # print resolveds
    return resolveds
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:59,代码来源:basic_ein_parser.py
示例5: parse_semag
def parse_semag(self, str, mass):
    """Parse a Semag citation string into resolved refs (hardened variant).

    Like the basic parser but with 'siman' keyword handling, error routing
    through mass.write_shgia(), and resolution via resolveExceptin().
    Returns the list of resolveds, or None after reporting an error.
    NOTE(review): Python 2-era code; `str` shadows the builtin but is kept
    for interface compatibility.
    """
    # Book keywords: optional vav/bet prefix + 'asin' / 'sham' / 'lavin' (two spellings).
    reg_book = re.compile(u'ו?ב?(עשין|שם|לאוין|לאין)')
    split = re.split(reg_book, str.strip())
    str_list = filter(None, [item.strip() for item in split])  # drop empty pieces
    resolveds = []
    derabanan_flag = False  # True when the next token was consumed as a derabanan marker
    book = None  # current book node, looked up via self._table
    reg_siman = u"סי'?|סימן"  # 'siman' keyword, abbreviated or full
    reg_vav = u'ו{}'.format(reg_siman)  # vav-prefixed 'siman'
    for i, word in enumerate(str_list):
        if derabanan_flag:
            # Previous token switched us to a derabanan book; resolve section 1.
            derabanan_flag = False
            # resolved = self._tracker.resolve(book, [1])
            resolved = resolveExceptin(self._tracker, book, [1])
            resolveds.append(resolved)
            continue
        elif re.search(reg_book, word):
            try:
                if word != u'שם':
                    # Peek at the next token for a derabanan marker.
                    derabanan = filter(None, [item.strip() for item in re.split(u'(מד"ס|מ?דרבנן)',str_list[i+1].strip())])
            except IndexError:
                mass.write_shgia('error smg, no place in book notation')
                return
            if word == u'עשין' and len(derabanan) > 1:
                # Volume chosen by the letter (aleph-heh) inside the marker.
                book = re.search(u'[א-ה]',derabanan[1])
                book = self._table[book.group(0)]
                derabanan_flag = True
            elif re.match(reg_book, word):
                book = self._table[word]
            else:
                mass.write_shgia("error smg, don't recognize book name")
                return
        else:
            # Token is a whitespace-separated list of mitzva numerals.
            mitzva = re.split('\s', word)
            for m in mitzva:
                if re.search(reg_vav, m) and not book:
                    # Vav-'siman' with no book context: emit a bare reference.
                    # resolved = self._tracker.resolve(book, [None])
                    resolved = resolveExceptin(self._tracker, book, [None])
                    resolveds.append(resolved)
                if m == u'ו?שם':
                    # NOTE(review): compares against the literal text 'ו?שם' —
                    # looks like a regex that escaped into an equality test; confirm.
                    m = None
                elif re.search(reg_siman, m):
                    continue  # bare 'siman' keyword — skip
                elif getGematriaVav(m, mass):
                    m = getGematriaVav(m, mass)  # numeric value of the numeral
                else:
                    m = None
                # resolved = self._tracker.resolve(book, [m])
                resolved = resolveExceptin(self._tracker, book, [m])
                resolveds.append(resolved)
    if not resolveds:
        # Nothing was resolved — fall back to the bare book reference.
        # resolved = self._tracker.resolve(book, [None])
        resolved = resolveExceptin(self._tracker, book, [None])
        resolveds.append(resolved)
    if len([item for item in resolveds if not isinstance(item, Ref)]) > 0:
        mass.write_shgia(u'error from ibid in Ref or table none problem')
    return resolveds
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:59,代码来源:ein_parser.py
示例6: get_text_completion_list
def get_text_completion_list(self):
    """Build completion candidates from the words of the current document.

    Words starting with the text before the cursor come first; words that
    merely contain (most of) its '_'-separated fragments are appended last.
    """
    # words = self.toPlainText().split(QRegExp("[^a-zA-Z0-9_]"),
    # QString.SkipEmptyParts)
    tokens = regex.split("\W+", unicode(self.toPlainText()), flags=regex.UNICODE)
    till_cursor = unicode(self.word_till_cursor())
    under_cursor = unicode(self.word_under_cursor())
    # words.removeDuplicates() / words.sort() equivalent:
    prefix_matches = []
    fragment_matches = []
    for token in sorted(set(tokens)):
        if token == till_cursor:
            continue
        if token != under_cursor and token.upper().find(till_cursor.upper()) == 0:
            # Case-insensitive prefix match.
            prefix_matches.append(token)
        elif len(token) > len(till_cursor):
            # Fragment match: count how many '_'-separated pieces (len >= 2)
            # of the typed text occur anywhere in the candidate.
            fragments = [piece for piece in till_cursor.split("_") if piece != "" and len(piece) >= 2]
            hits = 0
            for fragment in fragments:
                if token.upper().find(fragment.upper()) >= 0:
                    hits += 1
            if hits == len(fragments):
                prefix_matches.append(token)
            elif hits * 1.20 >= len(fragments):
                fragment_matches.append(token)
    return (
        super(WithWordCompletionMulty_, self).get_text_completion_list()
        + prefix_matches
        + fragment_matches
    )
开发者ID:jleahred,项目名称:miow,代码行数:33,代码来源:Completion.py
示例7: split
def split(self, pattern, maxsplit=0, flags=0, out_lvs=None, dupe=False):
    """Split this Ex's string by *pattern*, assigning a language variety to each piece.

    out_lvs: per-piece lv codes; falsy entries default to this Ex's lv, and
    the last lv is repeated for any extra pieces.  dupe: if True, the last
    piece is duplicated so every lv receives a piece.  Returns a list of Ex.

    FIX: the original declared a mutable default (out_lvs=['']) — an
    anti-pattern that is fragile next to the in-place `+=` below; a None
    sentinel with a defensive copy is behaviorally identical and safe.
    """
    out_lvs = [''] if out_lvs is None else list(out_lvs)
    split_list = re.split(pattern, str(self), maxsplit, flags)
    out_lvs = [out_lv if out_lv else self.lv for out_lv in out_lvs]
    out_lvs += [out_lvs[-1]] * (len(split_list) - len(out_lvs))
    if dupe:
        split_list += [split_list[-1]] * (len(out_lvs) - len(split_list))
    return [Ex(split_string, lv) for split_string, lv in zip(split_list, out_lvs)]
开发者ID:longnow,项目名称:panlex-tools,代码行数:7,代码来源:panlex.py
示例8: tokenize_words_old
def tokenize_words_old(str):
    """Tokenize *str* into words.

    Replaces maqaf with a space, strips HTML tags and parenthesized refs,
    normalizes double quotes to apostrophes, then splits on whitespace and
    punctuation.  Returns a list of non-empty tokens.

    FIX: the original returned `filter(bool, ...)`, a lazy filter object on
    Python 3; a list comprehension returns a real list on both versions.
    (`str` shadows the builtin; kept for interface compatibility.)
    """
    str = str.replace(u"־", " ")  # maqaf -> space
    str = re.sub(r"</?[^>]+>", "", str)  # get rid of html tags
    str = re.sub(r"\([^\(\)]+\)", "", str)  # get rid of refs
    str = str.replace('"', "'")
    word_list = [w for w in re.split(r"[\s\:\-\,\.\;\(\)\[\]\{\}]", str) if w]
    return word_list
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:7,代码来源:mesorat_hashas.py
示例9: read_list
def read_list(filenames, listname, domlst, ip4lst, ip6lst, rxlst):
    """Read filter-list files into the domain / IPv4 / IPv6 / regex collections.

    Each line has trailing '#' comments and line-feeds stripped; entries are
    classified by the module-level is_ip4 / is_ip6 / is_dom matchers, and
    anything else must compile as a regex or is logged as a syntax error.
    The four collections are mutated in place and also returned.

    FIX: the validation catch was `except BaseException`, which also swallows
    KeyboardInterrupt/SystemExit; narrowed to Exception.
    """
    for filename in filenames:
        lines = get_lines(filename, listname)
        if lines:
            for count, line in enumerate(lines, 1):
                # Strip comments and line-feeds
                entry = regex.split('\s*#\s*', line.replace('\r', '').replace('\n', ''))[0].strip()
                if entry:
                    if is_ip4.search(entry):
                        ip4lst[entry] = entry
                    elif is_ip6.search(entry):
                        ip6lst[entry] = entry.lower()
                    elif is_dom.search(entry):
                        domlst[entry.strip('.').lower() + '.'] = entry.lower()
                    else:
                        try:
                            regex.compile(entry, regex.I)  # compile only to validate syntax
                            rxlst.add(entry)
                        except Exception:
                            log_err('LIST [#{0}]: Invalid Syntax: \"{1}\"'.format(count, line))
        else:
            log_err('LIST: Empty file \"{0}\"'.format(filename))
    log_info('LIST-TOTALS [{0}]: {1} Domains, {2} IPv4-Addresses, {3} IPv6-Addresses and {4} Regexes'.format(listname, len(domlst), len(ip4lst), len(ip6lst), len(rxlst)))
    return domlst, ip4lst, ip6lst, rxlst
开发者ID:cbuijs,项目名称:unbound-dns-filter,代码行数:35,代码来源:unbound-dns-filter.py
示例10: read_dst
def read_dst(filename):
    """
    Function reads files in Phylip dst-format.

    Parameters
    ----------
    filename : string
        Name of the file which should have the extension ``dst``.

    Returns
    -------
    data : tuple
        A tuple (taxa, matrix): taxon names (first 10 columns of each data
        row) and the distance matrix as lists of floats.  Returns ([], [])
        when the file cannot be opened.

    FIX: the original used a bare ``except``, printed a message, and then
    fell through to iterate the unbound ``f`` (NameError); it also never
    closed the file.  Now returns empty results and uses a ``with`` block.
    """
    taxa, matrix = [], []
    try:
        f = open(filename)
    except IOError:
        print("[!] Could not find the file {0}!".format(filename))
        return taxa, matrix
    with f:
        for i, line in enumerate(f):
            if i > 0:  # first line is the Phylip header (taxon count)
                taxa.append(line[0:10].strip())
                matrix.append([float(cell) for cell in
                               re.split(r'\s+', line[11:].strip())])
    return taxa, matrix
开发者ID:RichardLitt,项目名称:lingpy,代码行数:33,代码来源:phylip.py
示例11: __get_codon_usage
def __get_codon_usage(self):
    '''Fetches and parses the Kazusa codon-usage table for this taxonomy id.'''
    url = 'http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=' \
        + self.__taxonomy_id + '&aa=1&style=GCG'
    # One empty probability dict per amino-acid code; filled from the page.
    usage = {aa_code: {} for aa_code in AA_CODES.values()}
    inside_table = False
    for line in urllib2.urlopen(url):
        # The codon table is the block between <PRE> and </PRE>.
        if line == '<PRE>\n':
            inside_table = True
        elif line == '</PRE>\n':
            break
        elif inside_table:
            fields = re.split('\\s+', line)
            if fields[0] in AA_CODES:
                # fields: amino acid, codon, ..., usage value at index 3.
                usage[AA_CODES[fields[0]]][fields[1]] = float(fields[3])
    # Normalize each amino acid's codon frequencies via _scale().
    return {aa: _scale(probs) for aa, probs in usage.items()}
开发者ID:synbiochem,项目名称:synbiochem-py,代码行数:25,代码来源:seq_utils.py
示例12: separate_string
def separate_string(string):
    """Split *string* into surrounding text and <...> tag contents.

    Every '>' is first rewritten to '<', then the string is cut at each '<'
    not followed by '!' or '='; even slices are the text pieces, odd slices
    are the tag bodies.

    >>> separate_string("test <2>")
    (['test ', ''], ['2'])
    """
    normalized = regex.sub(r'>', '<', string)
    pieces = regex.split(r'<(?![!=])', normalized)
    return pieces[::2], pieces[1::2]
开发者ID:aisbaa,项目名称:reparse,代码行数:7,代码来源:util.py
示例13: wptexturize
def wptexturize(self, text):
    """Apply WordPress-style texturization to *text*, leaving the contents of
    no-texturize tags and shortcodes untouched."""
    # Transform into regexp sub-expression used in _wptexturize_pushpop_element.
    # Rebuilt every call in case plugins change the lists between calls.
    no_texturize_tags = '(' + '|'.join(self.default_no_texturize_tags) + ')'
    no_texturize_shortcodes = '(' + '|'.join(self.default_no_texturize_shortcodes) + ')'
    tags_stack = []
    shortcodes_stack = []
    # Split into tags (<...>), shortcodes ([...]) and the text between them.
    # (Python lacks PHP's /U modifier, hence the non-greedy pattern.)
    pieces = regex.split('(<.*?>|\[.*?\])', text, flags=regex.DOTALL)
    out = []
    for piece in pieces:
        if not piece:
            continue
        # Only dispatch to the push/pop helper when the piece opens like a tag.
        head = piece[0]
        if head == '<':
            self.__wptexturize_pushpop_element(piece, tags_stack, no_texturize_tags, '<', '>')
        elif head == '[':
            self.__wptexturize_pushpop_element(piece, shortcodes_stack, no_texturize_shortcodes, '[', ']')
        elif not shortcodes_stack and not tags_stack:
            # Plain text outside any no-texturize region: apply replacements.
            for search, replacement in self.static:
                piece = piece.replace(search, replacement)
            for search, replacement in self.dynamic:
                piece = regex.sub(search, replacement, piece)
            piece = regex.sub('&([^#])(?![a-zA-Z1-4]{1,8};)', '&\\1', piece)
        out.append(piece)
    return ''.join(out)
开发者ID:ChillarAnand,项目名称:plugins,代码行数:33,代码来源:default_filters.py
示例14: split
def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
    """Wrapper for split."""
    # Expand bregex search backrefs before delegating to regex.split.
    prepared = _apply_search_backrefs(pattern, flags)
    return regex.split(prepared, string, maxsplit, flags, concurrent, **kwargs)
开发者ID:qhw0820,项目名称:BracketHighlighter,代码行数:7,代码来源:bregex.py
示例15: get_lang_class
def get_lang_class(name):
    """Return the language class registered for *name*, falling back to its
    leading alphanumeric prefix (e.g. 'en_US' -> 'en')."""
    if name in LANGUAGES:
        return LANGUAGES[name]
    # Cut the name at the first non-alphanumeric character.
    prefix = re.split('[^a-zA-Z0-9]', name, 1)[0]
    if prefix in LANGUAGES:
        return LANGUAGES[prefix]
    raise RuntimeError('Language not supported: %s' % name)
开发者ID:kunbud1989,项目名称:spaCy,代码行数:7,代码来源:util.py
示例16: fileiter
def fileiter(src, ext=None, rex=None):
    """Iterate over files starting at src.

    ext can be a string of space/comma-separated extensions. Or it can be an
    empty string. The empty string only matches files with no extension.
    rex should be a regular expression object or pattern. Only file paths
    which produce a match will be yielded.

    BUG FIX: the original called rex.search(m) — searching the extension
    Match object instead of the path — and `m` was unbound (NameError)
    whenever ext was None.  The regex is now applied to the file path.
    """
    if ext is not None and ext != '':
        ext = regex.split(r'[, ]+', ext)
    if rex is not None and type(rex) is str:
        rex = regex.compile(rex)
    extrex = regex.compile(r'.*\.(.*)')  # captures the extension, if any
    for dirpath, dirnames, filenames in os.walk(src):
        for infile in (os.path.join(dirpath, a) for a in filenames):
            if ext is not None:
                m = extrex.search(infile)
                if m is None:
                    # No extension: only acceptable when ext == ''.
                    if ext != '':
                        continue
                else:
                    if m[1] not in ext:
                        continue
            if rex is not None:
                if rex.search(infile) is None:
                    continue
            yield infile
开发者ID:jgpacker,项目名称:suttacentral,代码行数:28,代码来源:helpers.py
示例17: Populate
def Populate(self, filename):
    """Parse a play script file: line 1 is the title; later lines are matched
    against the module-level regex_acte / regex_scene / regex_character
    patterns to build the Acts -> Scenes -> Repliques hierarchy.

    NOTE(review): assumes the file is well-ordered (an act line before any
    scene line, a scene before any character line); otherwise cur_acte /
    cur_scene are unbound and a NameError is raised — confirm inputs.
    """
    with open(filename) as f:
        for num, line in enumerate(f, 1):
            if num == 1:
                # First line is the play's title.
                self.Title = line
                continue
            match = regex_acte.match(line)
            if match:
                # New act header.
                cur_acte = Acte(match.group("NB_Acte").strip())
                self.Acts.append(cur_acte)
            else:
                match = regex_scene.match(line)
                if match:
                    # New scene header, attached to the current act.
                    cur_scene = Scene(match.group("NB_Scene").strip())
                    cur_acte.Scenes.append(cur_scene)
                else:
                    match = regex_character.match(line)
                    if match:
                        # Character line: split on ", " / ". " — uppercase
                        # pieces are speaker names, the rest stage directions.
                        info = [i.replace('\n', '')
                                for i in re.split(", |\. ", line)]
                        personnage = [x for x in info if x.isupper()]
                        didascalie = [x for x in info if not x.isupper()]
                        self.Characters.update(personnage)
                        cur_replique = Replique(personnage, didascalie)
                        cur_scene.Repliques.append(cur_replique)
                    else:
                        # Plain text: continuation of the current replique,
                        # if one has been started ('in locals()' guard).
                        if line and 'cur_replique' in locals():
                            cur_replique.text += line
开发者ID:simson,项目名称:repetiteur,代码行数:31,代码来源:text_reader.py
示例18: __init__
def __init__(self, text, window, lang_en, def_counter):
    """
    :param text:
        Complete tale/story
    :param window:
        Story_UI window
    :param lang_en:
        Passed through to the image lookup — presumably a language flag;
        NOTE(review): confirm semantics against image_from_keyword_list.
    :param def_counter:
        Handed to AudioService.prepare_voice — semantics not visible here;
        confirm against AudioService.
    """
    QtCore.QObject.__init__(self)
    self.word_list = re.split('\s', text)  # naive whitespace tokenization
    self.window = window
    # Split into sentence-like chunks after punctuation (regex V1 lookbehind).
    self.sentence_list = regex.split("(?V1)(?<=\.|:|;|-|,|\!)", text)
    self.sentence_list = self.join_short_sentences()
    self.keyword_list = []
    self.timing_list = []
    # Derive one keyword per sentence chunk using a 4-worker thread pool.
    self.pool = ThreadPool(4)
    self.keyword_list = self.pool.map(derive_keyword, self.sentence_list)
    self.pool.close()
    self.pool.join()
    # Voice synthesis runs on a daemon thread so it cannot block shutdown.
    self.audio_service = AudioService(window)
    self.audio_thread = threading.Thread(target=self.audio_service.prepare_voice,
                                         args=(self.sentence_list, def_counter))
    self.audio_thread.setDaemon(True)
    self.audio_thread.start()
    # Image retrieval for the derived keywords, also on a daemon thread.
    self.image_thread = threading.Thread(target=image_from_keyword_list, args=(self.keyword_list, window, lang_en))
    self.image_thread.setDaemon(True)
    self.image_thread.start()
示例19: node_volumes
def node_volumes(node, volume):
    """Return the schema node for the requested volume of *node*'s book:
    volume aleph maps to the 'I <book>' node, anything else to 'II <book>'."""
    # Second whitespace-separated token of the full title is the book name.
    tokens = re.split('\s', node.full_title())
    base = tokens[1]
    prefix = u'I ' if volume == u'א' else u'II '
    return library.get_schema_node(prefix + base)
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:7,代码来源:ibid.py
示例20: siman_smk_exctractor
def siman_smk_exctractor(smk_text):
    """Extract siman numbers from a Smak (Sefer Mitzvot Katan) citation string.

    Tokens are split on whitespace; 'siman'/'seif' keywords are skipped,
    'X-Y' ranges are expanded, and Hebrew numerals are converted through
    getGematria() / check_vav().  Returns a list of ints.
    NOTE(review): a range token also falls through to the numeral checks
    below (no continue after expansion) — confirm this is intentional.
    """
    split = re.split(u'\s', smk_text)
    simanim = []
    for word in split:
        # Skip empty tokens and the 'siman' / 'seif' keywords.
        if not word or word == u'סימן' or word == u'סעיף':
            continue
        word = re.sub(u"[;.,']", u"", word)  # strip punctuation and geresh
        if re.search(u'-', word):
            # Range 'start-end': expand to every siman in between (inclusive).
            borders = re.search(u"(.*?)-(.*)", word)
            start = getGematria(borders.group(1))
            end = getGematria(borders.group(2))
            for siman in range(start, end+1):
                simanim.append(siman)
        if not is_hebrew_number(word):
            if not check_vav(word):
                # Not a numeral at all — stop and return what we have so far.
                # print smk_text, simanim
                return simanim
            else:
                simanim.append(check_vav(word))
        else:
            smk_siman = getGematria(word)
            simanim.append(smk_siman)
    # print smk_text, simanim
    return simanim
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:25,代码来源:mitzvot_link_scraper.py
注:本文中的regex.split函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论