• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python regex.split函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中regex.split函数的典型用法代码示例。如果您正苦于以下问题:Python split函数的具体用法?Python split怎么用?Python split使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了split函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: rev_ip

def rev_ip(ip, delimiter=None):
    """Build the reverse-DNS (``.arpa``) name for an address or CIDR block.

    The input is first normalised with ``expand_ip``.  A ``/prefix`` suffix
    is honoured when present; otherwise a host prefix is assumed (32 when
    ``is_ip4`` matches, 128 when ``is_ip6`` matches).

    Args:
        ip: Address or CIDR string, handed to ``expand_ip``.
        delimiter: Optional separator.  When the prefix does not fall on a
            label boundary (octet for IPv4, nibble for IPv6) the prefix
            length is appended to the first label using this separator.
            Without it such prefixes yield ``False``.

    Returns:
        The reverse name ending in ``.in-addr.arpa.`` or ``.ip6.arpa.``, or
        ``False`` when no name could be built.
    """
    revip = False
    eip = expand_ip(ip)
    prefix = False

    if '/' in eip:
        eip, prefix = regex.split('/', eip)[0:2]
    elif is_ip4.search(eip):
        prefix = '32'
    elif is_ip6.search(eip):
        prefix = '128'

    if not prefix:
        return revip

    prefix = int(prefix)
    if is_ip4.search(eip):
        # Floor division keeps the value an int on Python 3 as well, so it
        # can be used directly as a slice bound / list index.
        octets = prefix // 8
        if prefix in (8, 16, 24, 32):
            revip = '.'.join(eip.split('.')[0:octets][::-1]) + '.in-addr.arpa.'
        elif delimiter:
            octs = eip.split('.')[::-1]
            octs[3 - octets] = octs[3 - octets] + delimiter + str(prefix)
            revip = '.'.join(octs[3 - octets:]) + '.in-addr.arpa.'

    elif is_ip6.search(eip):
        nibbles = prefix // 4
        if prefix % 4 == 0 and 4 <= prefix <= 128:
            # One character per nibble; list() is required on Python 3 where
            # filter() is lazy.
            nibs = list(filter(None, regex.split('(.)', regex.sub(':', '', eip))))
            # Each nibble plus its dot is two characters in the joined string.
            revip = '.'.join(nibs)[0:nibbles * 2][::-1].strip('.') + '.ip6.arpa.'
        elif delimiter:
            nibs = list(filter(None, regex.split('(.)', regex.sub(':', '', eip))))[::-1]
            nibs[31 - nibbles] = nibs[31 - nibbles] + delimiter + str(prefix)
            revip = '.'.join(nibs[31 - nibbles:]) + '.ip6.arpa.'

    return revip
开发者ID:cbuijs,项目名称:unbound-dns-filter,代码行数:32,代码来源:unbound-dns-filter.py


示例2: main

def main():
    """Spell-correct a corpus file word by word and print before/after text.

    The text is read through ``WP.readBook``, split into runs of newlines and
    the text between them, and every word is passed to ``NG.correct``.
    Newline runs are collapsed to a single newline in the corrected output.
    """
    lang = "english"
    datapkg = "corpus"
    book = "eng/myBigErrorsList.txt"

    data = WP.readBook(WP, datapkg, book)
    # The capture group keeps the newline runs in the split result.
    words = regex.split("(\n+)", data)
    ng = NG(lang)
    pieces = []
    for word in words:
        if "\n" in word:
            pieces.append('\n')
            continue
        for w in regex.split(r"\W+", word):
            if not w:
                continue
            # NOTE(review): the printed correction uses the lower-cased word
            # while the collected text uses the word as written - confirm
            # this difference is intended.
            print("correct(%r) => %r" % (w, ng.correct(w.lower())))
            pieces.append(str(ng.correct(w) + " "))
    cletter = "".join(pieces)

    print("######## Original Txt ########")
    print(data)
    print("######## Txt After Correction ########")
    print(cletter)
    print("################")
开发者ID:seekerFactory,项目名称:spelled,代码行数:25,代码来源:filecorct.py


示例3: getNames

def getNames(evalLines, locs1, locs2, x):
    """Parse rank/name entries out of the lines listed in locs1 and locs2.

    Args:
        evalLines: List that receives the index of every extra line consumed
            because the name was not on the rank line.  Mutated in place.
        locs1: Indices into ``x`` of lines that hold the bullet and rank
            themselves.
        locs2: Indices into ``x`` of lines that must be joined with the
            previous line before the bullet/rank pattern is applied.
        x: Sequence of text lines.

    Returns:
        Tuple ``(evalLines, entries)`` where ``entries`` is a list of dicts
        with the keys 'loc', 'name', 'nameFlag', 'rank', 'email', 'tel',
        'fax' and 'title'.  Fields other than 'loc' and 'rank' are left for
        ``emtefa`` to fill in.
    """
    rank_pattern = r'•\s*(.*?):\s*'
    keys = ['loc', 'name', 'nameFlag', 'rank', 'email', 'tel', 'fax', 'title']
    rank_on_line = set(locs1)
    entries = []
    for i in sorted(locs1 + locs2):
        D = {key: None for key in keys}
        D['loc'] = i
        if i in rank_on_line:
            # Rank is on this line.
            m = regex.split(rank_pattern, x[i])
        else:
            # Bullet and rank are spread over the previous and current line.
            # The keyword is ``flags``; the former ``flag=`` was not a valid
            # option name.
            m = regex.split(rank_pattern,
                            ' '.join([x[i - 1].strip(), x[i].strip()]),
                            flags=regex.UNICODE)
        D['rank'] = m[1].strip()
        if len(m[2].strip()) == 0:
            # Only a rank here: assume the name is on the next line,
            # potentially with some other info.
            evalLines.append(i + 1)
            emtefa(x[i + 1], D, x[i + 2])
        else:
            # Name is on the same line.
            emtefa(m[2].strip(), D, x[i + 1])
        entries.append(D)
    return (evalLines, entries)
开发者ID:aserlich,项目名称:frenchBur,代码行数:26,代码来源:frencBur2011.py


示例4: parse_semag

 def parse_semag(self, str, mass):
     """Resolve the references found in a citation string.

     The stripped input is split on the book keywords matched by
     ``reg_book``.  Each resulting piece is either a book keyword, which
     selects ``book`` through ``self._table``, or a run of
     whitespace-separated tokens that are converted with ``getGematriaVav``
     and passed to ``self._tracker.resolve``.

     :param str: citation text to parse (shadows the builtin ``str``).
     :param mass: object used for error reporting; this method sets
         ``mass.error_flag`` and writes to ``mass.ErrorFile``.
     :return: list of the values returned by ``self._tracker.resolve``, or
         ``None`` when one of the error branches is taken.
     """
     # Only the keyword is captured, so re.split keeps it in the result
     # while the optional leading letter is dropped.
     reg_book = re.compile(u'ו?(עשין|שם|לאוין)')
     split = re.split(reg_book, str.strip())
     # Drop the empty pieces left by the split.
     # NOTE(review): str_list is indexed below, so this relies on filter()
     # returning a list (Python 2, as the print statements suggest).
     str_list = filter(None, [item.strip() for item in split])
     resolveds = []
     # Set when the previous piece selected a book from the following piece;
     # that following piece is then resolved as [1] instead of being parsed.
     derabanan_flag = False
     book = None
     for i, word in enumerate(str_list):
         if derabanan_flag:
             derabanan_flag = False
             resolved = self._tracker.resolve(book, [1])
             resolveds.append(resolved)
             continue
         elif re.search(reg_book, word):
             try:
                 if word != u'שם':
                     # Look ahead at the next piece and split it on the
                     # marker words, keeping the markers.
                     derabanan = filter(None, [item.strip() for item in re.split(u'(מד"ס|מ?דרבנן)',str_list[i+1].strip())])
             except IndexError:
                 # A book keyword was the last piece: nothing follows it.
                 mass.error_flag = 'error smg, no place in book notation'
                 print 'error smg, no place in book notation'
                 return
             if word == u'עשין' and len(derabanan) > 1 and (derabanan[0] != u"סימן"):
                 # The book is chosen by the first letter in the range found
                 # in the second look-ahead piece.
                 book = re.search(u'[א-ה]',derabanan[1])
                 book = self._table[book.group(0)]
                 derabanan_flag = True
             elif re.match(reg_book, word):
                 book = self._table[word]
             else:
                 mass.ErrorFile.write("error smg, don't recognize book name")
                 print "error smg, don't recognize book name", book
                 return
         else:
             # Not a book keyword: resolve every whitespace-separated token
             # against the current book.
             mitzva = re.split('\s', word)
             for m in mitzva:
                 if m == u'שם':
                     m = None
                 elif getGematriaVav(m):
                     m = getGematriaVav(m)
                 else:
                     m = None
                 resolved = self._tracker.resolve(book, [m])
                 resolveds.append(resolved)
     if not resolveds:
         # Nothing was resolved in the loop: resolve the book alone.
         resolved = self._tracker.resolve(book, [None])
         resolveds.append(resolved)
     return resolveds
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:59,代码来源:basic_ein_parser.py


示例5: parse_semag

    def parse_semag(self, str, mass):
        """Resolve the references found in a citation string.

        The stripped input is split on the book keywords matched by
        ``reg_book``.  Each resulting piece is either a book keyword, which
        selects ``book`` through ``self._table``, or a run of
        whitespace-separated tokens that are converted with
        ``getGematriaVav`` and resolved through ``resolveExceptin``.

        :param str: citation text to parse (shadows the builtin ``str``).
        :param mass: object used for error reporting via ``write_shgia``;
            it is also passed on to ``getGematriaVav``.
        :return: list of the values returned by ``resolveExceptin``, or
            ``None`` when one of the error branches is taken.
        """
        # Only the keyword is captured, so re.split keeps it in the result
        # while the optional leading letters are dropped.
        reg_book = re.compile(u'ו?ב?(עשין|שם|לאוין|לאין)')
        split = re.split(reg_book, str.strip())
        # Drop the empty pieces left by the split.
        # NOTE(review): str_list is indexed below, so this relies on
        # filter() returning a list - confirm the target Python version.
        str_list = filter(None, [item.strip() for item in split])
        resolveds = []
        # Set when the previous piece selected a book from the following
        # piece; that following piece is then resolved as [1].
        derabanan_flag = False
        book = None
        reg_siman = u"סי'?|סימן"
        # NOTE(review): alternation has the lowest precedence, so the
        # prefixed letter binds only to the first alternative of reg_siman -
        # confirm this is intended.
        reg_vav = u'ו{}'.format(reg_siman)
        for i, word in enumerate(str_list):
            if derabanan_flag:
                derabanan_flag = False
                resolved = resolveExceptin(self._tracker, book, [1])
                resolveds.append(resolved)
                continue
            elif re.search(reg_book, word):
                try:
                    if word != u'שם':
                        # Look ahead at the next piece and split it on the
                        # marker words, keeping the markers.
                        derabanan = filter(None, [item.strip() for item in re.split(u'(מד"ס|מ?דרבנן)',str_list[i+1].strip())])
                except IndexError:
                    # A book keyword was the last piece: nothing follows it.
                    mass.write_shgia('error smg, no place in book notation')
                    return
                if word == u'עשין' and len(derabanan) > 1:
                    # The book is chosen by the first letter in the range
                    # found in the second look-ahead piece.
                    book = re.search(u'[א-ה]',derabanan[1])
                    book = self._table[book.group(0)]
                    derabanan_flag = True
                elif re.match(reg_book, word):
                    book = self._table[word]
                else:
                    mass.write_shgia("error smg, don't recognize book name")
                    return
            else:
                # Not a book keyword: resolve every whitespace-separated
                # token against the current book.
                mitzva = re.split('\s', word)
                for m in mitzva:
                    if re.search(reg_vav, m) and not book:
                        # A prefixed section marker with no book selected
                        # yet: add a book-less resolution first.
                        resolved = resolveExceptin(self._tracker, book, [None])
                        resolveds.append(resolved)

                    # NOTE(review): this is a literal string comparison, so
                    # the '?' is not treated as a regex quantifier here -
                    # presumably a pattern match was intended; confirm.
                    if m == u'ו?שם':
                        m = None
                    elif re.search(reg_siman, m):
                        # Section-marker words carry no number; skip them.
                        continue
                    elif getGematriaVav(m, mass):
                        m = getGematriaVav(m, mass)
                    else:
                        m = None
                    resolved = resolveExceptin(self._tracker, book, [m])
                    resolveds.append(resolved)
        if not resolveds:
            # Nothing was resolved in the loop: resolve the book alone.
            resolved = resolveExceptin(self._tracker, book, [None])

            resolveds.append(resolved)
        # Report when any result is not a Ref instance.
        if len([item for item in resolveds if not isinstance(item, Ref)]) > 0:
            mass.write_shgia(u'error from ibid in Ref or table none problem')
        return resolveds
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:59,代码来源:ein_parser.py


示例6: get_text_completion_list

 def get_text_completion_list(self):
     """Return completion candidates taken from the words of the document.

     The result is the parent class's list, followed by the strong matches
     and then the weak matches:

     * strong: words that start (case-insensitively) with the text typed up
       to the cursor, or longer words that contain every "_"-separated
       fragment (two characters or more) of that text;
     * weak: longer words that contain enough of those fragments
       (``matches * 1.20 >= number of fragments``).

     The typed text itself is never offered, and the word under the cursor
     is not offered as a prefix match.
     """
     all_words = regex.split("\W+", unicode(self.toPlainText()), flags=regex.UNICODE)
     typed = unicode(self.word_till_cursor())
     under_cursor = unicode(self.word_under_cursor())

     typed_upper = typed.upper()
     # Fragments of the typed text that are long enough to be significant.
     fragments = [part for part in typed.split("_") if len(part) >= 2]

     strong = []
     weak = []
     for candidate in sorted(set(all_words)):
         if candidate == typed:
             continue
         candidate_upper = candidate.upper()
         if candidate != under_cursor and candidate_upper.startswith(typed_upper):
             strong.append(candidate)
             continue
         if len(candidate) <= len(typed):
             continue
         hits = sum(1 for part in fragments if part.upper() in candidate_upper)
         if hits == len(fragments):
             strong.append(candidate)
         elif hits * 1.20 >= len(fragments):
             weak.append(candidate)

     inherited = super(WithWordCompletionMulty_, self).get_text_completion_list()
     return inherited + strong + weak
开发者ID:jleahred,项目名称:miow,代码行数:33,代码来源:Completion.py


示例7: split

 def split(self, pattern, maxsplit=0, flags=0, out_lvs=[''], dupe=False):
     """Split this expression's text with a regex into a list of ``Ex``.

     :param pattern: regular expression (string or compiled) to split on.
     :param maxsplit: maximum number of splits, as for ``re.split``.
     :param flags: regex flags, as for ``re.split``.
     :param out_lvs: values paired with the resulting pieces, in order.
         Falsy entries are replaced by ``self.lv``; when there are fewer
         entries than pieces the last one is repeated.  The default list
         is never mutated here (it is rebuilt before being extended).
     :param dupe: when true and there are more ``out_lvs`` than pieces, the
         last piece is repeated so that every entry gets one.
     :return: list of ``Ex`` objects, one per (piece, lv) pair.
     """
     # maxsplit/flags are passed by keyword: positional use is deprecated
     # in recent Python versions.
     split_list = re.split(pattern, str(self), maxsplit=maxsplit, flags=flags)
     out_lvs = [out_lv if out_lv else self.lv for out_lv in out_lvs]
     out_lvs += [out_lvs[-1]] * (len(split_list) - len(out_lvs))
     if dupe:
         split_list += [split_list[-1]] * (len(out_lvs) - len(split_list))
     return [Ex(split_string, lv) for split_string, lv in zip(split_list, out_lvs)]
开发者ID:longnow,项目名称:panlex-tools,代码行数:7,代码来源:panlex.py


示例8: tokenize_words_old

def tokenize_words_old(str):
    """Split text into a list of words.

    HTML tags and parenthesised references are removed, the maqaf-style
    hyphen is treated as a space, and double quotes become single quotes
    before the text is split on whitespace and common punctuation.

    :param str: text to tokenize (shadows the builtin ``str``).
    :return: list of non-empty tokens, in order of appearance.
    """
    str = str.replace(u"־", " ")
    str = re.sub(r"</?[^>]+>", "", str)  # get rid of html tags
    str = re.sub(r"\([^\(\)]+\)", "", str)  # get rid of refs
    str = str.replace('"', "'")
    # Build a real list: filter() is lazy on Python 3, which would hand
    # callers an iterator they can neither index nor take len() of.
    return [word for word in re.split(r"[\s\:\-\,\.\;\(\)\[\]\{\}]", str) if word]
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:7,代码来源:mesorat_hashas.py


示例9: read_list

def read_list(filenames, listname, domlst, ip4lst, ip6lst, rxlst):
    """Load list files and sort their entries into the given collections.

    Every line has its comment (``# ...``) and line endings removed.  The
    remaining entry is classified with ``is_ip4``, ``is_ip6`` and ``is_dom``;
    anything else is accepted as a regex if it compiles.

    Args:
        filenames: Iterable of file names, each read through ``get_lines``.
        listname: Name of the list, passed to ``get_lines`` and used in the
            totals log line.
        domlst: Mapping updated with ``"<lowercased domain>." -> entry``.
        ip4lst: Mapping updated with ``entry -> entry``.
        ip6lst: Mapping updated with ``entry -> lowercased entry``.
        rxlst: Set-like object; valid regex entries are ``add``-ed.

    Returns:
        The tuple ``(domlst, ip4lst, ip6lst, rxlst)`` (same objects, updated
        in place).
    """
    for filename in filenames:
        lines = get_lines(filename, listname)
        if not lines:
            log_err('LIST: Empty file \"{0}\"'.format(filename))
            continue

        for count, line in enumerate(lines, 1):
            # Strip comments and line-feeds
            entry = regex.split(r'\s*#\s*', line.replace('\r', '').replace('\n', ''))[0].strip()
            if not entry:
                continue

            if is_ip4.search(entry):
                ip4lst[entry] = entry
            elif is_ip6.search(entry):
                ip6lst[entry] = entry.lower()
            elif is_dom.search(entry):
                domlst[entry.strip('.').lower() + '.'] = entry.lower()
            else:
                try:
                    # Compile only to validate; the raw pattern is stored.
                    regex.compile(entry, regex.I)
                except Exception:
                    # Exception rather than BaseException so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    log_err('LIST [#{0}]: Invalid Syntax: \"{1}\"'.format(count, line))
                else:
                    rxlst.add(entry)

    log_info('LIST-TOTALS [{0}]: {1} Domains, {2} IPv4-Addresses, {3} IPv6-Addresses and {4} Regexes'.format(listname, len(domlst), len(ip4lst), len(ip6lst), len(rxlst)))
    return domlst, ip4lst, ip6lst, rxlst
开发者ID:cbuijs,项目名称:unbound-dns-filter,代码行数:35,代码来源:unbound-dns-filter.py


示例10: read_dst

def read_dst(
        filename
        ):
    """
    Function reads files in Phylip dst-format.

    The first line of the file (the taxon count) is skipped.  On every
    following line the first ten characters hold the taxon name and the
    text from column 11 on holds the whitespace-separated distances.
    Blank lines are ignored.

    Parameters
    ----------
    filename : string
        Name of the file which should have the extension ``dst``.

    Returns
    -------
    data : tuple
        A tuple consisting of a list of taxa and a matrix.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.  A message is printed first.

    """

    try:
        f = open(filename)
    except (IOError, OSError):
        # Report and re-raise: continuing would only fail later on the
        # unbound file handle.
        print("[!] Could not find the file {0}!".format(filename))
        raise

    taxa, matrix = [], []

    # The with-block guarantees the handle is closed.
    with f:
        for lineno, line in enumerate(f):
            if lineno == 0 or not line.strip():
                continue
            taxa.append(line[0:10].strip())
            matrix.append([float(cell) for cell in
                re.split(r'\s+', line[11:].strip()) if cell])

    return taxa, matrix
开发者ID:RichardLitt,项目名称:lingpy,代码行数:33,代码来源:phylip.py


示例11: __get_codon_usage

    def __get_codon_usage(self):
        '''Gets the codon usage table for a given taxonomy id.

        Downloads the GCG-style table for ``self.__taxonomy_id`` from
        kazusa.or.jp and reads the rows between the ``<PRE>`` and
        ``</PRE>`` lines.  For rows whose first column is a key of
        ``AA_CODES``, the second column is taken as the codon and the
        fourth as its value.

        Returns a dict mapping each value of ``AA_CODES`` to the result of
        ``_scale`` applied to its ``{codon: value}`` dict.
        '''
        from contextlib import closing

        aa_to_codon_prob = {aa_code: {} for aa_code in AA_CODES.values()}

        url = 'http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=' \
            + self.__taxonomy_id + '&aa=1&style=GCG'

        in_codons = False

        # closing() releases the HTTP connection even when we break out
        # early or a row fails to parse.
        with closing(urllib2.urlopen(url)) as response:
            for line in response:
                if line == '<PRE>\n':
                    in_codons = True
                elif line == '</PRE>\n':
                    break
                elif in_codons:
                    values = re.split('\\s+', line)

                    if values[0] in AA_CODES:
                        codon_prob = aa_to_codon_prob[AA_CODES[values[0]]]
                        codon_prob[values[1]] = float(values[3])

        aa_to_codon_prob.update((x, _scale(y))
                                for x, y in aa_to_codon_prob.items())

        return aa_to_codon_prob
开发者ID:synbiochem,项目名称:synbiochem-py,代码行数:25,代码来源:seq_utils.py


示例12: separate_string

def separate_string(string):
    """Split a string into the text outside and inside angle brackets.

    Every ``>`` is first turned into ``<`` so that a single split on ``<``
    (when not followed by ``!`` or ``=``) separates both sides.

    >>> separate_string("test <2>")
    (['test ', ''], ['2'])
    """
    pieces = regex.split(r'<(?![!=])', string.replace('>', '<'))
    # Pieces alternate: outside, inside, outside, ...
    outside = pieces[0::2]
    inside = pieces[1::2]
    return outside, inside
开发者ID:aisbaa,项目名称:reparse,代码行数:7,代码来源:util.py


示例13: wptexturize

    def wptexturize(self, text):
        """Apply the static and dynamic replacements to the plain-text parts of text.

        The text is split into ``<...>`` tags, ``[...]`` shortcodes and the
        text between them.  Tags and shortcodes are passed to
        ``__wptexturize_pushpop_element`` with the matching stack; other
        text is rewritten only while both stacks are empty.  Every non-empty
        piece finally gets the ampersand replacement before being joined
        back together.
        """
        # Rebuilt on every call in case plugins use these filters in a
        # context sensitive manner.
        tags_expr = '(' + '|'.join(self.default_no_texturize_tags) + ')'
        shortcodes_expr = '(' + '|'.join(self.default_no_texturize_shortcodes) + ')'

        open_tags = []
        open_shortcodes = []

        # PHP's /U modifier (inverted greediness) does not exist in Python,
        # hence the explicit non-greedy quantifiers.
        pieces = regex.split('(<.*?>|\[.*?\])', text, flags=regex.DOTALL)

        output = []
        for piece in pieces:
            if not piece:
                continue

            lead = piece[0]
            if lead == '<':
                self.__wptexturize_pushpop_element(piece, open_tags, tags_expr, '<', '>')
            elif lead == '[':
                self.__wptexturize_pushpop_element(piece, open_shortcodes, shortcodes_expr, '[', ']')
            elif not open_shortcodes and not open_tags:
                # Plain text with texturization enabled: static strings
                # first, then the regular expressions.
                for search, replacement in self.static:
                    piece = piece.replace(search, replacement)
                for search, replacement in self.dynamic:
                    piece = regex.sub(search, replacement, piece)

            output.append(regex.sub('&([^#])(?![a-zA-Z1-4]{1,8};)', '&#038;\\1', piece))
        return ''.join(output)
开发者ID:ChillarAnand,项目名称:plugins,代码行数:33,代码来源:default_filters.py


示例14: split

    def split(pattern, string, maxsplit=0, flags=0, concurrent=None, **kwargs):
        """Call ``regex.split`` after running the pattern through ``_apply_search_backrefs``."""

        prepared = _apply_search_backrefs(pattern, flags)
        return regex.split(
            prepared,
            string,
            maxsplit=maxsplit,
            flags=flags,
            concurrent=concurrent,
            **kwargs
        )
开发者ID:qhw0820,项目名称:BracketHighlighter,代码行数:7,代码来源:bregex.py


示例15: get_lang_class

def get_lang_class(name):
    """Look up the entry in ``LANGUAGES`` for a language name.

    An exact match on ``name`` is tried first.  Otherwise the part of
    ``name`` before the first non-alphanumeric character is used as the key.

    :param name: language identifier.
    :return: the matching value from ``LANGUAGES``.
    :raises RuntimeError: if neither the full name nor its leading
        alphanumeric part is a key of ``LANGUAGES``.
    """
    if name in LANGUAGES:
        return LANGUAGES[name]
    # maxsplit is passed by keyword: positional use is deprecated in recent
    # Python versions.
    lang = re.split('[^a-zA-Z0-9]', name, maxsplit=1)[0]
    if lang not in LANGUAGES:
        raise RuntimeError('Language not supported: %s' % name)
    return LANGUAGES[lang]
开发者ID:kunbud1989,项目名称:spaCy,代码行数:7,代码来源:util.py


示例16: fileiter

def fileiter(src, ext=None, rex=None):
    """Iterate over files starting at src.

    ext can be a string of space- or comma-separated extensions. Or it can
    be an empty string. The empty string only matches files with no
    extension. ``None`` (the default) disables extension filtering.

    rex should be a regular expression object or pattern. Only files whose
    path produces a match will be returned.

    The extension of a file is whatever follows the last dot of its file
    name; directory names are never consulted.
    """
    wanted = None
    if ext:
        # Split on commas/spaces and drop the empty pieces that leading,
        # trailing or doubled separators would otherwise leave behind.
        wanted = set(part for part in ext.replace(',', ' ').split(' ') if part)
    if isinstance(rex, str):
        rex = regex.compile(rex)
    for dirpath, dirnames, filenames in os.walk(src):
        for name in filenames:
            if ext is not None:
                # Look at the file name only, so that a dot in a directory
                # name is not mistaken for an extension separator.
                _, dot, suffix = name.rpartition('.')
                if ext == '':
                    if dot and suffix:
                        continue
                elif not dot or suffix not in wanted:
                    continue
            infile = os.path.join(dirpath, name)
            # Match against the path itself (previously the extension match
            # object was searched, which is not a string).
            if rex is not None and rex.search(infile) is None:
                continue
            yield infile
开发者ID:jgpacker,项目名称:suttacentral,代码行数:28,代码来源:helpers.py


示例17: Populate

    def Populate(self, filename):
        """Read a play from filename and fill in the title, acts, scenes and lines.

        The first line becomes ``self.Title``.  Every other line is tried,
        in order, against ``regex_acte``, ``regex_scene`` and
        ``regex_character``; a line matching none of them is appended to the
        text of the most recent ``Replique``, if there is one.
        """
        cur_replique = None  # no speech has been started yet
        with open(filename) as f:
            for num, line in enumerate(f, 1):
                if num == 1:
                    self.Title = line
                    continue

                acte_match = regex_acte.match(line)
                if acte_match:
                    cur_acte = Acte(acte_match.group("NB_Acte").strip())
                    self.Acts.append(cur_acte)
                    continue

                scene_match = regex_scene.match(line)
                if scene_match:
                    cur_scene = Scene(scene_match.group("NB_Scene").strip())
                    cur_acte.Scenes.append(cur_scene)
                    continue

                if regex_character.match(line):
                    # Upper-case pieces are character names, the rest are
                    # stage directions.
                    personnage = []
                    didascalie = []
                    for piece in re.split(", |\. ", line):
                        piece = piece.replace('\n', '')
                        if piece.isupper():
                            personnage.append(piece)
                        else:
                            didascalie.append(piece)
                    self.Characters.update(personnage)
                    cur_replique = Replique(personnage, didascalie)
                    cur_scene.Repliques.append(cur_replique)
                    continue

                if line and cur_replique is not None:
                    cur_replique.text += line
开发者ID:simson,项目名称:repetiteur,代码行数:31,代码来源:text_reader.py


示例18: __init__

    def __init__(self, text, window, lang_en, def_counter):
        """
        Split the story into words and sentences, derive one keyword per
        sentence on a thread pool, then start the audio and image worker
        threads as daemons.

        :param text:
           Complete tale/story
        :param window:
            Story_UI window
        :param lang_en:
            Passed through to ``image_from_keyword_list``
        :param def_counter:
            Passed through to ``AudioService.prepare_voice``
        """
        QtCore.QObject.__init__(self)
        self.word_list = re.split(r'\s', text)
        self.window = window
        # Split after sentence punctuation; (?V1) selects the regex
        # module's version-1 behaviour for this pattern.
        self.sentence_list = regex.split(r"(?V1)(?<=\.|:|;|-|,|\!)", text)
        self.sentence_list = self.join_short_sentences()
        self.keyword_list = []
        self.timing_list = []

        self.pool = ThreadPool(4)
        self.keyword_list = self.pool.map(derive_keyword, self.sentence_list)
        self.pool.close()
        self.pool.join()

        self.audio_service = AudioService(window)
        self.audio_thread = threading.Thread(target=self.audio_service.prepare_voice,
                                             args=(self.sentence_list, def_counter))
        # The daemon attribute replaces the deprecated setDaemon() call.
        self.audio_thread.daemon = True
        self.audio_thread.start()
        self.image_thread = threading.Thread(target=image_from_keyword_list, args=(self.keyword_list, window, lang_en))
        self.image_thread.daemon = True
        self.image_thread.start()
开发者ID:nichtawitz,项目名称:Real-Time-Story-Illustrator,代码行数:28,代码来源:text_service.py


示例19: node_volumes

 def node_volumes(node, volume):
     """Return the schema node of volume I or II of the node's book.

     The book name is the second whitespace-separated word of
     ``node.full_title()``.  Volume I is chosen when ``volume`` equals the
     first Hebrew letter; any other value selects volume II.
     """
     book_name = re.split('\s', node.full_title())[1]
     prefix = u'I ' if volume == u'א' else u'II '
     return library.get_schema_node(prefix + book_name)
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:7,代码来源:ibid.py


示例20: siman_smk_exctractor

def siman_smk_exctractor(smk_text):
    """Collect the section numbers mentioned in smk_text.

    The text is split on whitespace.  The two section-marker words are
    skipped, punctuation is removed, and each remaining token is handled as
    follows: a token containing ``-`` adds every number of that range; a
    token accepted by ``is_hebrew_number`` adds its ``getGematria`` value;
    otherwise the value of ``check_vav`` is added, and a falsy value ends
    the scan.

    Returns the list of numbers gathered so far.
    """
    marker_words = (u'סימן', u'סעיף')
    simanim = []
    for token in re.split(u'\s', smk_text):
        if not token or token in marker_words:
            continue
        token = re.sub(u"[;.,']", u"", token)

        if u'-' in token:
            low, _, high = token.partition(u'-')
            first = getGematria(low)
            last = getGematria(high)
            simanim.extend(range(first, last + 1))
            # NOTE(review): a range token still falls through to the checks
            # below - confirm that is intended.

        if is_hebrew_number(token):
            simanim.append(getGematria(token))
            continue
        if not check_vav(token):
            return simanim
        simanim.append(check_vav(token))
    return simanim
开发者ID:JonMosenkis,项目名称:Sefaria-Data,代码行数:25,代码来源:mitzvot_link_scraper.py



注:本文中的regex.split函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python regex.sub函数代码示例发布时间:2022-05-26
下一篇:
Python regex.search函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap