• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python re.sub函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中re.sub函数的典型用法代码示例。如果您正苦于以下问题:Python sub函数的具体用法?Python sub怎么用?Python sub使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了sub函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: _parse_productions

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.
        """
        productions = []

        # Get the text, normalize it, and split it into lines.
        text = self._textwidget.get('1.0', 'end')
        text = re.sub(self.ARROW, '->', text)
        text = re.sub('\t', ' ', text)
        lines = text.split('\n')

        # Convert each line to a CFG production
        for line in lines:
            line = line.strip()
            if line=='': continue
            productions += parse_cfg_production(line)
            #if line.strip() == '': continue
            #if not CFGEditor._PRODUCTION_RE.match(line):
            #    raise ValueError('Bad production string %r' % line)
            #
            #(lhs_str, rhs_str) = line.split('->')
            #lhs = Nonterminal(lhs_str.strip())
            #rhs = []
            #def parse_token(match, rhs=rhs):
            #    token = match.group()
            #    if token[0] in "'\"": rhs.append(token[1:-1])
            #    else: rhs.append(Nonterminal(token))
            #    return ''
            #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
            #
            #productions.append(Production(lhs, *rhs))

        return productions
开发者ID:sfu-natlang,项目名称:nltk,代码行数:35,代码来源:cfg.py


示例2: getPanelInfo

 def getPanelInfo(self, doc, strXPath):
     """Pull the embedded ``"html"`` payload that follows ``strXPath``.

     The text returned by ``doc.text_content()`` is searched for
     ``strXPath``.  From that position (dropping the very last character
     of the text) the span up to and including the ``}`` of the first
     ``})`` is kept.  Within that span, the content starts 8 characters
     after the position returned by ``find('"html":"')`` and the last four
     characters are discarded.  Backslash escapes are then removed and
     ``&lt;`` / ``&gt;`` / ``nbsp;`` are replaced.

     Returns the cleaned string, or ``""`` when a marker is missing, the
     payload is empty, or any exception occurs (the exception is logged
     through ``logger.error``).
     """
     try:
         marker_at = doc.text_content().find(strXPath)
         if marker_at == -1:
             return ""
         # Everything from the marker onwards, minus the final character.
         tail = doc.text_content()[marker_at:-1]
         close_at = tail.find("})")
         if close_at == -1:
             return ""
         tail = tail[0:close_at+1]
         payload_start = tail.find("\"html\":\"") + 8
         payload = tail[payload_start:-4]
         if "v2" in self.xpathType:
             payload = payload.decode('unicode-escape')
         # Remove escaped newlines/tabs and any remaining backslashes.
         payload = re.sub(r"(\\n)*(\\t)*(\\ /)*(\\)*", "", payload)
         payload = re.sub(r"\\/", "/", payload)
         if not payload:
             return ""
         payload = payload.replace("&lt;", "<").replace("&gt;", ">").replace("nbsp;", "")
     except Exception:
         exc = sys.exc_info()
         msg = (u"getPanelInfo Error %s happened on line %d" % (exc[1],exc[2].tb_lineno))
         logger.error(msg)
         return ""
     return payload
开发者ID:CnPaMeng,项目名称:WeiboMsgBackupGUI,代码行数:25,代码来源:msgcomcrawler.py


示例3: GetBook

    def GetBook(self, book):
        """Fetch every chapter of ``book`` and accumulate its HTML.

        ``self.content`` and ``self.footnotes`` are reset, then chapters are
        requested one at a time (starting at 1) by POSTing the book name,
        encoded as ISO-8859-2, and the chapter number to the site.  The first
        chapter's page also provides the title span (rewritten into a
        ``div class="tytul"`` preceded by a named anchor) and the number of
        chapters, taken from the ``rozdzial`` select options.  After the last
        chapter the collected footnotes are appended to ``self.content``.
        """
        self.footnotes = []
        self.content = []
        plainBook = unicodeToPlain(book)
        chapter = 1
        while True:
            url = 'http://www.biblia.deon.pl/otworz.php'
            query = urllib.urlencode({'ksiega': book.encode('iso8859_2'),
                                      'rozdzial': str(chapter)})
            page = urllib2.urlopen(urllib2.Request(url, query)).read()
            doc = html.fromstring(page)

            if chapter != 1:
                self.content.append('<br><br>')
            else:
                BookTitle = doc.findall('.//span[@style="font-size:22px;"]')[0]
                # Turn the title <span> into an anchored <div class="tytul">.
                heading = re.sub(r'<span style=\"font-size:22px;\"',
                                 r'<br><br><a name="K' + plainBook + r'"></a><div class="tytul"',
                                 html.tostring(BookTitle))
                self.content.append(re.sub(r'</span>', r'</div>', heading))
                ChaptersInBook = len(doc.findall('.//select[@name="rozdzial"]/option'))

            plainPrefix = plainBook + str(chapter)
            self.content.append('<div class="numer">' + str(chapter) + '</div>')
            Book.GetContent(self, doc.xpath('//div[@class="tresc"]')[0], plainPrefix)
            Book.GetFootnotes(self,
                              doc.xpath('//td[@width="150"]/table/tr[5]/td/div[1]')[0],
                              plainPrefix,
                              unicodeToReference(book) + ' ' + str(chapter))

            if chapter == ChaptersInBook:
                # Last chapter: footnotes go after all of the content.
                self.content.append('<br><br>' + "".join(self.footnotes))
                break
            chapter += 1
开发者ID:kamm,项目名称:project_b,代码行数:29,代码来源:project_b.py


示例4: extract_bow_v2_features

def _bow_v2_document(frame, i, stemmer, *text_args):
    '''
    Build the stemmed bag-of-words string for row label ``i`` of ``frame``.

    Search-term words are prefixed with "q", title words with "z", and the
    description is appended as is.  ``text_args`` is forwarded unchanged to
    ``BeautifulSoup.get_text``.
    '''
    def plain(markup):
        return BeautifulSoup(markup, "lxml").get_text(*text_args)

    query = " ".join(["q" + z for z in plain(frame["search_term"][i]).split(" ")])
    title = " ".join(["z" + z for z in plain(frame.product_title[i]).split(" ")])
    s = query + " " + title + " " + plain(frame.product_description[i])
    s = re.sub("[^a-zA-Z0-9]", " ", s)
    return " ".join([stemmer.stem(z) for z in s.split(" ")])


def extract_bow_v2_features(train, test, test_contains_labels = False):
    '''
    Build the text documents for a second, simple tf-idf model used for
    ensembling.

    Returns ``(s_data, s_labels, t_data, t_labels)``: documents and
    stringified "relevance" labels for ``train``, then the same for
    ``test``.  ``t_labels`` stays empty unless ``test_contains_labels`` is
    true.  Training text is extracted with ``get_text(" ")`` and test text
    with ``get_text()``.
    '''
    stemmer = PorterStemmer()
    s_data, s_labels, t_data, t_labels = [], [], [], []

    for i, _ in train.iterrows():
        s_data.append(_bow_v2_document(train, i, stemmer, " "))
        s_labels.append(str(train["relevance"][i]))

    for i, _ in test.iterrows():
        t_data.append(_bow_v2_document(test, i, stemmer))
        if test_contains_labels:
            t_labels.append(str(test["relevance"][i]))

    return (s_data, s_labels, t_data, t_labels)
开发者ID:pringlesLion,项目名称:mycs231n,代码行数:26,代码来源:extract.py


示例5: gen_xkcd_sub

def gen_xkcd_sub(msg, hook=False):
    """Apply the xkcd word substitutions to ``msg``.

    Each phrase in the table is replaced wherever it occurs as a whole word
    (``\\b`` boundaries), after which the "-ass" hyphen is moved one word to
    the right.

    Returns the rewritten text.  When nothing changed, returns ``msg``
    itself, or ``None`` if ``hook`` is true.
    """
    substitutions = {
        # http://xkcd.com/1288/
        'witnesses': 'these dudes I know',
        'allegedly': 'kinda probably',
        'new study': 'tumblr post',
        'rebuild': 'avenge',
        'space': 'SPAAAAAACCCEEEEE',
        'google glass': 'virtual boy',
        'smartphone': 'pokedex',
        'electric': 'atomic',
        'senator': 'elf-lord',
        'car': 'cat',
        'election': 'eating contest',
        'congressional leaders': 'river spirits',
        'homeland security': 'homestar runner',
        'could not be reached for comment': 'is guilty and everyone knows it',
        # http://xkcd.com/1031/
        'keyboard': 'leopard',
        # http://xkcd.com/1418/
        'force': 'horse',
    }
    output = msg
    # The trailing `or True` makes this condition hold on every call.
    if not hook or random() < 0.001 or True:
        for phrase in substitutions:
            if phrase in output:
                output = re.sub(r"\b%s\b" % phrase, substitutions[phrase], output)

    output = re.sub(r'(.*)(?:-ass )(.*)', r'\1 ass-\2', output)
    if msg != output:
        return output
    return None if hook else msg
开发者ID:tjcsl,项目名称:cslbot,代码行数:33,代码来源:textutils.py


示例6: classifyText

def classifyText( text, params ):
    """Score ``text`` with the linear classifier carried by ``params``.

    The text is cleaned with ``params.cleaner.clean_html`` (best effort),
    stripped of anything that looks like a tag, lower-cased and tokenised
    into runs of ``a-z``.  Tokens listed in ``params.stopword_list`` are
    dropped, the rest are stemmed with ``params.porterStemmer.stem`` and the
    weights found in ``params.linear_classifier`` are summed into ``z``.

    Returns ``(z < 0, [start_time, end_time, number_of_stems, z,
    1 / (1 + exp(-z)), int(z > 0)])`` where the two times come from
    ``params.my_time()``.
    """
    start_time = params.my_time()

    # Best-effort cleaning: keep the raw text if the cleaner fails, but do
    # not swallow KeyboardInterrupt/SystemExit the way a bare except does.
    try:
        text = params.cleaner.clean_html( text )
    except Exception:
        pass

    text = re.sub(r'<.*?>', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.lower()

    # Tokenize, remove stop words, stem.
    tokens = re.findall('[a-z]+', text)
    kept = [t for t in tokens if t not in params.stopword_list]
    stems = [params.porterStemmer.stem(t, 0, len(t) - 1) for t in kept]

    z = 0
    for s in stems:
        if s in params.linear_classifier:
            z += params.linear_classifier[s]

    end_time = params.my_time()

    # math.exp(-z) overflows for strongly negative z; the logistic value is
    # 0.0 to double precision there.
    try:
        probability = 1/(1+math.exp(-z))
    except OverflowError:
        probability = 0.0

    return ( z<0, [start_time, end_time, len(stems), z, probability, int(z>0)] )
开发者ID:dustin-stoltz,项目名称:snowcrawl,代码行数:34,代码来源:test3_processor.py


示例7: htmlify

 def htmlify (self, text):
     """Wrap ``text`` in ``<p>`` tags and convert its line breaks.

     The stripped text becomes one paragraph; every run of two or more
     newlines starts a new paragraph and each remaining newline becomes
     ``<br>``.  The text is not HTML-escaped.
     """
     wrapped = "<p>%s</p>" % text.strip()
     paragraphs = re.sub('\n\n+', '</p><p>', wrapped)
     return re.sub('\n', '<br>', paragraphs)
开发者ID:ChrisWellington,项目名称:gourmet,代码行数:7,代码来源:html_exporter.py


示例8: convert_corpus

def convert_corpus(filepath, mapping, alignment, begin="xxBeGiN142xx", end="xxEnD142xx"):
    """Rebuild readable text from a tokenised corpus file.

    Runs of the ``begin`` marker (each followed by non-word characters) act
    as sentence breaks and runs of newlines as paragraph breaks.  Every
    other token is looked up as ``mapping[alignment[token]]``; tokens whose
    alignment is not a key of ``mapping`` are dropped.  A word is
    capitalised when the previous entry contains a newline, ``.``, ``!``
    or ``?``.

    :param filepath: path of the corpus file, read as text.
    :param mapping: maps aligned ids to output words.
    :param alignment: maps corpus tokens to aligned ids; a token missing
        from it raises ``KeyError``.
    :param begin: sentence marker, inserted into a regular expression
        as is.
    :param end: accepted for interface compatibility; not used.
    :return: the reconstructed text, or ``''`` when no word was produced.
    """
    # Text mode: the patterns below are str patterns, which cannot be
    # applied to the bytes a binary read returns on Python 3.
    with open(filepath, 'r') as f:
        general_corpus = re.sub('(' + begin + r'\W+)+', ' . ', f.read())
    general_corpus = re.sub(r'\n+', ' this_is_n3wline ', general_corpus)

    # Weighted choice of sentence punctuation.  The original list lacked a
    # comma between two ',' entries, which produced a ',,' choice.
    punctuation = ['.', '.', '.', ',', ',', ',', '!', '?']

    corpus = []
    for token in general_corpus.split():
        if token == '.':
            # Sentence break: punctuate the previous word unless it already
            # ends a paragraph.
            if corpus and '\n' not in corpus[-1]:
                corpus[-1] = corpus[-1] + random.choice(punctuation)
        elif token == 'this_is_n3wline':
            # Paragraph break; nothing to terminate if no word came before.
            if corpus:
                corpus[-1] = corpus[-1] + '.\n\n'
        elif alignment[token] in mapping:
            word = mapping[alignment[token]]
            if corpus and re.search(r'[\n\.!?]', corpus[-1]):
                corpus.append(word.capitalize().strip())
            else:
                corpus.append(word.strip())

    if not corpus:
        return ''

    corpus[0] = corpus[0].capitalize()
    output = ' '.join(corpus)
    output = re.sub(r' +', ' ', output)
    output = re.sub(r'\n+ ', '\n\n', output)
    return output
开发者ID:lizrush,项目名称:shorties,代码行数:25,代码来源:convert_vocab.py


示例9: getCategoryUrl

def getCategoryUrl(site="",url=""):
    """Store the three-level category tree found at ``url``.

    The page is fetched through ``session`` and parsed with BeautifulSoup.
    For every link under a ``.classify_kind`` inside a ``.classify_books``
    block, the category id is taken from ``/cp<id>.html`` (type ``book``)
    or, failing that, ``/cid<id>.html`` (type ``nonbook``).  Links matching
    neither pattern are ignored.  Ids not yet present in the category
    table are inserted together with their level names, URL, type and
    ``site``.

    Returns ``False`` when the response has no text, otherwise ``True``.
    """
    catDb = openTable(tableName=global_setting['catTable'])
    response = session.get(url)
    if not response.text:
        return False

    soup = BeautifulSoup(response.text)
    for level1 in soup.select('.classify_books'):
        curLevel1 = re.sub(r'\s', '', level1.select('.classify_title')[0].text)
        for level2 in level1.select('.classify_kind'):
            curLevel2 = re.sub(r'\s', '', level2.select('.classify_kind_name')[0].text)
            for level3 in level2.select('ul li a'):
                curLevel3 = level3.text.strip()
                curlUrl = level3['href']

                # Book categories first, then non-book ones.
                catType = 'book'
                retFind = re.findall(r'\/cp(.*)\.html', curlUrl)
                if not retFind:
                    catType = 'nonbook'
                    retFind = re.findall(r'\/cid(.*)\.html', curlUrl)
                if not retFind:
                    continue

                curCatID = retFind[0]
                if catDb.find({'catId':curCatID}).count() > 0:
                    logger.debug('catetogy %s exists,skip\n'%(curCatID))
                    continue
                catDb.insert({'catId':curCatID,'level1':curLevel1, 'level2':curLevel2, 'level3':curLevel3, 'catUrl':curlUrl,'catType':catType, 'site':site})
    return True
开发者ID:Neilfu,项目名称:NLP,代码行数:32,代码来源:getEcommence_dangdang.py


示例10: obfuscate_codeblocks

def obfuscate_codeblocks(source):
  """Replace the contents of code blocks with numbered placeholders.

  Hiding code blocks makes it possible to process the rest of the source
  safely and to put the blocks back afterwards.  Blocks matching
  ``__regex_codeblock__`` are handled first, then those matching
  ``__regex_codeblock_html__``; the n-th protected block is replaced by
  ``$PROTECTED-n``.

  Parameters
  ----------
  source : str
    string (as single stream) containing the source

  Returns
  -------
  protected_contents : list
    one ``[start, end, text]`` entry per protected code block
  str
    source with each code block replaced by its placeholder

  >>> source = '``` my code block ``` other contents'
  >>> prot, ob_source = obfuscate_codeblocks(source)
  >>> prot[0][2]
  '``` my code block ```'
  >>> ob_source
  '$PROTECTED-1 other contents'
  """
  protected_contents = []
  obfuscate_source = source
  for pattern in (__regex_codeblock__, __regex_codeblock_html__):
    # The iterator scans the text as it was when this pass began; each step
    # replaces the first block still left in the current text.
    for found in re.finditer(pattern, obfuscate_source):
      protected_contents.append([found.start(), found.end(), found.group()])
      placeholder = '$PROTECTED-' + str(len(protected_contents))
      obfuscate_source = re.sub(pattern, placeholder, obfuscate_source, 1)
  return protected_contents,obfuscate_source
开发者ID:nunb,项目名称:MaTiSSe,代码行数:34,代码来源:source_editor.py


示例11: makeIdentifier

 def makeIdentifier(self, string):
   """Turn ``string`` into a lower-case, underscore-separated identifier.

   Whitespace runs are collapsed, the text is passed through ``safeEncode``
   and NFKD-normalised, punctuation is removed, and runs of spaces and
   underscores become a single ``_``.  Leading and trailing underscores
   are stripped.
   """
   string = re.sub( r"\s+", " ", string.strip())
   string = unicodedata.normalize('NFKD', safeEncode(string))
   # NOTE(review): this listing had "[email protected]" inside the character
   # class, an e-mail obfuscation artifact that made the pattern strip
   # letters and spaces.  The class is restored here as a punctuation set
   # ("!?@" presumed to be the obscured characters); confirm against the
   # upstream project.
   string = re.sub(r"['\"!?@#$&%^*\(\)_+\.,;:/]","", string)
   string = re.sub(r"[_ ]+","_", string)
   string = string.strip('_')
   return string.strip().lower()
开发者ID:RussianPlex,项目名称:WikipediaRu,代码行数:7,代码来源:__init__.py


示例12: _sanitize

 def _sanitize(self, data):
     retv = ''
     if data.find('\x1b') != -1:
         tmp = filter(lambda x: x in string.printable, data)
         retv += re.sub('(\{|\}|\*|\%)', '', re.sub('\[[0-9\;]+m', '', tmp))
         return retv
     return data
开发者ID:fuzzy,项目名称:abused,代码行数:7,代码来源:emerge.py


示例13: _clean_text

    def _clean_text(self, text):
        """ Cleans up text before we make it into an HTML tree:
            1. Nukes <![CDATA stuff.
            2. Nukes XML encoding declarations
            3. Replaces </br> with <br/>
            4. Nukes invalid bytes in input
            5. ?
        """
        # Remove <![CDATA because it causes breakage in lxml.
        text = re.sub(r"<!\[CDATA\[", u"", text)
        text = re.sub(r"\]\]>", u"", text)

        # Remove <?xml> declaration in Unicode objects, because it causes an error:
        # "ValueError: Unicode strings with encoding declaration are not supported."
        # Note that the error only occurs if the <?xml> tag has an "encoding"
        # attribute, but we remove it in all cases, as there's no downside to
        # removing it. This moves our encoding detection to chardet, rather than
        # lxml.
        if isinstance(text, unicode):
            text = re.sub(r"^\s*<\?xml\s+.*?\?>", "", text)

        # Fix </br>
        text = re.sub("</br>", "<br/>", text)

        # Fix invalid bytes (http://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python)
        text = re.sub(u"[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\u10000-\u10FFFF]+", "", text)

        return text
开发者ID:m4h7,项目名称:juriscraper,代码行数:28,代码来源:AbstractSite.py


示例14: sendGPS

    def sendGPS(self, head, cmd):
        """Send ``head + cmd`` to the device and return its matching reply.

        The input buffer is flushed and the command is written with a CR/LF
        terminator.  Up to 100 lines are then read.  The first line that
        contains ``head`` is returned, starting at ``head`` and cut at the
        first ``*``.  An empty read ends the attempt and returns ``None``.
        If no line matches, a failure message is printed and ``None`` is
        returned implicitly.
        """
        attempts = 100
        request = head + cmd
        print("GPS SEND: '%s'" % request)

        self.dev.flushInput()
        self.dev.write(request + "\r\n")

        reply = ""
        for _ in xrange(attempts):
            reply = self.dev.readline()
            if len(reply) == 0:
                print("ZERO REPLY")
                return None

            # Trim leading and trailing whitespace before searching.
            reply = re.sub(r"\s+$", "", re.sub(r"^\s+", "", reply))
            print("RAW GPS REPLY: '%s'" % reply)

            start = reply.find(head)
            if start == -1:
                continue
            reply = reply[start:].split("*")[0]
            print("GPS REPLY: '%s'" % reply)
            return reply

        print("sendGPS: FAILED: '%s'" % reply)
开发者ID:ghalfacree,项目名称:clock-tamer,代码行数:26,代码来源:tamerdevice.py


示例15: main

def main():
    """Run pyflakes and print only the warnings that are not excluded.

    pyflakes is run from the parent of this script's directory on the
    modules named on the command line (default: ``django_evolution``).
    Each output line is normalised by replacing line numbers with ``*`` and
    is printed unless the normalised form is listed in ``pyflakes.exclude``
    next to this script.  Lines of that file starting with ``#`` are
    ignored.
    """
    # Resolve the script directory before changing directory: with a
    # relative __file__ the exclusions path would otherwise be looked up
    # relative to the new working directory.
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    os.chdir(os.path.join(cur_dir, ".."))
    modules = sys.argv[1:]

    if not modules:
        modules = ['django_evolution']

    p = subprocess.Popen(['pyflakes'] + modules,
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         close_fds=True)

    # communicate() drains both pipes and waits for the process; reading
    # only stdout can block forever once the stderr pipe fills up.
    stdout, _ = p.communicate()
    contents = stdout.splitlines()

    # Read in the exclusions file
    exclusions = set()
    with open(os.path.join(cur_dir, "pyflakes.exclude"), "r") as fp:
        for line in fp:
            if not line.startswith("#"):
                exclusions.add(line.rstrip())

    # Now filter things
    for line in contents:
        line = line.rstrip()
        test_line = re.sub(r':[0-9]+:', r':*:', line, count=1)
        test_line = re.sub(r'line [0-9]+', r'line *', test_line)

        if test_line not in exclusions:
            print(line)
开发者ID:Aeron,项目名称:django-evolution,代码行数:32,代码来源:run-pyflakes.py


示例16: start

 def start(self):
     """Load the Ushahidi settings used by this app.

     Reads the keyword, the default and error responses (each with a
     fallback) and the trigger URL from ``settings``.  The keyword is
     compiled into ``self.pattern``, and the ``${sender_number}`` and
     ``${message_content}`` placeholders of the trigger URL are turned into
     ``%(sender)s`` and ``%(message)s``.
     """
     keyword = getattr(settings, 'USHAHIDI_KEYWORD', '')
     self.default_response = getattr(settings, 'USHAHIDI_RESPONSE', 'Thank you for your report.')
     self.error_response = getattr(settings, 'USHAHIDI_ERROR', "Due to some error, we're unable to process your message. Please resend.")
     self.pattern = re.compile(r"^\s*(?:%s)(?:[\s,;:]+(.+))?$" % (keyword))

     placeholders = (
         (r'\$\{sender_number\}', '%(sender)s'),
         (r'\$\{message_content\}', '%(message)s'),
     )
     self.trigger_url = getattr(settings, 'USHAHIDI_TRIGGER_URL')
     for placeholder, fmt in placeholders:
         self.trigger_url = re.sub(placeholder, fmt, self.trigger_url)
开发者ID:timbaobjects,项目名称:rapidsms-ushahidi-app,代码行数:7,代码来源:app.py


示例17: copy_template

    def copy_template():
        """Copy the template tree to ``name`` and fill in its variables.

        After prompting for configuration, the template is copied, its
        ``config.yaml`` is removed from the copy, and every directory named
        ``options.template`` is replaced by a copy named ``name``.  Each
        file in the result is then rewritten with ``{{ var }}`` and
        ``__config_var__`` markers substituted through
        ``replace_variable``.
        """
        config_prompt(template)
        shutil.copytree(template, name)

        config_path = '%s/%s' % (name, 'config.yaml')
        if os.path.exists(config_path):
            os.remove(config_path)

        for dirname, dirnames, files in os.walk(name):
            for d in dirnames:
                if d == options.template:
                    shutil.copytree('%s/%s' % (dirname, d), '%s/%s' % (dirname, name))
                    shutil.rmtree('%s/%s' % (dirname, d))

        for dirname, dirnames, files in os.walk(name):
            for filename in files:
                path = '%s/%s' % (dirname, filename)

                # Context managers close the handle even when reading or
                # substitution raises.
                with open(path, 'r') as f:
                    lines = f.readlines()

                first_pass = [re.sub(r'{{\s*(\w+)\s*}}', replace_variable, line) for line in lines]
                new_lines = [re.sub(r'__config_(\w+)__', replace_variable, line) for line in first_pass]

                with open(path, 'w') as f:
                    f.write(''.join(new_lines))
开发者ID:rlayte,项目名称:kickstart,代码行数:25,代码来源:kickstart.py


示例18: parse_list

 def parse_list(self, page):
     """Yield one record per inspection found in the CSV text ``page``.

     Null bytes are replaced by spaces and runs of single quotes collapsed
     before parsing.  Only rows whose ``CITY`` is ``CHARLOTTE`` are used.
     The data has one row per violation, and rows of the same inspection
     (same ``FAC_NAME`` and ``DATE``) are contiguous.  They are rolled up
     into the first row of the group, which gains a ``violation`` list of
     ``{'id', 'value', 'comment'}`` dicts and an empty ``comments`` list.

     Nothing is yielded when no row qualifies.
     """
     # Remove null bytes
     page = re.sub(r'\0', r' ', page)
     # Remove sequences of '''''''
     page = re.sub(r"'+", "'", page)
     reader = csv.DictReader(StringIO(page), quoting=csv.QUOTE_ALL, escapechar='\\')
     current_record = None
     for row in reader:
         if row['CITY'] != 'CHARLOTTE':
             continue
         row['comments'] = []
         # Strip any leading zeros. Both 01 and 1 appear sometimes, but
         # they mean the same thing.
         item_id = row['ITEM_NUM'].lstrip('0')
         violation = {'id': item_id, 'value': row['ITEM_VALUE'], 'comment': row['COMMENT']}
         same_inspection = (
             current_record is not None
             and current_record['FAC_NAME'] == row['FAC_NAME']
             and current_record['DATE'] == row['DATE']
         )
         if same_inspection:
             current_record['violation'].append(violation)
             continue
         # A different inspection starts here: emit the finished one.
         if current_record is not None:
             yield current_record
         current_record = row
         current_record['violation'] = [violation]
     # The final record has no following record to trigger it, so yield it
     # here -- unless there was no qualifying row at all, in which case
     # yielding None would hand callers a bogus record.
     if current_record is not None:
         yield current_record
开发者ID:frankk00,项目名称:openblock,代码行数:30,代码来源:retrieval.py


示例19: clean_word

def clean_word(word):
    """Strip digit runs that precede a space, and all whitespace, from ``word``."""
    without_numbers = re.sub(r"[0-9]* ", "", word)
    without_space = re.sub(r"[\s]*", "", without_numbers)
    return without_space.replace('\n', '').replace('\r', '')
开发者ID:vincent-ferotin,项目名称:PhiloLogic4-WSGI,代码行数:7,代码来源:format.py


示例20: main

def main(argv):
    """Run one set/get/delete action against an etcd or consul key store.

    The store type, address, action, key and value come from
    ``parse_cli()``.  The base URL is the address (without any ``http://``)
    plus the store's key path, with repeated slashes collapsed, ``http://``
    prepended and trailing slashes removed.  For ``get`` the parsed value
    is printed when it is not ``None``.
    """
    (backend, address, action, key, value) = parse_cli()

    host = re.sub('http://', '', address)
    if backend == 'etcd':
        base_url = host + '/v2/keys/'
    elif backend == 'consul':
        base_url = host + '/v1/kv/'
    else:
        base_url = ''

    # Collapse duplicate slashes before the scheme is put back.
    base_url = 'http://' + re.sub(r'\/+', '/', base_url)
    base_url = re.sub(r'\/+$', '', base_url)

    verb = action.lower()
    if verb == 'set':
        set_key_value(base_url, key, value)
    elif verb == 'get':
        value = parse_value(get_key_value(base_url, key), backend)
        if value is not None:
            print(value)
    elif verb == 'delete':
        delete_key_value(base_url, key)
开发者ID:amos6224,项目名称:ami-roles,代码行数:25,代码来源:key_value.py



注:本文中的re.sub函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python re.subn函数代码示例发布时间:2022-05-26
下一篇:
Python re.split函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap