本文整理汇总了Python中pywikibot.textlib.replaceExcept函数的典型用法代码示例。如果您正苦于以下问题：Python replaceExcept函数的具体用法？Python replaceExcept怎么用？Python replaceExcept使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了replaceExcept函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: translateAndCapitalizeNamespaces
def translateAndCapitalizeNamespaces(self, text):
    """Rewrite namespace prefixes of wiki links to the preferred name.

    For every namespace of ``self.site`` except the main one, links whose
    prefix is one of the namespace's alternative names are rewritten to
    use the first name of that namespace.  Text inside nowiki, comment,
    math and pre sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with rewritten namespace prefixes
    """
    # arz uses English-style codes, so nothing is translated there
    if self.site.sitename == 'wikipedia:arz':
        return text

    # wiki links aren't parsed here
    skip_inside = ['nowiki', 'comment', 'math', 'pre']

    for namespace in self.site.namespaces.values():
        # the main (article) namespace has no prefix to rewrite
        if namespace == 0:
            continue

        # work on a copy so the site's namespace data is not modified
        aliases = list(namespace)

        if namespace == 6 and self.site.family.name == 'wikipedia':
            code = self.site.code
            if code in ('en', 'fr') and MediaWikiVersion(
                    self.site.version()) >= MediaWikiVersion('1.14'):
                # "Image" must stay as it is on en-wiki and fr-wiki
                assert u'Image' in aliases
                aliases.remove(u'Image')
            if code == 'hu':
                # "Kép" must stay as it is on hu-wiki
                assert u'Kép' in aliases
                aliases.remove(u'Kép')
            elif code == 'pt':
                # pt-wiki prefers "Imagem" (per T57242): move it to the
                # front so that it becomes the target name below
                assert 'Imagem' in aliases
                aliases.insert(0, aliases.pop(aliases.index('Imagem')))

        # the first entry is the name every other variant is rewritten to
        target = aliases.pop(0)

        if namespace in (2, 3):
            # localized user namespaces are skipped, gender may be used
            aliases = ['User' if namespace == 2 else 'User talk']

        # accept either case for the first letter and a space or an
        # underscore between words
        variants = []
        for alias in aliases:
            alias = alias.replace(' ', '[ _]')
            variants.append(
                u'[%s%s]' % (alias[0], alias[0].lower()) + alias[1:])
        # presumably the target name with a lower-case first letter;
        # first_lower is defined outside this block
        variants.append(first_lower(target))
        aliases = variants

        if not (target and aliases):
            continue

        if self.site.sitename == 'wikipedia:pt' and namespace == 6:
            # only change links to these file extensions (per T57242)
            extensions = ('png', 'gif', 'jpg', 'jpeg', 'svg', 'tiff',
                          'tif')
            pattern = (
                r'\[\[\s*({}) *:(?P<name>[^\|\]]*?\.({}))'
                r'(?P<label>.*?)\]\]'
            ).format('|'.join(aliases), '|'.join(extensions))
            replacement = r'[[{}:\g<name>\g<label>]]'.format(target)
        else:
            pattern = (r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                       % '|'.join(aliases))
            replacement = r'[[%s:\g<nameAndLabel>]]' % target

        text = textlib.replaceExcept(text, pattern, replacement,
                                     skip_inside)
    return text
开发者ID:Zeffar,项目名称:Elobot,代码行数:60,代码来源:cosmetic_changes.py
示例2: fixSyntaxSave
def fixSyntaxSave(self, text):
    """Turn URLs pointing to this wiki into wiki links and fix link syntax.

    Text inside nowiki, comment, math, pre, source and startspace
    sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with repaired links
    """
    def replace_link(match):
        """Build the wiki link that replaces one matched URL link."""
        title = match.group('title')
        if title:
            return '[[' + match.group('link') + '|' + title + ']]'
        return '[[' + match.group('link') + ']]'

    skip_inside = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace']

    # links to the wiki being worked on;
    # only suffixes for article paths are used
    for suffix in self.site._interwiki_urls(True):
        http_url = self.site.base_url(suffix, 'http')
        https_url = (None if self.site.protocol() == 'http'
                     else self.site.base_url(suffix, 'https'))

        # Compare both URLs without their protocol.  When they agree, one
        # pattern covers http, https and protocol-relative links
        # (//en.wikipedia.org/…).
        if https_url is not None and http_url[4:] == https_url[5:]:
            url_patterns = ['(?:https?:)?' + re.escape(http_url[5:])]
        else:
            url_patterns = [re.escape(candidate)
                            for candidate in (http_url, https_url)
                            if candidate is not None]

        for url_pattern in url_patterns:
            # Links containing the query separator are skipped because
            # a wiki link cannot carry additional parameters.
            separator = '?&' if '?' in suffix else '?'
            # The title has to start with a non-space character so that
            # trailing spaces of a title-less link are not taken as title.
            pattern = (r'\[\[?' + url_pattern
                       + r'(?P<link>[^' + separator + r']+?)'
                       r'(\s+(?P<title>[^\s].*?))?\s*\]\]?')
            text = textlib.replaceExcept(text, pattern, replace_link,
                                         skip_inside, site=self.site)

    # external link in, or starting with, double brackets
    text = textlib.replaceExcept(
        text,
        r'\[\[(?P<url>https?://[^\]]+?)\]\]?',
        r'[\g<url>]', skip_inside, site=self.site)

    # External link and description separated by a pipe, with whitespace
    # in front of the pipe, so that it is clear that the pipe is not a
    # legitimate part of the URL.
    text = textlib.replaceExcept(
        text,
        r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
        r'[\g<url> \g<label>]', skip_inside)

    # Pipe in an external link where the end of the URL can be detected
    # from the file extension.  It is very unlikely that this will cause
    # mistakes.
    extension_alternatives = '|'.join(
        r'\.{0}'.format(ext)
        for ext in ['pdf', 'html?', 'php', 'aspx?', 'jsp'])
    text = textlib.replaceExcept(
        text,
        r'\[(?P<url>https?://[^\|\] ]+?(' + extension_alternatives + r')) *'
        r'\| *(?P<label>[^\|\]]+?)\]',
        r'[\g<url> \g<label>]', skip_inside)
    return text
开发者ID:Nivgov,项目名称:AvodatGemer,代码行数:60,代码来源:cosmetic_changes.py
示例3: commonsfiledesc
def commonsfiledesc(self, text):
    """
    Clean up file descriptions on the Wikimedia Commons.

    It is working according to [1] and works only on pages in the file
    namespace on the Wikimedia Commons. For any other page the text is
    returned unchanged.

    [1]: https://commons.wikimedia.org/wiki/Commons:Tools/pywiki_file_description_cleanup

    :param text: wikitext of the page
    :return: the cleaned-up wikitext
    """
    # Only file pages (namespace 6) on Commons are processed.  The text is
    # always handed back so that a caller reassigning the result never
    # receives None.
    if self.site.sitename != 'commons:commons' or self.namespace != 6:
        return text

    # NOTE: the trailing positional ``True`` in every call below is
    # replaceExcept's case-insensitive flag (see the textlib API docs).

    # section headers to {{int:}} versions
    exceptions = ['comment', 'includeonly', 'math', 'noinclude', 'nowiki',
                  'pre', 'source', 'ref', 'timeline']
    text = textlib.replaceExcept(text,
                                 r"([\r\n]|^)\=\= *Summary *\=\=",
                                 r"\1== {{int:filedesc}} ==",
                                 exceptions, True)
    text = textlib.replaceExcept(
        text,
        r"([\r\n])\=\= *\[\[Commons:Copyright tags\|Licensing\]\]: *\=\=",
        r"\1== {{int:license-header}} ==", exceptions, True)
    text = textlib.replaceExcept(
        text,
        r"([\r\n])\=\= *(Licensing|License information|{{int:license}}) *\=\=",
        r"\1== {{int:license-header}} ==", exceptions, True)

    # frequent field values to {{int:}} versions
    text = textlib.replaceExcept(
        text,
        r'([\r\n]\|[Ss]ource *\= *)'
        r'(?:[Oo]wn work by uploader|[Oo]wn work|[Ee]igene [Aa]rbeit) *([\r\n])',
        r'\1{{own}}\2', exceptions, True)
    text = textlib.replaceExcept(
        text,
        r'(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
        r'\1\2', exceptions, True)

    # added to transwikied pages
    text = textlib.replaceExcept(text, r'__NOTOC__', '', exceptions, True)

    # tracker element for js upload form; it lives inside an HTML comment,
    # so the leading 'comment' exception is dropped for this call only
    text = textlib.replaceExcept(
        text,
        r'<!-- *{{ImageUpload\|(?:full|basic)}} *-->',
        '', exceptions[1:], True)
    text = textlib.replaceExcept(text, r'{{ImageUpload\|(?:basic|full)}}',
                                 '', exceptions, True)

    # duplicated section headers
    text = textlib.replaceExcept(
        text,
        r'([\r\n]|^)\=\= *{{int:filedesc}} *\=\=(?:[\r\n ]*)\=\= *{{int:filedesc}} *\=\=',
        r'\1== {{int:filedesc}} ==', exceptions, True)
    text = textlib.replaceExcept(
        text,
        r'([\r\n]|^)\=\= *{{int:license-header}} *\=\=(?:[\r\n ]*)'
        r'\=\= *{{int:license-header}} *\=\=',
        r'\1== {{int:license-header}} ==', exceptions, True)
    return text
开发者ID:PersianWikipedia,项目名称:pywikibot-core,代码行数:60,代码来源:cosmetic_changes.py
示例4: fixHtml
def fixHtml(self, text):
    """Replace HTML markup with the equivalent wikitext markup.

    Text inside nowiki, comment, math, pre, source and startspace
    sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with simple HTML tags converted
    """
    def replace_header(match):
        """Create a header string for replacing."""
        depth = int(match.group(1))
        return r'{0} {1} {0}'.format('=' * depth, match.group(2))

    # Everything case-insensitive (?i)
    # Keep in mind that MediaWiki automatically converts <br> to <br />
    exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                  'startspace']
    text = textlib.replaceExcept(text, r'(?i)<(b|strong)>(.*?)</\1>',
                                 r"'''\2'''", exceptions, site=self.site)
    text = textlib.replaceExcept(text, r'(?i)<(i|em)>(.*?)</\1>',
                                 r"''\2''", exceptions, site=self.site)
    # horizontal line without attributes in a single line
    text = textlib.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
                                 r'\1----\2', exceptions)
    # horizontal line with attributes; can't be done with wiki syntax
    # so we only make it XHTML compliant
    text = textlib.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
                                 r'<hr \1 />',
                                 exceptions)
    # A header where only spaces are in the same line.  Only <h1>..<h6>
    # exist in HTML and wikitext headings stop at six '=' signs, so <h7>
    # is deliberately left alone.
    text = textlib.replaceExcept(
        text,
        r'(?i)(?<=[\r\n]) *<h([1-6])> *([^<]+?) *</h\1> *(?=[\r\n])',
        replace_header,
        exceptions)
    # TODO: maybe we can make the bot replace <p> tags with \r\n's.
    return text
开发者ID:PersianWikipedia,项目名称:pywikibot-core,代码行数:31,代码来源:cosmetic_changes.py
示例5: fixSyntaxSave
def fixSyntaxSave(self, text):
    """Repair common syntax mistakes in external links.

    Text inside nowiki, comment, math, pre, source and startspace
    sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with repaired external links
    """
    skip_inside = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace']

    # link to the wiki working on
    # TODO: disable this for difflinks and titled links,
    # to prevent edits like this:
    # https://de.wikipedia.org/w/index.php?title=Wikipedia%3aVandalismusmeldung&diff=103109563&oldid=103109271
    # text = textlib.replaceExcept(text,
    #     r'\[https?://%s\.%s\.org/wiki/(?P<link>\S+)\s+(?P<title>.+?)\s?\]'
    #     % (self.site.code, self.site.family.name),
    #     r'[[\g<link>|\g<title>]]', exceptions)

    # (pattern, replacement) pairs, applied one after another in this order
    fixes = (
        # external link in double brackets
        (r'\[\[(?P<url>https?://[^\]]+?)\]\]',
         r'[\g<url>]'),
        # external link starting with double bracket
        (r'\[\[(?P<url>https?://.+?)\]',
         r'[\g<url>]'),
        # External link and description separated by a pipe, with
        # whitespace in front of the pipe, so that it is clear that the
        # pipe is not a legitimate part of the URL.
        (r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
         r'[\g<url> \g<label>]'),
        # Pipe in an external link where the end of the URL can be
        # detected from the file extension.  It is very unlikely that
        # this will cause mistakes.
        (r'\[(?P<url>https?://[^\|\] ]+?'
         r'(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp))'
         r' *\| *(?P<label>[^\|\]]+?)\]',
         r'[\g<url> \g<label>]'),
    )
    for pattern, replacement in fixes:
        text = textlib.replaceExcept(text, pattern, replacement,
                                     skip_inside)
    return text
开发者ID:skamithi,项目名称:pywikibot-core,代码行数:35,代码来源:cosmetic_changes.py
示例6: fixHtml
def fixHtml(self, text):
    """Replace simple HTML markup with the equivalent wikitext markup.

    Text inside nowiki, comment, math, pre, source and startspace
    sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with simple HTML tags converted
    """
    # All patterns are case-insensitive (?i).
    # Keep in mind that MediaWiki automatically converts <br> to <br />
    skip_inside = ['nowiki', 'comment', 'math', 'pre', 'source',
                   'startspace']

    # bold and italic markup
    for pattern, replacement in (
            (r'(?i)<(b|strong)>(.*?)</\1>', r"'''\2'''"),
            (r'(?i)<(i|em)>(.*?)</\1>', r"''\2''")):
        text = textlib.replaceExcept(text, pattern, replacement,
                                     skip_inside, site=self.site)

    for pattern, replacement in (
            # horizontal line without attributes in a single line
            (r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2'),
            # horizontal line with attributes; can't be done with wiki
            # syntax, so it is only made XHTML compliant
            (r'(?i)<hr ([^>/]+?)>', r'<hr \1 />')):
        text = textlib.replaceExcept(text, pattern, replacement,
                                     skip_inside)

    # a header where only spaces are in the same line, levels 1 to 6
    for level in range(1, 7):
        marks = '=' * level
        text = textlib.replaceExcept(
            text,
            r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'
            % (level, level),
            '\\1%s \\2 %s\\3' % (marks, marks),
            skip_inside)
    # TODO: maybe we can make the bot replace <p> tags with \r\n's.
    return text
开发者ID:edgarskos,项目名称:wbots,代码行数:28,代码来源:cosmetic_changes.py
示例7: test_replace_template
def test_replace_template(self):
    """Check that text inside templates is left alone.

    In every sample only the leading 'a' is outside the template, so only
    that character is expected to be replaced.
    """
    samples = [
        # nested template and a wiki link in the parameters
        (r'a {{templatename '
         r' | accessdate={{Fecha|1993}} '
         r' |atitle=The [[real title]] }}'),
        # several nested templates per parameter
        (r'a {{templatename '
         r' | 1={{a}}2{{a}} '
         r' | 2={{a}}1{{a}} }}'),
        # triple braces inside the parameters
        (r'a {{templatename '
         r' | 1={{{a}}}2{{{a}}} '
         r' | 2={{{a}}}1{{{a}}} }}'),
    ]
    # sf.net bug 1575: unclosed template (previous sample without its
    # closing braces)
    samples.append(samples[-1][:-2])

    for sample in samples:
        replaced = textlib.replaceExcept(sample, 'a', 'X',
                                         ['template'], site=self.site)
        self.assertEqual(replaced, 'X' + sample[1:])
开发者ID:metakgp,项目名称:batman,代码行数:28,代码来源:textlib_tests.py
示例8: test_replace_source_reference
def test_replace_source_reference(self):
    """Check replacing in text which itself looks like a back reference."""
    # The original strings deliberately do not use a valid reference
    # number or group name, in case the replacement tries to apply them
    # as references.
    cases = (
        (r"\42", r"^(.*)$", r"X\1X", r"X\42X"),
        (r"\g<bar>", r"^(?P<foo>.*)$", r"X\g<foo>X", r"X\g<bar>X"),
    )
    for source, pattern, replacement, expected in cases:
        result = textlib.replaceExcept(source, pattern, replacement, [],
                                       site=self.site)
        self.assertEqual(result, expected)
开发者ID:hasteur,项目名称:pywikibot_scripts,代码行数:8,代码来源:textlib_tests.py
示例9: removeUselessSpaces
def removeUselessSpaces(self, text):
    """Collapse runs of spaces and drop a space in front of the text end.

    Text inside comment, math, nowiki, pre, startspace, table and
    template sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with the spaces cleaned up
    """
    skip_inside = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                   'table', 'template']
    # NOTE(review): ' $' is compiled without re.MULTILINE, so it only
    # matches at the end of the text - confirm this is intended.
    cleanups = (
        (re.compile(' +'), ' '),
        (re.compile(' $'), ''),
    )
    for pattern, replacement in cleanups:
        text = textlib.replaceExcept(text, pattern, replacement,
                                     skip_inside)
    return text
开发者ID:skamithi,项目名称:pywikibot-core,代码行数:8,代码来源:cosmetic_changes.py
示例10: test_replace_exception
def test_replace_exception(self):
    """Check that matches covered by an exception regex are kept."""
    source, old, new = '123x123', '123', '000'

    # without exceptions both occurrences are replaced
    unrestricted = textlib.replaceExcept(source, old, new, [],
                                         site=self.site)
    self.assertEqual(unrestricted, '000x000')

    # the second '123' is covered by the exception regex
    guarded = textlib.replaceExcept(source, old, new,
                                    [re.compile(r'\w123')],
                                    site=self.site)
    self.assertEqual(guarded, '000x123')
开发者ID:xZise,项目名称:pywikibot-core,代码行数:8,代码来源:textlib_tests.py
示例11: test_replace_with_marker
def test_replace_with_marker(self):
    """Check where the marker string ends up in the result."""
    cases = (
        # (pattern to replace, expected result)
        ('x', 'Ayyy.B'),
        # nothing matches: the marker is expected at the end of the text
        ('1', 'AxyxB.'),
    )
    for old, expected in cases:
        result = textlib.replaceExcept('AxyxB', old, 'y', [],
                                       marker='.',
                                       site=self.site)
        self.assertEqual(result, expected)
开发者ID:xZise,项目名称:pywikibot-core,代码行数:9,代码来源:textlib_tests.py
示例12: test_replace_exception
def test_replace_exception(self):
    """Test that text matched by an exception regex is not replaced."""
    cases = (
        # (exceptions, expected result)
        ([], '000x000'),
        ([re.compile(r'\w123')], '000x123'),
    )
    for exceptions, expected in cases:
        result = textlib.replaceExcept('123x123', '123', '000',
                                       exceptions,
                                       site=self.site)
        self.assertEqual(result, expected)
开发者ID:metakgp,项目名称:batman,代码行数:9,代码来源:textlib_tests.py
示例13: test_overlapping_replace
def test_overlapping_replace(self):
    """Check the result with and without overlapping matches."""
    without_overlap = textlib.replaceExcept('1111', '11', '21', [],
                                            allowoverlap=False,
                                            site=self.site)
    self.assertEqual(without_overlap, '2121')

    with_overlap = textlib.replaceExcept('1111', '11', '21', [],
                                         allowoverlap=True,
                                         site=self.site)
    self.assertEqual(with_overlap, '2221')
开发者ID:xZise,项目名称:pywikibot-core,代码行数:9,代码来源:textlib_tests.py
示例14: test_simple_replace
def test_simple_replace(self):
    """Check plain replacements with an empty exception list."""
    cases = (
        # (source text, expected result)
        ('AxB', 'AyB'),
        ('AxxB', 'AyyB'),
        ('AxyxB', 'AyyyB'),
    )
    for source, expected in cases:
        result = textlib.replaceExcept(source, 'x', 'y', [],
                                       site=self.site)
        self.assertEqual(result, expected)
开发者ID:xZise,项目名称:pywikibot-core,代码行数:10,代码来源:textlib_tests.py
示例15: removeUselessSpaces
def removeUselessSpaces(self, text):
    """Collapse runs of spaces and drop a space in front of the text end.

    Text inside comment, math, nowiki, pre, startspace and table sections
    is not touched; templates are skipped as well unless the site is
    'wikipedia:cs'.

    :param text: wikitext to process
    :return: the wikitext with the spaces cleaned up
    """
    run_of_spaces = re.compile(' +')
    # NOTE(review): compiled without re.MULTILINE, so this only matches
    # at the end of the text - confirm this is intended.
    space_at_end = re.compile(' $')

    skip_inside = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                   'table']
    if not self.site.sitename == 'wikipedia:cs':
        skip_inside += ['template']

    for pattern, replacement in ((run_of_spaces, ' '),
                                 (space_at_end, '')):
        text = textlib.replaceExcept(text, pattern, replacement,
                                     skip_inside)
    return text
开发者ID:metakgp,项目名称:batman,代码行数:10,代码来源:cosmetic_changes.py
示例16: test_replace_tags_interwiki
def test_replace_tags_interwiki(self):
    """Check that only links with a valid interwiki code are protected."""
    langs = self.site.family.langs
    # the test needs 'es' to be a known code and 'ey' to be an unknown one
    if "es" not in langs or "ey" in langs:
        raise unittest.SkipTest("family %s doesnt have languages" % self.site)

    # "es" is a valid interwiki code, so the link is left alone
    kept = textlib.replaceExcept("[[es:s]]", "s", "t", ["interwiki"],
                                 site=self.site)
    self.assertEqual(kept, "[[es:s]]")

    # "ex" is not a valid interwiki code, so the text is replaced
    changed = textlib.replaceExcept("[[ex:x]]", "x", "y", ["interwiki"],
                                    site=self.site)
    self.assertEqual(changed, "[[ey:y]]")
开发者ID:hasteur,项目名称:pywikibot_scripts,代码行数:10,代码来源:textlib_tests.py
示例17: test_overlapping_replace
def test_overlapping_replace(self):
    """Test replacing with and without overlapping matches."""
    cases = (
        # (allowoverlap, expected result)
        (False, '2121'),
        (True, '2221'),
    )
    for overlap, expected in cases:
        result = textlib.replaceExcept('1111', '11', '21', [],
                                       allowoverlap=overlap,
                                       site=self.site)
        self.assertEqual(result, expected)
开发者ID:metakgp,项目名称:batman,代码行数:10,代码来源:textlib_tests.py
示例18: test_simple_replace
def test_simple_replace(self):
    """Test plain replacements with an empty exception list."""
    one = textlib.replaceExcept('AxB', 'x', 'y', [], site=self.site)
    self.assertEqual(one, 'AyB')

    two_adjacent = textlib.replaceExcept('AxxB', 'x', 'y', [],
                                         site=self.site)
    self.assertEqual(two_adjacent, 'AyyB')

    two_separated = textlib.replaceExcept('AxyxB', 'x', 'y', [],
                                          site=self.site)
    self.assertEqual(two_separated, 'AyyyB')
开发者ID:metakgp,项目名称:batman,代码行数:11,代码来源:textlib_tests.py
示例19: fixReferences
def fixReferences(self, text):
    """Fix references tags.

    Normalises the spacing around ``name=`` in ``<ref>`` tags, removes
    empty references and turns empty named references into self-closing
    tags.  Text inside nowiki, comment, math, pre, source and startspace
    sections is not touched.

    :param text: wikitext to process
    :return: the wikitext with cleaned-up reference tags
    """
    # See also https://en.wikipedia.org/wiki/User:AnomieBOT/source/tasks/OrphanReferenceFixer.pm
    exceptions = ["nowiki", "comment", "math", "pre", "source", "startspace"]
    # Rewrite <ref name= " and <ref name =" to <ref name=".  This goes
    # through replaceExcept like the other rewrites, so that protected
    # sections (e.g. nowiki examples) are not altered.
    text = textlib.replaceExcept(text, r'(?i)<ref +name(= *| *=)"', r'<ref name="', exceptions)
    # remove empty <ref/>-tag
    text = textlib.replaceExcept(text, r"(?i)(<ref\s*/>|<ref *>\s*</ref>)", r"", exceptions)
    # an empty reference that has attributes becomes a self-closing tag
    text = textlib.replaceExcept(text, r"(?i)<ref\s+([^>]+?)\s*>\s*</ref>", r"<ref \1/>", exceptions)
    return text
开发者ID:h4ck3rm1k3,项目名称:pywikibot-core,代码行数:11,代码来源:cosmetic_changes.py
示例20: test_replace_tags_interwiki
def test_replace_tags_interwiki(self):
    """Check that only links with a valid interwiki code are protected."""
    known_langs = self.site.family.langs
    # the test needs 'es' to be a known code and 'ey' to be an unknown one
    if not ('es' in known_langs and 'ey' not in known_langs):
        raise unittest.SkipTest('family %s doesnt have languages'
                                % self.site)

    cases = (
        # "es" is a valid interwiki code
        ('[[es:s]]', 's', 't', '[[es:s]]'),
        # "ex" is not a valid interwiki code
        ('[[ex:x]]', 'x', 'y', '[[ey:y]]'),
    )
    for source, old, new, expected in cases:
        result = textlib.replaceExcept(source, old, new,
                                       ['interwiki'], site=self.site)
        self.assertEqual(result, expected)
开发者ID:xZise,项目名称:pywikibot-core,代码行数:11,代码来源:textlib_tests.py
注：本文中的pywikibot.textlib.replaceExcept函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论