本文整理汇总了Python中six.moves.html_parser.HTMLParser类的典型用法代码示例。如果您正苦于以下问题：Python HTMLParser类的具体用法？Python HTMLParser怎么用？Python HTMLParser使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了HTMLParser类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: feed
def feed(self, data):
    """Pre-process *data*, then parse it in a single feed/close cycle.

    The markup is normalised before it reaches ``HTMLParser``:

    * ``<!`` that does not start a DOCTYPE, a comment (``<!--``) or a
      marked section (``<![``) is escaped to ``&lt;!`` so it is treated
      as text instead of a declaration.
    * Self-closing tags such as ``<br/>`` are rewritten by
      ``self._shorttag_replace`` (defined on the class, not shown here).
    * The numeric references ``&#39;`` and ``&#34;`` are replaced by the
      literal quote characters.
    """
    # NOTE: the entity literals below must stay spelled as entities; an
    # HTML-decoded copy of this code turns them into no-ops / syntax errors.
    data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
    data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
    data = data.replace('&#39;', "'")
    data = data.replace('&#34;', '"')
    HTMLParser.feed(self, data)
    # Close immediately so any buffered trailing text is flushed.
    HTMLParser.close(self)
开发者ID:ImmPortDB,项目名称:immport-galaxy,代码行数:7,代码来源:page.py
示例2: __init__
def __init__(self):
    """Initialise the base parser and start with empty link-collection state."""
    HTMLParser.__init__(self)
    # Nothing has been parsed yet: no title text, no pending anchor
    # attributes, not inside an anchor, and no links collected.
    self.title = ''
    self.attrs = None
    self.in_anchor = False
    self.links = []
开发者ID:AlexPerrot,项目名称:mkdocs,代码行数:7,代码来源:toc.py
示例3: __init__
def __init__(self, *args, **kwargs):
    """Initialise the HTMLParser base, then the remaining bases.

    On interpreters whose version compares greater than ``(3, 4)`` the
    parser is created with ``convert_charrefs=False``; older interpreters
    get the default constructor. All positional and keyword arguments are
    forwarded to the next ``__init__`` in the MRO.
    """
    parser_options = {}
    if sys.version_info > (3,4):  # pragma: no cover
        parser_options['convert_charrefs'] = False
    HTMLParser.__init__(self, **parser_options)

    super(HTMLRewriter, self).__init__(*args, **kwargs)
开发者ID:daleathan,项目名称:pywb,代码行数:7,代码来源:html_rewriter.py
示例4: feed
def feed(self, string):
    """Feed *string* to the parser, falling back to a raw pass-through.

    If parsing raises any ``Exception``, the traceback is printed and the
    unmodified *string* is written to ``self.out`` instead.
    """
    try:
        HTMLParser.feed(self, string)
    except Exception:  # pragma: no cover
        from traceback import print_exc
        print_exc()
        # Best effort: emit the input untouched rather than dropping it.
        self.out.write(string)
开发者ID:daleathan,项目名称:pywb,代码行数:7,代码来源:html_rewriter.py
示例5: display_link_prompt
def display_link_prompt(args, urls, titles):
    """Print URLs and their descriptions alongside a prompt.

    Keyword arguments:
        args -- program arguments (dict)
        urls -- search URLs found (list)
        titles -- descriptions of search URLs found (list)

    The numbered list is re-printed and the prompt repeated after every
    command. Returns False when the prompt is interrupted
    (KeyboardInterrupt/EOFError) or the input handling raises ValueError
    or IndexError.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement. Fall back to the old method where html.unescape is
    # unavailable (Python 2).
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    while True:
        print('\n{0}'.format(BORDER))
        for i in range(len(urls)):
            link = unescape(titles[i])
            print('{0}. {1}'.format(i+1, link.encode('utf-8') if PY2 else link))
        print(BORDER)

        # Handle link prompt input
        try:
            link_input = [inp.strip() for inp in input(': ').split()]
            if not link_input:
                continue
            utils.check_input(link_input)  # Check input in case of quit
            print('\n')
            # First token is the command/link, the rest are its arguments.
            exec_prompt_cmd(args, urls, link_input[0], link_input[1:])
        except (KeyboardInterrupt, EOFError, ValueError, IndexError):
            return False
开发者ID:huntrar,项目名称:cliquery,代码行数:25,代码来源:cliquery.py
示例6: original_unescape
def original_unescape(self, s):
    """Since we need to use this sometimes.

    Unescape HTML character references using the stock implementation.

    * a string is returned unescaped, as text;
    * a list is returned as a new list with every item unescaped;
    * any other value is returned unchanged.
    """
    # HTMLParser.unescape() was removed in Python 3.9; prefer
    # html.unescape() and only fall back to the base-class method where the
    # html module does not provide it (Python 2).
    try:
        from html import unescape
    except ImportError:
        def unescape(text):
            return HTMLParser.unescape(self, text)

    # basestring/unicode only exist on Python 2.
    try:
        string_types, to_text = basestring, unicode
    except NameError:
        string_types = to_text = str

    if isinstance(s, string_types):
        return to_text(unescape(s))
    if isinstance(s, list):
        return [to_text(unescape(item)) for item in s]
    return s
开发者ID:Parsely,项目名称:schemato,代码行数:8,代码来源:parselypage.py
示例7: __init__
def __init__(self, style, styles = None):
    """Initialise the base parser and the styling state.

    ``style`` is stored as given. ``styles`` falls back to the module's
    ``default_styles`` when it is omitted or otherwise falsy.
    """
    HTMLParser.__init__(self)
    self.style_stack = []
    self.styles = styles or default_styles
    self.style = style
    self.s = ''
开发者ID:schwa,项目名称:punic,代码行数:8,代码来源:styling.py
示例8: __init__
def __init__(self, media_locator, link_handler):
    """Initialise the base parser, the tag handler tables and the buffers.

    The start and start-end rule sets are both built from
    ``media_locator`` and ``link_handler``; the end rule set takes no
    arguments. The stack starts out holding one empty list.
    """
    HTMLParser.__init__(self)

    rule_args = (media_locator, link_handler)
    self.handlers_start = StartRules(*rule_args)
    self.handlers_startend = StartEndRules(*rule_args)
    self.handlers_end = EndRules()

    self.new_buffer()
    self.stack = deque([[]])
开发者ID:cecedille1,项目名称:PDF_generator,代码行数:9,代码来源:from_html.py
示例9: __init__
def __init__(self, max_words):
    """Initialise the base parser and the truncation bookkeeping.

    ``max_words`` is stored as given; the word counter starts at zero,
    no tags are open and no truncation point is known yet.
    """
    # HTMLParser is an old-style class on Python 2, which rules out
    # super(); the base initialiser is called explicitly instead.
    HTMLParser.__init__(self)
    self.truncate_at = None
    self.open_tags = []
    self.words_found = 0
    self.max_words = max_words
开发者ID:joetboole,项目名称:pelican,代码行数:9,代码来源:utils.py
示例10: feed
def feed(self, *args, **kwargs):
    """Feed data to the parser and record where truncation happened.

    ``self.truncate_at`` is set to the ``truncate_at`` carried by a raised
    ``self.TruncationCompleted``, or to ``None`` when parsing finishes
    without that exception. Other exceptions propagate and leave
    ``self.truncate_at`` untouched.
    """
    position = None
    try:
        # super() is not usable with Python 2's old-style HTMLParser,
        # hence the explicit base-class call.
        HTMLParser.feed(self, *args, **kwargs)
    except self.TruncationCompleted as completed:
        position = completed.truncate_at
    self.truncate_at = position
开发者ID:52M,项目名称:pelican,代码行数:9,代码来源:utils.py
示例11: __init__
def __init__(self):
    """Initialise the base parser and reset all traversal state."""
    HTMLParser.__init__(self)

    # Counters
    self.lastN = 0
    self.counting = 0
    self._depth = 0

    # Collected text, keyed by path (path -> text)
    self.depthText = {}

    # Position / ignore tracking
    self._lasttag = None
    self._ignorePath = None
    self._ignore = False
开发者ID:Mondego,项目名称:pyreco,代码行数:9,代码来源:allPythonContent.py
示例12: __init__
def __init__(self):
    """Initialise the base parser with no field captured and an empty buffer."""
    HTMLParser.__init__(self)
    # In-memory text buffer; starts empty.
    self.read_buffer = six.StringIO()
    self.in_tag = False
    # No field name or values have been read yet.
    self.new_value = None
    self.original_value = None
    self.text_name = None
开发者ID:Nitrate,项目名称:Nitrate,代码行数:9,代码来源:0003_migrate_logs_to_new_fields.py
示例13: cmd_genpot
def cmd_genpot(config, options):
    """Generate the gettext pot file.

    Changes the working directory to ``config.source_dir``, makes sure a
    ``po`` directory exists there, writes a stub ``po/<bundle_name>.pot``
    with the activity name (plus summary and description text when set)
    and then runs ``xgettext`` to merge in the strings from every ``.py``
    file below the source directory. ``options`` is not used.
    """
    os.chdir(config.source_dir)

    po_path = os.path.join(config.source_dir, 'po')
    if not os.path.isdir(po_path):
        os.mkdir(po_path)

    # Paths are made relative to the source dir and sorted.
    python_files = sorted(
        os.path.relpath(os.path.join(root, file_name), config.source_dir)
        for root, dirs_dummy, files in os.walk(config.source_dir)
        for file_name in files
        if file_name.endswith('.py')
    )

    # First write out a stub .pot file containing just the translated
    # activity name, then have xgettext merge the rest of the
    # translations into that. (We can't just append the activity name
    # to the end of the .pot file afterwards, because that might
    # create a duplicate msgid.)
    pot_file = os.path.join('po', '%s.pot' % config.bundle_name)
    escaped_name = _po_escape(config.activity_name)
    # The context manager guarantees the stub is closed (and flushed) before
    # xgettext reads it, even if writing fails part-way.
    with open(pot_file, 'w') as f:
        f.write('#: activity/activity.info:2\n')
        f.write('msgid "%s"\n' % escaped_name)
        f.write('msgstr ""\n')
        if config.summary is not None:
            escaped_summary = _po_escape(config.summary)
            f.write('#: activity/activity.info:3\n')
            f.write('msgid "%s"\n' % escaped_summary)
            f.write('msgstr ""\n')

        if config.description is not None:
            # Collect only the text nodes of the (HTML) description.
            parser = HTMLParser()
            strings = []
            parser.handle_data = strings.append
            parser.feed(config.description)
            # close() flushes text the parser may still be buffering, e.g.
            # a trailing fragment ending in an unterminated "&...".
            parser.close()

            for s in strings:
                s = s.strip()
                if s:
                    f.write('#: activity/activity.info:4\n')
                    f.write('msgid "%s"\n' % _po_escape(s))
                    f.write('msgstr ""\n')

    args = ['xgettext', '--join-existing', '--language=Python',
            '--keyword=_', '--add-comments=TRANS:', '--output=%s' % pot_file]

    args += python_files
    retcode = subprocess.call(args)
    if retcode:
        print('ERROR - xgettext failed with return code %i.' % retcode)
开发者ID:sugarlabs,项目名称:sugar-toolkit-gtk3,代码行数:56,代码来源:bundlebuilder.py
示例14: logged_in
def logged_in(self, y):
    """Return True when the session looks authenticated.

    The session counts as logged in when a cookie whose name contains
    ``remember_web_`` is present and *y* is either ``None`` or contains
    the text ``logout``. When *y* is given, the ``<option>`` entries found
    in it are stored on ``self.shows`` as ``{value: cleaned name}``.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement. Fall back to the old method on Python 2.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    # any() instead of bool(filter(...)): on Python 3 a filter object is
    # always truthy, which made the cookie check a no-op.
    has_session_cookie = any('remember_web_' in c for c in self.session.cookies.keys())
    if not ((y is None or 'logout' in y) and has_session_cookie):
        return False

    if y is not None:
        shows = {}
        for show_id, name in re.findall(r'<option value="(\d+)">(.*?)</option>', y):
            # Only byte strings need decoding; text is used as is.
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            shows[show_id] = sanitizeSceneName(unescape(unidecode(name)))
        self.shows = shows
    return True
开发者ID:JackDandy,项目名称:SickGear,代码行数:10,代码来源:showrss.py
示例15: add_set
def add_set(self, title, description, index=-1):
    """Add a check box for a set to ``self.sets_widget`` and return it.

    ``title`` becomes the check box label, with every ``&`` doubled.
    A non-empty ``description`` is HTML-unescaped and used as the tool
    tip. A non-negative ``index`` inserts the widget at that position in
    the layout; otherwise it is appended.
    """
    widget = QtWidgets.QCheckBox(title.replace('&', '&&'))
    if description:
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is its replacement. Fall back to the old method on Python 2.
        try:
            from html import unescape
        except ImportError:
            unescape = HTMLParser().unescape
        widget.setToolTip(unescape(description))
    layout = self.sets_widget.layout()
    if index >= 0:
        layout.insertWidget(index, widget)
    else:
        layout.addWidget(widget)
    return widget
开发者ID:jim-easterbrook,项目名称:Photini,代码行数:10,代码来源:flickr.py
示例16: check_bz_bug
def check_bz_bug(b):
    '''Return ``(name, None)`` for a bug in BZ.

    ``name`` is the HTML-unescaped first group of the ``TITLE`` pattern
    found in the page fetched by ``get_html(b)``. It is the empty string
    when nothing was fetched or the pattern does not match. The second
    element is always ``None``.
    '''
    response = get_html(b)
    if not response:
        return '', None

    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement. Fall back to the old method on Python 2.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    text = response.content.decode('utf-8')
    # Search once and reuse the match.
    match = TITLE.search(text)
    name = unescape(match.group(1)) if match else ''
    return name, None
开发者ID:sshnaidm,项目名称:various,代码行数:11,代码来源:check_tests.py
示例17: __init__
def __init__(self, *args, **kwargs):
    """Initialise the base parser and reset the capture state.

    All arguments are forwarded to the base initialiser.
    """
    if sys.version_info <= (3,):  # pragma: no cover
        # On Python 2 HTMLParser is an old-style class, so super() does
        # not work and the base initialiser is called directly.
        HTMLParser.__init__(self, *args, **kwargs)
    else:
        super(AnchorParser, self).__init__(*args, **kwargs)

    self.text = ''
    self.url = ''
    self.capture = 0
开发者ID:cltrudeau,项目名称:wrench,代码行数:11,代码来源:utils.py
示例18: get_formatted_value
def get_formatted_value(value, field):
    '''Prepare field from raw data.

    For fields whose ``fieldtype`` is "Text" or "Text Editor" the value is
    HTML-unescaped, ``<script>``/``<style>`` elements are removed and runs
    of whitespace are collapsed to single spaces. The result is
    ``"<field.label> : <value with HTML tags stripped>"``.
    '''
    if getattr(field, 'fieldtype', None) in ["Text", "Text Editor"]:
        # HTMLParser.unescape() was removed in Python 3.9; html.unescape()
        # is its replacement. Fall back to the old method on Python 2.
        try:
            from html import unescape
        except ImportError:
            from six.moves.html_parser import HTMLParser
            unescape = HTMLParser().unescape

        value = unescape(value)
        # DOTALL is passed as a flag: a trailing inline "(?s)" is rejected
        # by Python 3.11+ (global flags must start the expression).
        value = re.sub(r'<[\s]*(script|style).*?</\1>', '', text_type(value),
                       flags=re.DOTALL)
        value = ' '.join(value.split())
    return field.label + " : " + strip_html_tags(text_type(value))
开发者ID:vhrspvl,项目名称:vhrs-frappe,代码行数:11,代码来源:global_search.py
示例19: get_email_subject
def get_email_subject(self):
    """
    WARNING: It is MANDATORY to override method if you are going to
    send email using the `send_notification_email` method.
    Your class must define an `email_subject_tmpl` attribute
    containing a template path to a file that has your email subject.

    Returns the rendered subject with HTML character references
    converted back to plain characters.
    """
    # HTMLParser.unescape() was removed in Python 3.9; html.unescape() is
    # its replacement. Fall back to the old method on Python 2.
    try:
        from html import unescape
    except ImportError:
        unescape = HTMLParser().unescape

    # Convert the html back to plaintext after rendering it using template
    # to get rid of html ampersand character codes
    html_email = self._get_email_field('email_subject_tmpl', 'get_email_subject')
    return unescape(html_email)
开发者ID:Nomadblue,项目名称:django-nomad-notifier,代码行数:12,代码来源:models.py
示例20: __init__
def __init__(self, tag="a", attr="href", process=None, unique=False):
    """Initialise the base parser and emit the deprecation warning.

    ``tag`` and ``attr`` may each be a callable predicate or a plain value
    that is compared for equality. ``process`` may be a callable applied
    to a value; anything non-callable means "return the value unchanged".
    ``unique`` is stored as given.
    """
    HTMLParser.__init__(self)

    # stacklevel=2 attributes the warning to the code instantiating the class.
    warnings.warn(
        "HtmlParserLinkExtractor is deprecated and will be removed in "
        "future releases. Please use scrapy.linkextractors.LinkExtractor",
        ScrapyDeprecationWarning, stacklevel=2,
    )

    if callable(tag):
        self.scan_tag = tag
    else:
        self.scan_tag = lambda t: t == tag

    if callable(attr):
        self.scan_attr = attr
    else:
        self.scan_attr = lambda a: a == attr

    if callable(process):
        self.process_attr = process
    else:
        self.process_attr = lambda v: v

    self.unique = unique
开发者ID:CPoirot3,项目名称:scrapy,代码行数:13,代码来源:htmlparser.py
注:本文中的six.moves.html_parser.HTMLParser类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论