本文整理汇总了Python中whoosh.highlight.highlight函数的典型用法代码示例。如果您正苦于以下问题:Python highlight函数的具体用法?Python highlight怎么用?Python highlight使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了highlight函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_null_fragment
def test_null_fragment():
    """Highlight ``_doc`` with a WholeFragmenter and check the upper-cased matches."""
    expected = "alfa BRAVO charlie delta echo foxtrot golf hotel INDIA juliet kilo lima"
    wanted = frozenset(("bravo", "india"))
    # Build the pipeline pieces inline, in the order highlight() takes them:
    # analyzer, fragmenter, formatter.
    result = highlight.highlight(
        _doc,
        wanted,
        analysis.StandardAnalyzer(),
        highlight.WholeFragmenter(),
        highlight.UppercaseFormatter(),
    )
    assert result == expected
开发者ID:JunjieHu,项目名称:dl,代码行数:7,代码来源:test_highlighting.py
示例2: search
def search(self):
    """Run a full-text search for the ``terms`` GET parameter and render it.

    Queries shorter than four characters trigger an error flash message and
    a redirect to the blog index.  Otherwise the query is parsed against the
    ``title``, ``content`` and ``summary`` fields, at most ten hits are
    fetched, and every hit gets a resolved ``url`` and an HTML ``highlights``
    excerpt before it is appended to ``c.results``.

    Returns:
        The rendered ``search.tpl`` template.
    """
    c.terms = request.GET.get('terms', '')
    c.results = []
    if len(c.terms) < 4:
        h.flash(
            _('Search queries must be at least 4 characters in length.'),
            'error'
        )
        # NOTE(review): presumably redirect() aborts the request by raising;
        # if it returns, the search below still runs -- confirm.
        redirect(url(controller='blog', action='index'))
    query = MultifieldParser(
        ['title', 'content', 'summary'],
        schema=index.schema
    ).parse(c.terms)
    results = index.searcher().search(query, limit=10)
    # The terms to highlight depend only on the parsed query, so extract them
    # once instead of once per hit.
    terms = [text for fieldname, text in query.all_terms()
             if fieldname == 'content']
    for result in results:
        # The stored ``url`` field is a JSON object of url() keyword arguments.
        url_kwargs = json.loads(result['url'])
        result['url'] = url(**url_kwargs)
        # NOTE(review): ``search.schema`` is used here while the parser above
        # uses ``index.schema`` -- verify that ``search`` is the intended name.
        result['highlights'] = highlight(
            result['content'],
            terms,
            search.schema['content'].format.analyzer,
            ContextFragmenter(terms),
            HtmlFormatter(tagname='span', classname='highlight')
        )
        c.results.append(result)
    return render('search.tpl', slacks=True)
开发者ID:chrisrsantiago,项目名称:muse,代码行数:28,代码来源:blog.py
示例3: test_null_fragment
def test_null_fragment(self):
    """Highlight ``self._doc`` with NullFragmenter and check the upper-cased matches."""
    expected = "alfa BRAVO charlie delta echo foxtrot golf hotel INDIA juliet kilo lima"
    wanted = frozenset(("bravo", "india"))
    analyzer = analysis.StandardAnalyzer()
    # NOTE(review): NullFragmenter is handed over without being called, unlike
    # the other fragmenters -- presumably it is a plain callable in this
    # whoosh version; confirm.
    fragmenter = highlight.NullFragmenter
    formatter = highlight.UppercaseFormatter()
    result = highlight.highlight(self._doc, wanted, analyzer, fragmenter, formatter)
    self.assertEqual(result, expected)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:7,代码来源:test_highlighting.py
示例4: test_context_at_start
def test_context_at_start():
    """A match on ``alfa`` with ``surround=15`` yields the expected leading excerpt."""
    expected = "ALFA bravo charlie delta echo foxtrot"
    result = highlight.highlight(
        _doc,
        frozenset(["alfa"]),
        analysis.StandardAnalyzer(),
        highlight.ContextFragmenter(surround=15),
        highlight.UppercaseFormatter(),
    )
    assert result == expected
开发者ID:JunjieHu,项目名称:dl,代码行数:7,代码来源:test_highlighting.py
示例5: run
def run(self):
    """Build highlighted excerpts for the current result page and cache them.

    The page is identified by the ``(start, stop)`` pair in ``self.p``.  When
    no entry for that page exists in ``dataModel`` yet, every result in the
    page is highlighted and stored as a row tuple of
    ``(position, volume, page, items, excerpt)``.  Progress and start/finish
    notifications are posted to the window through ``wx.CallAfter``.
    """
    # '+' and '|' act as separators; tokens that start with 'v:' are not
    # search terms and are dropped.
    cleaned = self.keywords.replace('+', ' ').replace('|', ' ')
    termset = [word for word in cleaned.split() if not word.startswith('v:')]

    wx.CallAfter(self.window.DisplayStarted)
    key = '%d:%d' % self.p
    if key not in dataModel:
        rows = []
        for offset, hit in enumerate(self.results[self.p[0]:self.p[1]]):
            # The n-gram sizes span the shortest to the longest search term.
            lengths = [len(term) for term in termset]
            excerpts = highlight(
                hit['content'],
                termset,
                NgramTokenizer(min(lengths), max(lengths)),
                SimpleFragmenter(size=70),
                MyHtmlFormatter(tagname='font', attrs='size="4" color="purple"'))
            if self.lang == 'pali' and 'wxMac' not in wx.PlatformInfo:
                # Swap three characters for private-use code points.
                # NOTE(review): presumably a font workaround on non-Mac
                # platforms -- confirm.
                excerpts = excerpts.replace(u'ฐ', u'\uf700')
                excerpts = excerpts.replace(u'ญ', u'\uf70f')
                excerpts = excerpts.replace(u'\u0e4d', u'\uf711')
            position = self.p[0] + offset + 1
            if self.lang == 'thaibt':
                row = (position, unicode(hit['volume']), unicode(hit['page']), u'0', excerpts)
            else:
                row = (position, hit['volume'].lstrip(u'0'), hit['page'].lstrip(u'0'), hit['items'], excerpts)
            rows.append(row)
            wx.CallAfter(self.window.UpdateProgress, (offset + 1) * 10)
        dataModel[key] = rows
    wx.CallAfter(self.window.DisplayFinished)
示例6: test_simple_fragment
def test_simple_fragment(self):
    """SimpleFragmenter with ``size=20`` yields two fragments joined by ``...``."""
    expected = "alfa BRAVO charlie...hotel INDIA juliet kilo"
    wanted = frozenset(("bravo", "india"))
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.SimpleFragmenter(size=20)
    formatter = highlight.UppercaseFormatter()
    result = highlight.highlight(self._doc, wanted, analyzer, fragmenter, formatter)
    self.assertEqual(result, expected)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:7,代码来源:test_highlighting.py
示例7: run
def run(self):
    """Build highlighted excerpts for the current result page and cache them.

    The page is identified by the ``(start, stop)`` pair in ``self.p``.  When
    no entry for that page exists in ``dataModel`` yet, every result in the
    page is highlighted and stored as a row tuple of
    ``(position, volume, page, items, excerpt)``.  Progress and start/finish
    notifications are posted to the window through ``wx.CallAfter``.
    """
    # '+' acts as a separator; every whitespace-delimited token is a search
    # term, whatever the language.
    termset = self.keywords.replace('+', ' ').split()

    wx.CallAfter(self.window.DisplayStarted)
    key = '%d:%d' % self.p
    if key not in dataModel:
        rows = []
        for offset, hit in enumerate(self.results[self.p[0]:self.p[1]]):
            # The n-gram sizes span the shortest to the longest search term.
            lengths = [len(term) for term in termset]
            excerpts = highlight(
                hit['content'],
                termset,
                NgramTokenizer(min(lengths), max(lengths)),
                SimpleFragmenter(size=70),
                MyHtmlFormatter(tagname='font', attrs='size="4" color="purple"'))
            if self.lang == 'pali' and 'wxMac' not in wx.PlatformInfo:
                # Swap three characters for private-use code points.
                # NOTE(review): presumably a font workaround on non-Mac
                # platforms -- confirm.
                excerpts = excerpts.replace(u'ฐ', u'\uf700')
                excerpts = excerpts.replace(u'ญ', u'\uf70f')
                excerpts = excerpts.replace(u'\u0e4d', u'\uf711')
            # NOTE(review): the stored field is read as 'volumn' here --
            # verify that this matches the index schema.
            row = (
                self.p[0] + offset + 1,
                hit['volumn'].lstrip(u'0'),
                hit['page'].lstrip(u'0'),
                hit['items'],
                excerpts,
            )
            rows.append(row)
            wx.CallAfter(self.window.UpdateProgress, (offset + 1) * 10)
        dataModel[key] = rows
    wx.CallAfter(self.window.DisplayFinished)
开发者ID:kit119,项目名称:E-Tipitaka-for-PC,代码行数:29,代码来源:mythread.py
示例8: test_context_fragment
def test_context_fragment(self):
    """ContextFragmenter with ``surround=6`` keeps a short context around each match."""
    expected = "alfa BRAVO charlie...hotel INDIA juliet"
    wanted = frozenset(("bravo", "india"))
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.ContextFragmenter(wanted, surround=6)
    formatter = highlight.UppercaseFormatter()
    result = highlight.highlight(self._doc, wanted, analyzer, fragmenter, formatter)
    self.assertEqual(result, expected)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:7,代码来源:test_highlighting.py
示例9: test_html_format
def test_html_format(self):
    """HtmlFormatter wraps each match in ``<strong>`` with a per-term class."""
    expected = 'alfa <strong class="match term0">bravo</strong> charlie...hotel <strong class="match term1">india</strong> juliet'
    wanted = frozenset(("bravo", "india"))
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.ContextFragmenter(wanted, surround=6)
    formatter = highlight.HtmlFormatter()
    result = highlight.highlight(self._doc, wanted, analyzer, fragmenter, formatter)
    self.assertEqual(result, expected)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:7,代码来源:test_highlighting.py
示例10: resultExcerpt
def resultExcerpt(self, results, i, ki=None):
    """Return a highlighted HTML excerpt for the ``i``-th search result.

    Args:
        results: search results; must support indexing and expose the
            parsed query as ``results.q``.
        i: index of the result to excerpt.
        ki: kitab object holding the result node; when falsy it is looked
            up from the result's ``kitab`` and ``vrr`` fields.

    Returns:
        Whatever ``highlight`` returns for the node text, limited to the
        top three sentence fragments in document order.
    """
    # FIXME: this should not be implementation specific
    hit = results[i]
    if not ki:
        name = hit['kitab']
        version = hit['vrr'].split('-')[0]
        meta = self.th.getMeta().getLatestKitabV(name, version)
        ki = self.th.getCachedKitab(meta['uri'])
    node = ki.getNodeByIdNum(int(hit['nodeIdNum']))
    # The text runs up to the next TOC node's global order, or -1 when there
    # is no next node.
    following = ki.toc.next(node)
    ub = following.globalOrder if following else -1
    txt = node.toText(ub)
    # existing_terms() fills the set with (field, term) pairs.
    found = set()
    results.q.existing_terms(self.indexer.reader(), found, phrases=True)
    # Keep each distinct term that occurs in the content or title field.
    terms = list(set(term for field, term in found
                     if field in ('content', 'title')))
    return highlight(txt, terms, analyzer,
                     SentenceFragmenter(sentencechars=".!?؟\n"),
                     HtmlFormatter(between=u"\u2026\n"),
                     top=3, scorer=BasicFragmentScorer, minscore=1,
                     order=FIRST)
开发者ID:amzouri,项目名称:Thawab,代码行数:30,代码来源:whooshSearchEngine.py
示例11: test_maxclasses
def test_maxclasses(self):
    """With ``maxclasses=2`` the term class suffix alternates between ``t0`` and ``t1``."""
    expected = '<b class="match t0">alfa</b> <b class="match t1">bravo</b> <b class="match t0">charlie</b>...<b class="match t1">delta</b> <b class="match t0">echo</b> foxtrot'
    wanted = frozenset(("alfa", "bravo", "charlie", "delta", "echo"))
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.ContextFragmenter(wanted, surround=6)
    formatter = highlight.HtmlFormatter(tagname="b", termclass="t", maxclasses=2)
    result = highlight.highlight(self._doc, wanted, analyzer, fragmenter, formatter)
    self.assertEqual(result, expected)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:7,代码来源:test_highlighting.py
示例12: _process_results
def _process_results(self, raw_results, highlight=False, query_string=''):
    """Convert raw Whoosh hits into ``SearchResult`` objects.

    Args:
        raw_results: iterable of raw hits; each must be convertible with
            ``dict()`` and carry ``django_ct`` and ``django_id`` entries.
        highlight: when true, add a ``highlighted`` entry holding an
            upper-cased excerpt of the content field to every result.
        query_string: the user's query; it is split on whitespace (with
            ``*`` removed) to get the terms to highlight, and is passed to
            the spelling suggester.

    Returns:
        A dict with ``results``, ``hits``, ``facets`` and
        ``spelling_suggestion``.  ``hits`` starts at ``len(raw_results)``
        and is reduced by one for every hit whose model cannot be resolved
        or is not indexed.
    """
    from haystack import site
    results = []
    hits = len(raw_results)
    facets = {}
    spelling_suggestion = None
    indexed_models = site.get_indexed_models()

    if highlight:
        # Imported under an alias so the ``highlight`` flag parameter is not
        # rebound to the function; the analyzer and terms are the same for
        # every hit, so they are built once.
        from whoosh import analysis
        from whoosh.highlight import (
            highlight as whoosh_highlight,
            ContextFragmenter,
            UppercaseFormatter,
        )
        sa = analysis.StemmingAnalyzer()
        terms = [term.replace('*', '') for term in query_string.split()]

    for doc_offset, raw_result in enumerate(raw_results):
        raw_result = dict(raw_result)
        app_label, model_name = raw_result['django_ct'].split('.')
        additional_fields = {}
        for key, value in raw_result.items():
            additional_fields[str(key)] = self._to_python(value)
        # These two are passed to SearchResult positionally, not as extras.
        del additional_fields['django_ct']
        del additional_fields['django_id']

        if highlight:
            # DRL_FIXME: Highlighting doesn't seem to work properly in testing.
            additional_fields['highlighted'] = {
                self.content_field_name: [
                    whoosh_highlight(
                        additional_fields.get(self.content_field_name),
                        terms,
                        sa,
                        ContextFragmenter(terms),
                        UppercaseFormatter(),
                    )
                ],
            }

        # Requires Whoosh 0.1.20+.
        if hasattr(raw_results, 'score'):
            score = raw_results.score(doc_offset)
        else:
            score = None
        if score is None:
            score = 0

        model = get_model(app_label, model_name)
        if model and model in indexed_models:
            result = SearchResult(app_label, model_name, raw_result['django_id'], score, **additional_fields)
            results.append(result)
        else:
            # Unknown or un-indexed model: the hit is dropped from the count.
            hits -= 1

    if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
        spelling_suggestion = self.create_spelling_suggestion(query_string)

    return {
        'results': results,
        'hits': hits,
        'facets': facets,
        'spelling_suggestion': spelling_suggestion,
    }
开发者ID:mthornhill,项目名称:django-haystack,代码行数:59,代码来源:whoosh_backend.py
示例13: test_html_escape
def test_html_escape():
    """HtmlFormatter must HTML-escape markup characters from the source text."""
    terms = frozenset(["bravo"])
    sa = analysis.StandardAnalyzer()
    wf = highlight.WholeFragmenter()
    hf = highlight.HtmlFormatter()
    htext = highlight.highlight(u('alfa <bravo "charlie"> delta'), terms, sa,
                                wf, hf)
    # The angle brackets of the input must come back as entities; only the
    # formatter's own <strong> tag may appear as raw markup.
    # NOTE(review): double quotes are expected to stay unescaped -- confirm
    # against the whoosh version under test.
    assert htext == 'alfa &lt;<strong class="match term0">bravo</strong> "charlie"&gt; delta'
开发者ID:JunjieHu,项目名称:dl,代码行数:8,代码来源:test_highlighting.py
示例14: test_sentence_fragment
def test_sentence_fragment(self):
    """SentenceFragmenter keeps only the sentences that contain a matched term."""
    expected = "This is the first SENTENCE...This SENTENCE is the second...Third SENTENCE here"
    source = u"This is the first sentence. This one doesn't have the word. This sentence is the second. Third sentence here."
    wanted = ("sentence", )
    # No stop list is used, so no words are filtered out as stop words.
    analyzer = analysis.StandardAnalyzer(stoplist=None)
    fragmenter = highlight.SentenceFragmenter()
    formatter = highlight.UppercaseFormatter()
    result = highlight.highlight(source, wanted, analyzer, fragmenter, formatter)
    self.assertEqual(result, expected)
开发者ID:SpaceAppsXploration,项目名称:whoosh,代码行数:8,代码来源:test_highlighting.py
示例15: get_highlighted_summary
def get_highlighted_summary(self, summary, query, analyzer, frag, format):
    """Return a highlighted version of ``summary``, or the plain summary.

    Newlines are replaced by spaces and the text is cut to 350 characters
    (with ``...`` appended) before highlighting.

    Args:
        summary: the text to summarise.
        query: the terms passed to ``highlight``.
        analyzer: the analyzer passed to ``highlight``.
        frag: the fragmenter passed to ``highlight``.
        format: the formatter passed to ``highlight``.

    Returns:
        The highlighted text, or the normalised summary when ``highlight``
        returns a falsy value.
    """
    summary = unicode(summary.replace("\n", " "))
    if len(summary) > 350:
        # Already normalised above, so only the truncation is needed here.
        summary = summary[0:350] + "..."
    hl = highlight(summary, query, analyzer, frag, format)
    return hl if hl else summary
开发者ID:jiyinhe,项目名称:fedweb_ui,代码行数:9,代码来源:models.py
示例16: highlight
def highlight(self, content, top=5):
    """Return highlighted fragments of ``content`` for content searches.

    Args:
        content: the text to highlight; it is passed through ``escape``
            before highlighting.
        top: value forwarded as the ``top`` argument of ``highlight``.

    Returns:
        The highlighted text, or ``''`` when ``self.search_type`` is not
        ``'content'``.
    """
    if self.search_type == 'content':
        return highlight(
            escape(content),
            self.highlight_items,
            analyzer=ANALYZER,
            fragmenter=FRAGMENTER,
            formatter=FORMATTER,
            top=top,
        )
    return ''
开发者ID:lmamsen,项目名称:rhodecode,代码行数:10,代码来源:__init__.py
示例17: highlight
def highlight(self, content, top=5):
    """Return highlighted fragments of ``content`` for content or message searches.

    Args:
        content: the text to highlight.
        top: value forwarded as the ``top`` argument of ``highlight``.

    Returns:
        The highlighted text, or ``''`` when ``self.search_type`` is neither
        ``'content'`` nor ``'message'``.
    """
    if self.search_type in ('content', 'message'):
        return highlight(
            text=content,
            terms=self.highlight_items,
            analyzer=ANALYZER,
            fragmenter=FRAGMENTER,
            formatter=FORMATTER,
            top=top,
        )
    return ''
开发者ID:yujiro,项目名称:rhodecode,代码行数:12,代码来源:__init__.py
示例18: test_workflow_manual
def test_workflow_manual():
    """Walk through manual highlighting: index titles, search, upper-case the matches."""
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    documents = (
        ("1", "The man who wasn't there"),
        ("2", "The dog who barked at midnight"),
        ("3", "The invisible man"),
        ("4", "The girl with the dragon tattoo"),
        ("5", "The woman who disappeared"),
    )
    w = ix.writer()
    for doc_id, title in documents:
        w.add_document(id=u(doc_id), title=u(title))
    w.commit()

    with ix.searcher() as s:
        # Parse the user query.
        q = qparser.QueryParser("title", schema=ix.schema).parse(u("man"))

        # Keep only the query terms that target the field being highlighted.
        terms = [text for fieldname, text in q.all_terms()
                 if fieldname == "title"]

        # Run the search.
        r = s.search(q)
        assert len(r) == 2

        # Highlight with the analyzer the field itself uses.  Be careful not
        # to do this on non-text field types such as DATETIME.
        analyzer = schema["title"].analyzer

        # The whole title should be highlighted rather than fragments of it,
        # hence WholeFragmenter.
        nf = highlight.WholeFragmenter()

        # Matched terms are simply upper-cased in this example.
        fmt = highlight.UppercaseFormatter()

        outputs = [highlight.highlight(d["title"], terms, analyzer, nf, fmt)
                   for d in r]
        assert outputs == ["The invisible MAN", "The MAN who wasn't there"]
开发者ID:JunjieHu,项目名称:dl,代码行数:44,代码来源:test_highlighting.py
示例19: searchBodyAndHighlight
def searchBodyAndHighlight(q):
    """Search the ``body`` field for ``q`` and print a highlighted excerpt per hit.

    Relies on the module-level names ``ix``, ``s``, ``schema`` and
    ``colorIpythonFormatter``.

    Args:
        q: the raw query string; it is parsed with ``SimpleParser``.
    """
    parser = SimpleParser("body", schema=ix.schema)
    q = parser.parse(q)
    terms = [text for fieldname, text in q.all_terms()
             if fieldname == "body"]
    r = s.search(q)
    analyzer = schema["body"].format.analyzer
    # Report the extracted terms, not the bound ``q.all_terms`` method.  The
    # single-argument form prints the same as a statement or a function call.
    print("will tokenize with %s" % (terms,))
    # Give the fragmenter the extracted terms rather than the bound method.
    fragmenter = highlight.ContextFragmenter(terms, 400, 80)
    # formatter = highlight.HtmlFormatter()
    formatter = colorIpythonFormatter
    for d in r:
        # The text argument to highlight is the stored text of the body.
        text = d["body"]
        res = highlight.highlight(text, terms, analyzer, fragmenter, formatter)
        # Normalise compatibility characters before encoding for output.
        print(unicodedata.normalize('NFKC', res).encode("utf-8", "replace"))
        print("-" * 8)
开发者ID:massyah,项目名称:ResearchBrowser,代码行数:20,代码来源:whooshIndexing.py
示例20: _process_results
def _process_results(self, raw_results, highlight=False, query_string=''):
results = []
facets = {}
for raw_result in raw_results:
raw_result = dict(raw_result)
app_label, module_name = raw_result['django_ct_s'].split('.')
additional_fields = {}
for key, value in raw_result.items():
additional_fields[str(key)] = self._to_python(value)
del(additional_fields['django_ct_s'])
del(additional_fields['django_id_s'])
# DRL_FIXME: Figure out if there's a way to get the score out of Whoosh.
# del(additional_fields['score'])
if highlight:
from whoosh import analysis
from whoosh.highlight import highlight, ContextFragmenter, UppercaseFormatter
sa = analysis.StemmingAnalyzer()
terms = [term.replace('*', '') for term in query_string.split()]
# DRL_FIXME: Highlighting doesn't seem to work properly in testing.
additional_fields['highlighted'] = {
self.content_field_name: [highlight(additional_fields.get(self.content_field_name), terms, sa, ContextFragmenter(terms), UppercaseFormatter())],
}
result = SearchResult(app_label, module_name, raw_result['django_id_s'], raw_result.get('score', 0), **additional_fields)
results.append(result)
return {
'results': results,
'hits': len(results),
'facets': facets,
}
开发者ID:JoeGermuska,项目名称:django-haystack,代码行数:36,代码来源:whoosh_backend.py
注:本文中的whoosh.highlight.highlight函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论