This page collects typical usage examples of the Python function searx.utils.html_to_text. If you have been wondering what html_to_text does, how to call it, and what real-world uses look like, the curated examples below should help.
Twenty code examples of html_to_text are listed, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code samples.
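Before the examples, here is a minimal standalone sketch of what html_to_text does: it strips the tags from an HTML fragment, decodes entities, and returns the remaining text. The sketch is illustrative only; it assumes a searx checkout is on the Python path, and the sample HTML string is made up.

# Minimal usage sketch (assumption: searx is importable; the HTML snippet is illustrative)
from searx.utils import html_to_text

snippet = '<p>Hello <b>world</b> &amp; friends</p>'
print(html_to_text(snippet))  # expected: roughly "Hello world & friends"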
Example 1: response
def response(resp):
    results = []

    doc = fromstring(resp.text)

    # parse results
    for r in doc.xpath(result_xpath):
        try:
            res_url = r.xpath(url_xpath)[-1]
        except:
            continue

        if not res_url:
            continue

        title = html_to_text(''.join(r.xpath(title_xpath)))
        content = html_to_text(''.join(r.xpath(content_xpath)))

        # append result
        results.append({'title': title,
                        'content': content,
                        'url': res_url})

    # return results
    return results
Developer ID: Reventl0v, Project: searx, Lines: 25, Source file: duckduckgo.py
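Note that result_xpath, url_xpath, title_xpath and content_xpath in Example 1 are module-level constants that this excerpt does not show. Example 7 below contains representative values for the same DuckDuckGo selectors; the sketch that follows simply copies them, as an assumption about what Example 1 relies on (the exact strings in this revision of duckduckgo.py may differ).

# Assumed module-level selectors, copied from the variant shown in Example 7
result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
url_xpath = './/a[@class="large"]/@href'
title_xpath = './/a[@class="large"]//text()'
content_xpath = './/div[@class="snippet"]//text()'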
Example 2: response
def response(resp):
    results = []

    search_results = loads(resp.text)

    # return empty array if there are no results
    if 'data' not in search_results:
        return []

    data = search_results.get('data', {})
    res = data.get('result', {})

    # parse results
    for result in res.get('items', {}):

        title = html_to_text(result['title'])
        res_url = result['url']
        content = html_to_text(result['desc'])

        if category_to_keyword.get(categories[0], '') == 'web':
            results.append({'title': title,
                            'content': content,
                            'url': res_url})

        elif category_to_keyword.get(categories[0], '') == 'images':
            thumbnail_src = result['thumbnail']
            img_src = result['media']
            results.append({'template': 'images.html',
                            'url': res_url,
                            'title': title,
                            'content': '',
                            'thumbnail_src': thumbnail_src,
                            'img_src': img_src})

        elif category_to_keyword.get(categories[0], '') == 'social':
            published_date = datetime.fromtimestamp(result['date'], None)
            img_src = result.get('img', None)
            results.append({'url': res_url,
                            'title': title,
                            'publishedDate': published_date,
                            'content': content,
                            'img_src': img_src})

        elif category_to_keyword.get(categories[0], '') == 'news':
            published_date = datetime.fromtimestamp(result['date'], None)
            media = result.get('media', [])
            if len(media) > 0:
                img_src = media[0].get('pict', {}).get('url', None)
            else:
                img_src = None
            results.append({'url': res_url,
                            'title': title,
                            'publishedDate': published_date,
                            'content': content,
                            'img_src': img_src})

    return results
Developer ID: cy8aer, Project: searx, Lines: 58, Source file: qwant.py
Example 3: response
def response(resp):
    results = []
    response_data = loads(resp.text)

    for result in response_data['results']:
        url = _get_url(result)
        title = result['e']['dn']
        content = _get_content(result)
        results.append({
            'url': url,
            'title': html_to_text(title),
            'content': html_to_text(content),
        })

    return results
Developer ID: asciimoo, Project: searx, Lines: 15, Source file: microsoft_academic.py
Example 4: test_html_to_text
def test_html_to_text(self):
    html = """
    <a href="/testlink" class="link_access_account">
        <span class="toto">
            <span>
                <img src="test.jpg" />
            </span>
        </span>
        <span class="titi">
            Test text
        </span>
    </a>
    """
    self.assertIsInstance(utils.html_to_text(html), unicode)
    self.assertIsNotNone(utils.html_to_text(html))
    self.assertEqual(utils.html_to_text(html), "Test text")
Developer ID: Acidburn0zzz, Project: searx, Lines: 16, Source file: test_utils.py
Example 5: response
def response(resp):
    results = []

    search_res = loads(resp.text)

    # return empty array if there are no results
    if search_res.get('total') < 1:
        return []

    # parse results
    for result in search_res['results']:
        if 'id' not in result:
            continue

        # is it thumbnail or img_src??
        thumbnail = None
        if 'logo' in result:
            thumbnail = result['logo']
            if thumbnail[0] == '/':
                thumbnail = url + thumbnail

        content = None
        if 'highlights' in result:
            content = result['highlights'][0]['value']

        # append result
        results.append({'url': url + 'structure/' + result['id'],
                        'title': result['label'],
                        # 'thumbnail': thumbnail,
                        'img_src': thumbnail,
                        'content': html_to_text(content)})

    # return results
    return results
Developer ID: JASON0916, Project: searx, Lines: 34, Source file: scanr_structures.py
Example 6: response
def response(resp):
    results = []

    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
    for r in search_res:
        if not r.get('t'):
            continue
        results.append({'title': r['t'],
                        'content': html_to_text(r['a']),
                        'url': r['u']})

    return results
Developer ID: nschlemm, Project: searx, Lines: 10, Source file: duckduckgo.py
Example 7: response
def response(resp):
    result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
    url_xpath = './/a[@class="large"]/@href'
    title_xpath = './/a[@class="large"]//text()'
    content_xpath = './/div[@class="snippet"]//text()'
    results = []

    doc = fromstring(resp.text)

    for r in doc.xpath(result_xpath):
        try:
            res_url = r.xpath(url_xpath)[-1]
        except:
            continue

        if not res_url:
            continue

        title = html_to_text(''.join(r.xpath(title_xpath)))
        content = html_to_text(''.join(r.xpath(content_xpath)))

        results.append({'title': title,
                        'content': content,
                        'url': res_url})

    return results
Developer ID: canarduck, Project: searx, Lines: 23, Source file: duckduckgo.py
Example 8: extract_text
def extract_text(xpath_results):
    if type(xpath_results) == list:
        # it's list of result : concat everything using recursive call
        if not len(xpath_results):
            raise Exception('Empty url resultset')
        result = ''
        for e in xpath_results:
            result = result + extract_text(e)
        return result
    elif type(xpath_results) == _ElementStringResult:
        # it's a string
        return ''.join(xpath_results)
    else:
        # it's a element
        return html_to_text(xpath_results.text_content())
Developer ID: Aigeruth, Project: searx, Lines: 15, Source file: xpath.py
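Example 8's extract_text is normally fed the raw return value of an lxml xpath() call, which may be a list of elements or a list of string results. Below is a hedged driver sketch, assuming the function above is in scope together with html_to_text and lxml's _ElementStringResult; the HTML and XPath expressions are made up for illustration.

# Hypothetical driver for extract_text (assumes lxml is installed)
from lxml.html import fromstring

dom = fromstring('<div><p class="snippet">Some <b>bold</b> text</p></div>')
# a list of elements: each element's text_content() is cleaned and concatenated
print(extract_text(dom.xpath('//p[@class="snippet"]')))  # -> roughly "Some bold text"
# a list of string results: the text nodes are joined as-is
print(extract_text(dom.xpath('//b/text()')))  # -> "bold"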
Example 9: response
def response(resp):
    results = []

    raw_search_results = loads(resp.text)

    # return empty array if there are no results
    if not raw_search_results:
        return []

    search_results = raw_search_results.get('channels', [])

    if len(search_results) == 0:
        return []

    for result in search_results[0].get('items', []):
        # parse image results
        if result.get('image'):

            result_url = ''
            if 'url' in result:
                result_url = result['url']
            elif 'link' in result:
                result_url = result['link']
            else:
                continue

            # append result
            results.append({'url': result_url,
                            'title': result['title'],
                            'content': '',
                            'img_src': result['image'],
                            'template': 'images.html'})

        # parse general results
        else:
            publishedDate = parser.parse(result['pubDate'])

            # append result
            results.append({'url': result['link'],
                            'title': result['title'],
                            'content': html_to_text(result['description']),
                            'publishedDate': publishedDate})

    # TODO parse video, audio and file results

    # return results
    return results
Developer ID: MrLpk, Project: searx, Lines: 47, Source file: yacy.py
Example 10: index
def index():
    global categories

    if request.method == 'POST':
        request_data = request.form
    else:
        request_data = request.args

    if not request_data.get('q'):
        return render('index.html')

    selected_categories = []

    query, selected_engines = parse_query(request_data['q'].encode('utf-8'))

    if not len(selected_engines):
        for pd_name, pd in request_data.items():
            if pd_name.startswith('category_'):
                category = pd_name[9:]
                if not category in categories:
                    continue
                selected_categories.append(category)

        if not len(selected_categories):
            cookie_categories = request.cookies.get('categories', '')
            cookie_categories = cookie_categories.split(',')
            for ccateg in cookie_categories:
                if ccateg in categories:
                    selected_categories.append(ccateg)

        if not len(selected_categories):
            selected_categories = ['general']

        for categ in selected_categories:
            selected_engines.extend({'category': categ,
                                     'name': x.name}
                                    for x in categories[categ])

    results, suggestions = search(query, request, selected_engines)

    featured_results = []

    for result in results:
        if request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'], query)
            result['title'] = highlight_content(result['title'], query)
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            result['title'] = html_to_text(result['title']).strip()

        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

    if request_data.get('format') == 'json':
        return Response(json.dumps({'query': query, 'results': results}),
                        mimetype='application/json')
    elif request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if len(results):
            csv.writerow(keys)
            for row in results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
        response.headers.add('Content-Disposition', content_disp)
        return response
    elif request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request_data['q'],
            number_of_results=len(results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request_data['q'],
        selected_categories=selected_categories,
        number_of_results=len(results) + len(featured_results),
        featured_results=featured_results,
        suggestions=suggestions
    )
Developer ID: nschlemm, Project: searx, Lines: 92, Source file: webapp.py
Example 11: search
# ......... part of the code omitted here .........

        # create a callback wrapper for the search engine results
        callback = make_callback(
            selected_engine['name'],
            results_queue,
            engine.response,
            request_params)

        # create dictionary which contain all
        # informations about the request
        request_args = dict(
            headers=request_params['headers'],
            hooks=dict(response=callback),
            cookies=request_params['cookies'],
            timeout=engine.timeout,
            verify=request_params['verify']
        )

        # specific type of request (GET or POST)
        if request_params['method'] == 'GET':
            req = requests_lib.get
        else:
            req = requests_lib.post
            request_args['data'] = request_params['data']

        # ignoring empty urls
        if not request_params['url']:
            continue

        # append request to list
        requests.append((req, request_params['url'],
                         request_args,
                         selected_engine['name']))

    if not requests:
        return self

    # send all search-request
    threaded_requests(requests)

    while not results_queue.empty():
        engine_name, engine_results = results_queue.get_nowait()

        # TODO type checks
        [self.suggestions.append(x['suggestion'])
         for x in list(engine_results)
         if 'suggestion' in x
         and engine_results.remove(x) is None]

        [self.answers.append(x['answer'])
         for x in list(engine_results)
         if 'answer' in x
         and engine_results.remove(x) is None]

        self.infoboxes.extend(x for x in list(engine_results)
                              if 'infobox' in x
                              and engine_results.remove(x) is None)

        results[engine_name] = engine_results

    # update engine-specific stats
    for engine_name, engine_results in results.items():
        engines[engine_name].stats['search_count'] += 1
        engines[engine_name].stats['result_count'] += len(engine_results)

    # score results and remove duplications
    self.results = score_results(results)

    # merge infoboxes according to their ids
    self.infoboxes = merge_infoboxes(self.infoboxes)

    # update engine stats, using calculated score
    for result in self.results:
        plugins.callAPI('on_result', self.plugins, locals())

        for res_engine in result['engines']:
            engines[result['engine']] \
                .stats['score_count'] += result['score']

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')

        if not self.paging and engines[result['engine']].paging:
            self.paging = True

        if 'content' in result:
            result['content_html'] = highlight_content(result['content'],
                                                        self.query.encode('utf-8'))  # noqa
            result['title_html'] = highlight_content(result['title'],
                                                     self.query.encode('utf-8'))

        if result.get('content'):
            result['content'] = html_to_text(result['content']).strip()
        # removing html content and whitespace duplications
        result['title'] = ' '.join(html_to_text(result['title']).strip().split())

    # return results, suggestions, answers and infoboxes
    return self
Developer ID: moritan, Project: searx, Lines: 101, Source file: searchAPI.py
Example 12: index
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if request.form.get("q") is None:
        return render("index.html")

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query)  # without plugins
        search = SearchWithPlugins(search_query, request)
        result_container = search.search()
    except:
        request.errors.append(gettext("search error"))
        logger.exception("search error")
        return render("index.html")

    results = result_container.get_ordered_results()

    # UI
    advanced_search = request.form.get("advanced_search", None)
    output_format = request.form.get("format", "html")
    if output_format not in ["html", "csv", "json", "rss"]:
        output_format = "html"

    # output
    for result in results:
        if output_format == "html":
            if "content" in result and result["content"]:
                result["content"] = highlight_content(
                    escape(result["content"][:1024]), search_query.query.encode("utf-8")
                )
            result["title"] = highlight_content(escape(result["title"] or u""), search_query.query.encode("utf-8"))
        else:
            if result.get("content"):
                result["content"] = html_to_text(result["content"]).strip()
            # removing html content and whitespace duplications
            result["title"] = " ".join(html_to_text(result["title"]).strip().split())

        result["pretty_url"] = prettify_url(result["url"])

        # TODO, check if timezone is calculated right
        if "publishedDate" in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result["pubdate"] = result["publishedDate"].strftime("%Y-%m-%d %H:%M:%S%z")
            except ValueError:
                result["publishedDate"] = None
            else:
                if result["publishedDate"].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result["publishedDate"].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result["publishedDate"] = gettext(u"{minutes} minute(s) ago").format(minutes=minutes)
                    else:
                        result["publishedDate"] = gettext(u"{hours} hour(s), {minutes} minute(s) ago").format(
                            hours=hours, minutes=minutes
                        )  # noqa
                else:
                    result["publishedDate"] = format_date(result["publishedDate"])

    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    if output_format == "json":
        return Response(
            json.dumps(
                {
                    "query": search_query.query,
                    "number_of_results": number_of_results,
                    "results": results,
                    "answers": list(result_container.answers),
                    "infoboxes": result_container.infoboxes,
                    "suggestions": list(result_container.suggestions),
                }
            ),
            mimetype="application/json",
        )
    elif output_format == "csv":
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ("title", "url", "content", "host", "engine", "score")
        csv.writerow(keys)
        for row in results:
            row["host"] = row["parsed_url"].netloc
            csv.writerow([row.get(key, "") for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype="application/csv")
        cont_disp = "attachment;Filename=searx_-_{0}.csv".format(search_query.query.encode("utf-8"))
        response.headers.add("Content-Disposition", cont_disp)
        return response
    elif output_format == "rss":
        response_rss = render(
            "opensearch_response_rss.xml",
            results=results,
            # ......... part of the code omitted here .........
Developer ID: kvch, Project: searx, Lines: 101, Source file: webapp.py
Example 13: index
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render('index.html')

    try:
        search = Search(request)
    except:
        return render('index.html')

    # TODO moar refactor - do_search integration into Search class
    search.results, search.suggestions = do_search(search.query,
                                                   request,
                                                   search.engines,
                                                   search.pageno,
                                                   search.lang)

    for result in search.results:

        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())

        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({'query': search.query,
                                    'results': search.results}),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if search.results:
            csv.writerow(keys)
            for row in search.results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.results,
            q=search.request_data['q'],
            number_of_results=len(search.results),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=search.results,
        q=search.request_data['q'],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.suggestions
    )
Developer ID: germc, Project: searx, Lines: 83, Source file: webapp.py
Example 14: index
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render(
            'index.html',
        )

    try:
        search = Search(request)
    except:
        return render(
            'index.html',
        )

    search.results, search.suggestions,\
        search.answers, search.infoboxes = search.search(request)

    for result in search.results:

        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        # check if HTTPS rewrite is required
        if settings['server']['https_rewrite']\
           and result['parsed_url'].scheme == 'http':

            skip_https_rewrite = False

            # check if HTTPS rewrite is possible
            for target, rules, exclusions in https_rules:

                # check if target regex match with url
                if target.match(result['url']):
                    # process exclusions
                    for exclusion in exclusions:

                        # check if exclusion match with url
                        if exclusion.match(result['url']):
                            skip_https_rewrite = True
                            break

                    # skip https rewrite if required
                    if skip_https_rewrite:
                        break

                    # process rules
                    for rule in rules:
                        try:
                            # TODO, precompile rule
                            p = re.compile(rule[0])

                            # rewrite url if possible
                            new_result_url = p.sub(rule[1], result['url'])
                        except:
                            break

                        # parse new url
                        new_parsed_url = urlparse(new_result_url)

                        # continiue if nothing was rewritten
                        if result['url'] == new_result_url:
                            continue

                        # get domainname from result
                        # TODO, does only work correct with TLD's like
                        #  asdf.com, not for asdf.com.de
                        # TODO, using publicsuffix instead of this rewrite rule
                        old_result_domainname = '.'.join(
                            result['parsed_url'].hostname.split('.')[-2:])
                        new_result_domainname = '.'.join(
                            new_parsed_url.hostname.split('.')[-2:])

                        # check if rewritten hostname is the same,
                        # to protect against wrong or malicious rewrite rules
                        if old_result_domainname == new_result_domainname:
                            # set new url
                            result['url'] = new_result_url

                    # target has matched, do not search over the other rules
                    break

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())

        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
        # ......... part of the code omitted here .........
Developer ID: suksim, Project: searx, Lines: 101, Source file: webapp.py
Example 15: index
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render(
            'index.html',
        )

    try:
        search = Search(request)
    except:
        return render(
            'index.html',
        )

    if plugins.call('pre_search', request, locals()):
        search.search(request)

    plugins.call('post_search', request, locals())

    for result in search.result_container.get_ordered_results():

        plugins.call('on_result', request, locals())
        if not search.paging and engines[result['engine']].paging:
            search.paging = True

        if search.request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'],
                                                      search.query.encode('utf-8'))  # noqa
            result['title'] = highlight_content(result['title'],
                                                search.query.encode('utf-8'))
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                minutes = int((timedifference.seconds / 60) % 60)
                hours = int(timedifference.seconds / 60 / 60)
                if hours == 0:
                    result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                else:
                    result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
            else:
                result['publishedDate'] = format_date(result['publishedDate'])

    if search.request_data.get('format') == 'json':
        return Response(json.dumps({'query': search.query,
                                    'results': search.result_container.get_ordered_results()}),
                        mimetype='application/json')
    elif search.request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in search.result_container.get_ordered_results():
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif search.request_data.get('format') == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=search.result_container.get_ordered_results(),
            q=search.request_data['q'],
            number_of_results=search.result_container.results_length(),
            base_url=get_base_url()
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=search.result_container.get_ordered_results(),
        q=search.request_data['q'],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.result_container.suggestions,
        answers=search.result_container.answers,
        infoboxes=search.result_container.infoboxes,
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
Developer ID: GreenLunar, Project: searx, Lines: 97, Source file: webapp.py
Example 16: index
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    if not request.args and not request.form:
        return render("index.html")

    try:
        search = Search(request)
    except:
        return render("index.html")

    if plugins.call("pre_search", request, locals()):
        search.search(request)

    plugins.call("post_search", request, locals())

    for result in search.result_container.get_ordered_results():

        plugins.call("on_result", request, locals())
        if not search.paging and engines[result["engine"]].paging:
            search.paging = True

        if search.request_data.get("format", "html") == "html":
            if "content" in result:
                result["content"] = highlight_content(result["content"], search.query.encode("utf-8"))  # noqa
            result["title"] = highlight_content(result["title"], search.query.encode("utf-8"))
        else:
            if result.get("content"):
                result["content"] = html_to_text(result["content"]).strip()
            # removing html content and whitespace duplications
            result["title"] = " ".join(html_to_text(result["title"]).strip().split())

        result["pretty_url"] = prettify_url(result["url"])

        # TODO, check if timezone is calculated right
        if "publishedDate" in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result["pubdate"] = result["publishedDate"].strftime("%Y-%m-%d %H:%M:%S%z")
            except ValueError:
                result["publishedDate"] = None
            else:
                if result["publishedDate"].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result["publishedDate"].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result["publishedDate"] = gettext(u"{minutes} minute(s) ago").format(minutes=minutes)
                    else:
                        result["publishedDate"] = gettext(u"{hours} hour(s), {minutes} minute(s) ago").format(
                            hours=hours, minutes=minutes
                        )  # noqa
                else:
                    result["publishedDate"] = format_date(result["publishedDate"])

    if search.request_data.get("format") == "json":
        return Response(
            json.dumps({"query": search.query, "results": search.result_container.get_ordered_results()}),
            mimetype="application/json",
        )
    elif search.request_data.get("format") == "csv":
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ("title", "url", "content", "host", "engine", "score")
        csv.writerow(keys)
        for row in search.result_container.get_ordered_results():
            row["host"] = row["parsed_url"].netloc
            csv.writerow([row.get(key, "") for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype="application/csv")
        cont_disp = "attachment;Filename=searx_-_{0}.csv".format(search.query.encode("utf-8"))
        response.headers.add("Content-Disposition", cont_disp)
        return response
    elif search.request_data.get("format") == "rss":
        response_rss = render(
            "opensearch_response_rss.xml",
            results=search.result_container.get_ordered_results(),
            q=search.request_data["q"],
            number_of_results=search.result_container.results_length(),
            base_url=get_base_url(),
        )
        return Response(response_rss, mimetype="text/xml")

    return render(
        "results.html",
        results=search.result_container.get_ordered_results(),
        q=search.request_data["q"],
        selected_categories=search.categories,
        paging=search.paging,
        pageno=search.pageno,
        base_url=get_base_url(),
        suggestions=search.result_container.suggestions,
        answers=search.result_container.answers,
        infoboxes=search.result_container.infoboxes,
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())],
    )
Developer ID: jibe-b, Project: searx, Lines: 98, Source file: webapp.py
Example 17: index
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    paging = False
    lang = 'all'

    if request.cookies.get('language')\
       and request.cookies['language'] in (x[0] for x in language_codes):
        lang = request.cookies['language']

    if request.method == 'POST':
        request_data = request.form
    else:
        request_data = request.args

    if not request_data.get('q'):
        return render('index.html')

    pageno_param = request_data.get('pageno', '1')
    if not pageno_param.isdigit() or int(pageno_param) < 1:
        return render('index.html')

    pageno = int(pageno_param)

    selected_categories = []

    query, selected_engines = parse_query(request_data['q'].encode('utf-8'))

    if len(selected_engines):
        selected_categories = list(set(engine['category']
                                       for engine in selected_engines))
    else:
        for pd_name, pd in request_data.items():
            if pd_name.startswith('category_'):
                category = pd_name[9:]
                if not category in categories:
                    continue
                selected_categories.append(category)

        if not len(selected_categories):
            cookie_categories = request.cookies.get('categories', '')
            cookie_categories = cookie_categories.split(',')
            for ccateg in cookie_categories:
                if ccateg in categories:
                    selected_categories.append(ccateg)

        if not len(selected_categories):
            selected_categories = ['general']

        for categ in selected_categories:
            selected_engines.extend({'category': categ,
                                     'name': x.name}
                                    for x in categories[categ])

    results, suggestions = search(query,
                                  request,
                                  selected_engines,
                                  pageno,
                                  lang)

    for result in results:

        if not paging and engines[result['engine']].paging:
            paging = True

        if request_data.get('format', 'html') == 'html':
            if 'content' in result:
                result['content'] = highlight_content(result['content'], query)
            result['title'] = highlight_content(result['title'], query)
        else:
            if 'content' in result:
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title'])
                                       .strip().split())

        if len(result['url']) > 74:
            url_parts = result['url'][:35], result['url'][-35:]
            result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
        else:
            result['pretty_url'] = result['url']

        for engine in result['engines']:
            if engine in favicons:
                result['favicon'] = engine

    if request_data.get('format') == 'json':
        return Response(json.dumps({'query': query, 'results': results}),
                        mimetype='application/json')
    elif request_data.get('format') == 'csv':
        csv = UnicodeWriter(cStringIO.StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        if len(results):
            csv.writerow(keys)
            for row in results:
                row['host'] = row['parsed_url'].netloc
                csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
        response.headers.add('Content-Disposition', content_disp)
        return response
    elif request_data.get('format') == 'rss':
        response_rss = render(
            # ......... part of the code omitted here .........