This article collects typical usage examples of the Python function whoosh.util.now. If you have been wondering what exactly now does and how to use it, the curated examples below should help.
This page presents 20 code examples of the now function, sorted by popularity by default. You can upvote the examples you like or find useful; ratings help the site recommend better Python code samples.
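Before diving in: whoosh.util.now is a thin alias for the platform's wall-clock timer (essentially time.time), so every example below follows the same pattern of taking two readings and subtracting them to get elapsed seconds. A minimal sketch of that pattern, with a summing loop standing in for real indexing or search work:

from whoosh.util import now

t = now()
total = sum(i * i for i in range(1000000))  # stand-in for real indexing/search work
print("Elapsed: %0.6f s" % (now() - t))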
Example 1: index
def index(self, lib):
print "Indexing with %s..." % lib
options = self.options
chunk = int(options.chunk)
skip = int(options.skip)
upto = int(options.upto)
count = 0
skipc = skip
starttime = chunkstarttime = now()
lib.indexer()
for d in self.spec.documents():
skipc -= 1
if not skipc:
lib.index_document(d)
count += 1
skipc = skip
if chunk and not count % chunk:
t = now()
sofar = t - starttime
print "Done %d docs, %0.3f secs for %d, %0.3f total, %0.3f docs/s" % (count, t - chunkstarttime, chunk, sofar, count/sofar)
chunkstarttime = t
if count > upto:
break
spooltime = now()
print "Spool time:", spooltime - starttime
lib.finish()
committime = now()
print "Commit time:", committime - spooltime
print "Total time to index", count, "documents:", committime - starttime
Developer: gbdu, Project: wikidpad, Lines: 32, Source: bench.py
Example 2: search
def search(self, lib):
lib.searcher()
t = now()
q = lib.query()
print "Query:", q
r = lib.find(q)
print "Search time:", now() - t
t = now()
self.spec.print_results(lib.results(r))
print "Print time:", now() - t
Developer: MapofLife, Project: MOL, Lines: 12, Source: bench.py
Example 3: search_file
def search_file(self, lib):
f = open(self.options.termfile, "rb")
terms = [line.strip() for line in f]
f.close()
print "Searching %d terms with %s" % (len(terms), lib)
lib.searcher()
starttime = now()
for r in lib.findterms(terms):
pass
searchtime = now() - starttime
print "Search time:", searchtime, "searches/s:", float(len(terms)) / searchtime
Developer: MapofLife, Project: MOL, Lines: 12, Source: bench.py
Example 4: finish
def finish(self, doccount, lengthfile, termtable, postingwriter):
_fieldlength_totals = self._fieldlength_totals
if not self.tasks:
return
pqueue = self.postingqueue
rqueue = self.resultsqueue
for _ in xrange(self.procs):
pqueue.put((-1, doccount))
#print "Joining..."
t = now()
for task in self.tasks:
task.join()
#print "Join:", now() - t
#print "Getting results..."
t = now()
runs = []
lenfilenames = []
for task in self.tasks:
taskruns, flentotals, flenmaxes, lenfilename = rqueue.get()
runs.extend(taskruns)
lenfilenames.append(lenfilename)
for fieldnum, total in flentotals.iteritems():
_fieldlength_totals[fieldnum] += total
for fieldnum, length in flenmaxes.iteritems():
if length > self._fieldlength_maxes.get(fieldnum, 0):
self._fieldlength_maxes[fieldnum] = length
#print "Results:", now() - t
#print "Writing lengths..."
t = now()
lw = LengthWriter(lengthfile, doccount)
for lenfilename in lenfilenames:
sublengths = LengthReader(StructFile(open(lenfilename, "rb")), doccount)
lw.add_all(sublengths)
os.remove(lenfilename)
lw.close()
lengths = lw.reader()
#print "Lengths:", now() - t
t = now()
iterator = imerge([read_run(runname, count) for runname, count in runs])
total = sum(count for runname, count in runs)
write_postings(self.schema, termtable, lengths, postingwriter, iterator)
for runname, count in runs:
os.remove(runname)
#print "Merge:", now() - t
self.cleanup()
Developer: KeNJiKunG, Project: E-Tipitaka-for-PC, Lines: 52, Source: multiproc.py
Example 5: cache_messages
def cache_messages(self, archive, cache):
print("Caching messages in %s..." % cache)
if not os.path.exists(archive):
raise Exception("Archive file %r does not exist" % archive)
t = now()
f = open(cache, "wb")
c = 0
for d in self.get_messages(archive):
c += 1
dump(d, f)
if not c % 1000: print(c)
f.close()
print("Cached messages in ", now() - t, "seconds")
Developer: JunjieHu, Project: dl, Lines: 15, Source: enron.py
Example 6: prepare
def prepare(self, top_searcher, q, context):
"""This method is called before a search.
Subclasses can override this to perform set-up work, but
they should still call the superclass's method because it sets several
necessary attributes on the collector object:
self.top_searcher
The top-level searcher.
self.q
The query object
self.context
``context.needs_current`` controls whether a wrapping collector
requires that this collector's matcher be in a valid state at every
call to ``collect()``. If this is ``False``, the collector is free
to use faster methods that don't necessarily keep the matcher
updated, such as ``matcher.all_ids()``.
:param top_searcher: the top-level :class:`whoosh.searching.Searcher`
object.
:param q: the :class:`whoosh.query.Query` object being searched for.
:param context: a :class:`whoosh.searching.SearchContext` object
containing information about the search.
"""
self.top_searcher = top_searcher
self.q = q
self.context = context
self.starttime = now()
self.runtime = None
self.docset = set()
Developer: 32footsteps, Project: SpecialCollectionsProject, Lines: 32, Source: collectors.py
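The docstring above spells out the contract for subclasses: always call the superclass's prepare() so attributes like starttime (taken with now()) get set. A minimal sketch of a custom collector honoring that contract — the hit-counting logic is illustrative, not part of Whoosh:

from whoosh.collectors import Collector

class CountingCollector(Collector):
    def prepare(self, top_searcher, q, context):
        # Let the base class record top_searcher, q, context and
        # self.starttime (via now()) before adding our own state.
        Collector.prepare(self, top_searcher, q, context)
        self.counter = 0

    def collect(self, sub_docnum):
        self.counter += 1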
Example 7: index
def index(self, lib):
print("Indexing with %s..." % lib)
options = self.options
every = None if options.every is None else int(options.every)
merge = options.merge
chunk = int(options.chunk)
skip = int(options.skip)
upto = int(options.upto)
count = 0
skipc = skip
starttime = chunkstarttime = now()
lib.indexer()
for d in self.spec.documents():
skipc -= 1
if not skipc:
lib.index_document(d)
count += 1
skipc = skip
if chunk and not count % chunk:
t = now()
sofar = t - starttime
print(
"Done %d docs, %0.3f secs for %d, %0.3f total, %0.3f docs/s"
% (count, t - chunkstarttime, chunk, sofar,
count / sofar))
chunkstarttime = t
if count > upto:
break
if every and not count % every:
print("----Commit")
lib.finish(merge=merge)
lib.indexer(create=False)
spooltime = now()
print("Spool time:", spooltime - starttime)
lib.finish(merge=merge)
committime = now()
print("Commit time:", committime - spooltime)
totaltime = committime - starttime
print("Total time to index %d documents: %0.3f secs (%0.3f minutes)" %
(count, totaltime, totaltime / 60.0))
print("Indexed %0.3f docs/s" % (count / totaltime))
Developer: waywire, Project: popcorn_maker, Lines: 46, Source: bench.py
Example 8: _complex_sort_query
def _complex_sort_query(self, q, limit=None, reverse=False, filter=None):
t = now()
if self.arrays is None:
self._complex_cache()
comb = self.searcher._filter_to_comb(filter)
docnums = [docnum for docnum in self.searcher.docs_for_query(q)
if (not comb) or docnum in comb]
docnums.sort(key=self._complex_key_fn, reverse=reverse)
docset = set(docnums)
    # I artificially enforce the limit here, even though the current
    # implementation can't use it, so that the results don't change based
    # on single- vs. multi-segment indexes.
if limit:
docnums = docnums[:limit]
runtime = now() - t
return self._results(q, docnums, docset, runtime)
Developer: ljarufe, Project: mp100, Lines: 17, Source: sorting.py
Example 9: test_20000_single
def test_20000_single():
sc = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
with TempIndex(sc, "20000single") as ix:
domain = ["alfa", "bravo", "charlie", "delta", "echo", "foxtrot",
"golf", "hotel", "india", "juliet", "kilo", "lima"]
t = now()
for i in xrange(20000):
w = ix.writer()
w.add_document(id=text_type(i),
text=u(" ").join(random.sample(domain, 5)))
w.commit()
print("Write single:", now() - t)
t = now()
ix.optimize()
print("Optimize single:", now() - t)
Developer: gsadikin, Project: whoosh, Lines: 17, Source: test_bigindex.py
Example 10: sort_query
def sort_query(self, query, sortedby, reverse=False):
if isinstance(sortedby, basestring):
sorter = self._field_sorter(sortedby)
elif isinstance(sortedby, (list, tuple)):
sorter = scoring.MultiFieldSorter([self._field_sorter(fname)
for fname in sortedby])
elif isinstance(sortedby, Sorter):
sorter = sortedby
else:
        raise ValueError("sortedby argument (%r) must be a string, list,"
                         " or Sorter" % sortedby)
t = now()
sorted_docs = list(sorter.order(self, query.docs(self), reverse=reverse))
runtime = now() - t
return Results(self, query, sorted_docs, None, runtime)
Developer: KeNJiKunG, Project: E-Tipitaka-for-PC, Lines: 17, Source: searching.py
Example 11: search
def search(qstring, ixdir, basedir, limit=None, optimize=True, scores=True):
ix = index.open_dir(ixdir)
qp = qparser.QueryParser("title", ix.schema)
q = qp.parse(qstring)
with ix.searcher(weighting=scoring.PL2()) as s:
if scores:
r = s.search(q, limit=limit, optimize=optimize)
for hit in r:
print_record(hit.rank, basedir, hit["file"], hit["pos"])
print("Found %d records in %0.06f seconds" % (len(r), r.runtime))
else:
t = now()
for i, docnum in enumerate(s.docs_for_query(q)):
if not limit or i < limit:
fields = s.stored_fields(docnum)
print_record(i, basedir, fields["file"], fields["pos"])
print("Found %d records in %0.06f seconds" % (i, now() - t))
Developer: JunjieHu, Project: dl, Lines: 18, Source: marc21.py
Example 12: make_index
def make_index(basedir, ixdir, procs=4, limitmb=128, multisegment=True,
glob="*.mrc"):
if not os.path.exists(ixdir):
os.mkdir(ixdir)
# Multi-lingual stop words
stoplist = (analysis.STOP_WORDS
| set("de la der und le die et en al no von di du da "
"del zur ein".split()))
# Schema
ana = analysis.StemmingAnalyzer(stoplist=stoplist)
schema = fields.Schema(title=fields.TEXT(analyzer=ana),
author=fields.TEXT(phrase=False),
subject=fields.TEXT(analyzer=ana, phrase=False),
file=fields.STORED, pos=fields.STORED,
)
# MARC fields to extract
mfields = set(subjectfields) # Subjects
mfields.update("100 110 111".split()) # Author
mfields.add("245") # Title
print("Indexing with %d processor(s) and %d MB per processor"
% (procs, limitmb))
c = 0
t = now()
ix = index.create_in(ixdir, schema)
with ix.writer(procs=procs, limitmb=limitmb,
multisegment=multisegment) as w:
filenames = [filename for filename in os.listdir(basedir)
if fnmatch.fnmatch(filename, glob)]
for filename in filenames:
path = os.path.join(basedir, filename)
print("Indexing", path)
f = open(path, 'rb')
for x, pos in read_file(f, mfields):
w.add_document(title=uni(title(x)), author=uni(author(x)),
subject=uni(subjects(x)),
file=filename, pos=pos)
c += 1
f.close()
print("Committing...")
print("Indexed %d records in %0.02f minutes" % (c, (now() - t) / 60.0))
Developer: JunjieHu, Project: dl, Lines: 43, Source: marc21.py
Example 13: test_20000_buffered
def test_20000_buffered():
from whoosh.writing import BufferedWriter
sc = fields.Schema(id=fields.ID(stored=True), text=fields.TEXT)
with TempIndex(sc, "20000buffered") as ix:
domain = ["alfa", "bravo", "charlie", "delta", "echo", "foxtrot",
"golf", "hotel", "india", "juliet", "kilo", "lima"]
t = now()
w = BufferedWriter(ix, limit=100, period=None)
for i in xrange(20000):
w.add_document(id=text_type(i),
text=u(" ").join(random.sample(domain, 5)))
w.close()
print("Write buffered:", now() - t)
t = now()
ix.optimize()
print("Optimize buffered:", now() - t)
Developer: gsadikin, Project: whoosh, Lines: 19, Source: test_bigindex.py
Example 14: index_document
def index_document(self, d):
try:
self.archive.indexDictionary(str(self.count), d)
except ValueError:
print "d=", d
raise
self.count += 1
if not self.count % int(self.options.batch):
t = now()
self.archive.store(lazy=True)
self.indexer(create=False)
Developer: MapofLife, Project: MOL, Lines: 11, Source: bench.py
Example 15: finish
def finish(self):
"""This method is called after a search.
Subclasses can override this to perform set-up work, but
they should still call the superclass's method because it sets several
necessary attributes on the collector object:
self.runtime
The time (in seconds) the search took.
"""
self.runtime = now() - self.starttime
Developer: 32footsteps, Project: SpecialCollectionsProject, Lines: 12, Source: collectors.py
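The runtime computed here is what later surfaces as Results.runtime (used in Example 11 above). A quick check, assuming an already-open index ix and a parsed query q:

with ix.searcher() as s:
    results = s.search(q)
    print("Search took %0.6f s" % results.runtime)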
Example 16: dump_run
def dump_run(self):
if self.size > 0:
#print "Dumping run..."
t = now()
filename = self.unique_name(".run")
runfile = open(filename, "w+b")
self.postings.sort()
for p in self.postings:
dump(p, runfile)
runfile.close()
self.runs.append((filename, self.count))
self.postings = []
self.size = 0
self.count = 0
Developer: KeNJiKunG, Project: E-Tipitaka-for-PC, Lines: 15, Source: pools.py
Example 17: timing
from contextlib import contextmanager  # restores the decorator this helper needs

@contextmanager
def timing(name=None):
    t = now()
    yield
    t = now() - t
    print("%s: %0.06f s" % (name or '', t))
Developer: 32footsteps, Project: SpecialCollectionsProject, Lines: 5, Source: testing.py
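For reference, a hypothetical use of the timing() helper above; the timed body is a stand-in:

with timing("sum of squares"):
    total = sum(i * i for i in range(1000000))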
Example 18: results
def results(self, qstring, cat_order, category=None, shortcuts=None,
limit=None, cat_limit=5):
from whoosh.util import now
t = now()
s = self.searcher
limit = limit or self.limit
showall = False
if shortcuts:
qstring, showall = self.expand_shortcuts(qstring, shortcuts)
if category:
filter = query.Term("category", category)
else:
filter = None
all_q = self.make_query(qstring, "content")
show_best = (not category and
all(isinstance(lq, query.Term) and lq.field() == "content"
for lq in all_q.leaves()))
if show_best:
best_q = self.make_query(qstring, "bestbet")
best_r = s.search(best_q, limit=10)
else:
best_r = None
grams_groups = None
grams_q = self.make_query(qstring, "grams")
if any(fn == "grams" for fn, _ in grams_q.iter_all_terms()):
try:
grams_r = s.search(grams_q, limit=limit, groupedby="category",
filter=filter)
except query.QueryError:
pass
else:
grams_groups = grams_r.groups()
all_r = s.search(all_q, limit=limit, groupedby="category",
filter=filter)
all_groups = all_r.groups()
# OK, this is complicated... we want to present the categories in the
# order defined in cat_order, BUT we want categories that have grams
# matches to come before categories that only have content matches
final_order = []
if grams_groups:
# Add categories in grams_groups in the order defined by cat_order
for cat in cat_order:
if cat in grams_groups:
final_order.append(cat)
# Add any categories in grams_groups that aren't in cat_order
final_order.extend(cat for cat in sorted(grams_groups)
if cat not in cat_order)
seen = set(final_order)
# Add categories in all_groups in the order defined by cat_order, IF
# they weren't already added in the previous step
for cat in cat_order:
if cat in all_groups and cat not in seen:
final_order.append(cat)
# Add any categories in all_groups that weren't added in the previous
# steps
final_order.extend(cat for cat in sorted(all_groups)
if cat not in cat_order and cat not in seen)
# If there's only one category, there's no point in cutting it off,
# just show all hits
showall = showall or len(final_order) == 1
# For each category, pull out the docnums and get their stored fields
length = 0
categories = []
for cat in final_order:
# Combine the docnums for this category from grams and all
docnums = []
seen = set()
if grams_groups:
for docnum in grams_groups.get(cat, ()):
docnums.append(docnum)
seen.add(docnum)
for docnum in all_groups.get(cat, ()):
if docnum not in seen:
docnums.append(docnum)
seen.add(docnum)
# If the number of hits is exactly the limit + 1, then there's no
# point showing a "show more" line instead of that one extra hit,
# so just increase the limit in that case
if len(docnums) == cat_limit + 1:
cutoff = len(docnums)
else:
cutoff = cat_limit
if not showall and len(docnums) > cutoff:
docnums = docnums[:cutoff]
length += len(seen)
#......... (remainder of this function omitted) .........
Developer: mchaput, Project: bookish, Lines: 101, Source: search.py
Example 19: _simple_sort_query
def _simple_sort_query(self, q, limit=None, reverse=False, filter=None):
# If the direction of all sort fields is the same, we can use field
# caches to do the sorting
t = now()
docset = set()
sortedby = [c[0] for c in self.criteria]
reverse = self.criteria[0][1] ^ reverse
comb = self.searcher._filter_to_comb(filter)
if self.searcher.subsearchers:
heap = []
# I wish I could actually do a heap thing here, but the Python heap
# queue only works with greater-than, and I haven't thought of a
# smart way to get around that yet, so I'm being dumb and using
# nlargest/nsmallest on the heap + each subreader list :(
op = nlargest if reverse else nsmallest
for s, offset in self.searcher.subsearchers:
# This searcher is wrapping a MultiReader, so push the sorting
# down to the leaf readers and then combine the results.
docnums = [docnum for docnum in q.docs(s)
if (not comb) or docnum + offset in comb]
# Add the docnums to the docset
docset.update(docnums)
# Ask the reader to return a list of (key, docnum) pairs to
# sort by. If limit=None, the returned list is not sorted. If
# limit=True, it is sorted.
r = s.reader()
srt = r.key_docs_by(sortedby, docnums, limit, reverse=reverse,
offset=offset)
if limit:
# Pick the "limit" smallest/largest items from the current
# and new list
heap = op(limit, heap + srt)
else:
# If limit=None, we'll just add everything to the "heap"
# and sort it at the end.
heap.extend(srt)
# Sort the heap and take the docnums
docnums = [docnum for _, docnum in sorted(heap, reverse=reverse)]
else:
# This searcher is wrapping an atomic reader, so we don't need to
# get tricky combining the results of multiple readers, just ask
# the reader to sort the results.
r = self.searcher.reader()
docnums = [docnum for docnum in q.docs(self.searcher)
if (not comb) or docnum in comb]
docnums = r.sort_docs_by(sortedby, docnums, reverse=reverse)
docset = set(docnums)
    # I artificially enforce the limit here, even though the current
    # implementation can't use it, so that the results don't change
    # based on single- vs. multi-segment indexes.
docnums = docnums[:limit]
runtime = now() - t
return self._results(q, docnums, docset, runtime)
Developer: ljarufe, Project: mp100, Lines: 63, Source: sorting.py
Example 20: test_bigsort
def test_bigsort():
times = 30000
dirname = "testindex"
df = fields.DATETIME(stored=True)
schema = fields.Schema(id=fields.ID(stored=True), date=df)
if os.path.exists(dirname):
shutil.rmtree(dirname)
os.mkdir(dirname)
ix = index.create_in(dirname, schema)
print("Writing...")
t = now()
w = ix.writer(limitmb=512)
for i in xrange(times):
dt = datetime.fromtimestamp(random.randint(15839593, 1294102139))
w.add_document(id=text_type(i), date=dt)
w.commit()
print("Writing took ", now() - t)
ix = index.open_dir(dirname)
s = ix.searcher()
q = query.Wildcard("id", "1?2*")
t = now()
x = list(df.sortable_terms(s.reader(), "date"))
print(now() - t, len(x))
t = now()
for y in x:
p = list(s.postings("date", y).all_ids())
print(now() - t)
t = now()
r = s.search(q, limit=25, sortedby="date", reverse=True)
print("Search 1 took", now() - t)
print("len=", r.scored_length())
t = now()
r = s.search(q, limit=25, sortedby="date")
print("Search 2 took", now() - t)
t = now()
r = s.search(q, limit=25, sortedby="date")
print("Search 2 took", now() - t)
from heapq import nlargest
t = now()
sf = s.stored_fields
gen = ((sf(n)["date"], n) for n in q.docs(s))
r = nlargest(25, gen)
print(now() - t)
Developer: gsadikin, Project: whoosh, Lines: 55, Source: test_bigsort.py
Note: The whoosh.util.now examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from various open-source projects; copyright remains with the original authors. Consult each project's license before reusing or redistributing; do not repost without permission.