本文整理汇总了Python中whoosh.index.open_dir函数的典型用法代码示例。如果您正苦于以下问题:Python open_dir函数的具体用法?Python open_dir怎么用?Python open_dir使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了open_dir函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: searchModule
def searchModule(qq):
    """Search the index for ``qq`` with three scoring models and time each run.

    The outcome is published through module-level globals rather than
    returned: ``results`` / ``results_bm25`` (default searcher weighting),
    ``results_tfidf`` (``scoring.TF_IDF``), ``results_tf``
    (``scoring.Frequency``) and the matching ``time_*`` durations in seconds.
    Which index directory is opened depends on the module-level ``doStem``
    and ``doStop`` settings.

    :param qq: the raw query string typed by the user
    """
    global results_bm25, results_tfidf, results_tf, results, idx
    global time_bm25, time_tfidf, time_tf

    # Give every global its own list. The original chained assignment
    # (a = b = c = d = []) bound all four names to one shared list, so a
    # mutation through any of them would have shown up in the others.
    results_bm25 = []
    results_tfidf = []
    results_tf = []
    results = []
    time_bm25 = time_tfidf = time_tf = None

    # NOTE(review): the source this block was recovered from had lost its
    # indentation; everything below is assumed to be guarded by ``flag == 1``
    # (otherwise ``idx`` could be unbound on the first call) -- confirm.
    if flag == 1:
        if doStem != "false" and doStop != "false":
            idx = open_dir("Indexed/Index_stsw")
        elif doStem != "false" and doStop == "false":
            idx = open_dir("Indexed/Index_st")
        elif doStem == "false" and doStop != "false":
            idx = open_dir("Indexed/Index_sw")
        else:
            idx = open_dir("Indexed/Index")

        qp = qparser.QueryParser("content", schema=idx.schema)
        # by default, the parser treats the words as if they were connected by AND
        q = qp.parse(unicode(qq))

        # The searchers are deliberately left open: the Results objects
        # stored in the globals are read later by other code.
        s = idx.searcher()
        results = s.search(q)
        results.fragmenter.surround = 50

        start = time.time()
        results_bm25 = idx.searcher().search(q)
        time_bm25 = (time.time() - start)
        results_bm25.fragmenter.surround = 50

        start = time.time()
        results_tfidf = idx.searcher(weighting=scoring.TF_IDF()).search(q)
        time_tfidf = (time.time() - start)
        results_tfidf.fragmenter.surround = 50

        start = time.time()
        results_tf = idx.searcher(weighting=scoring.Frequency()).search(q)
        time_tf = (time.time() - start)
        results_tf.fragmenter.surround = 50
开发者ID:abhishekgoyal1994,项目名称:InfoZeal,代码行数:32,代码来源:ndgc.py
示例2: __load__
def __load__(region=None):
    """Load (or create) the on-disk index for one region or for all regions.

    Loaded indexes are cached in ``Indexer.__index__`` keyed by region name.

    :param region: key of ``index_dir`` to load (the original notes name
        ``news`` and ``blog`` as examples); a falsy value loads every
        region configured in ``index_dir``
    :return: ``False`` when ``region`` is given but is neither cached nor a
        key of ``index_dir``; ``True`` otherwise
    """
    def _ensure_loaded(name):
        # Open the index for ``name`` and cache it, creating the directory
        # and/or the index itself when either is missing.
        if name in Indexer.__index__:
            return
        path = index_dir[name]
        if not os.path.exists(path):
            os.makedirs(path)
        # A directory can exist without holding an index (e.g. created by
        # hand or left over from an aborted run); open_dir would fail there.
        if index.exists_in(path, indexname=name):
            Indexer.__index__[name] = index.open_dir(path, indexname=name)
        else:
            Indexer.__index__[name] = index.create_in(path, schema, indexname=name)

    if region:
        if region not in Indexer.__index__ and region not in index_dir:
            return False
        _ensure_loaded(region)
        return True

    # Load every configured region. The original returned as soon as it met
    # one region that was already cached, leaving the remaining ones unloaded.
    for reg in index_dir:
        _ensure_loaded(reg)
    return True
开发者ID:DMGbupt,项目名称:wechat-crawler,代码行数:29,代码来源:indexer.py
示例3: init
def init():
    """Open (building on first use) the e-mail and address-book indexes, then
    loop forever prompting for a word and printing the matching e-mails."""
    # Schemas for the two indexes stored under index_path.
    schema_email = Schema(
        path=TEXT(stored=True),
        sender_email=TEXT(stored=True),
        recipient_emails=TEXT,
        date=DATETIME,
        subject=TEXT(stored=True),
        body=TEXT,
    )
    schema_book = Schema(email=TEXT(stored=True), name=TEXT(stored=True))
    schemas = {"index_emails": schema_email, "index_book": schema_book}

    if not os.path.exists(index_path):
        os.mkdir(index_path)

    indexes = {}
    for ixname, schema in schemas.items():
        # This part could be improved: documents are only indexed when the
        # index does not exist yet. Files that were modified or deleted after
        # indexing are not taken into account, as explained here:
        # @url http://pythonhosted.org/Whoosh/indexing.html#incremental-indexing
        if index.exists_in(index_path, indexname=ixname):
            indexes[ixname] = index.open_dir(index_path, indexname=ixname)
            continue

        # First run for this index: create it, reopen it and fill it.
        index.create_in(index_path, schema, indexname=ixname)
        ix = index.open_dir(index_path, indexname=ixname)
        writer = ix.writer()
        if ixname == "index_emails":
            files = read_dir()
            index_emails(files, writer)
        elif ixname == "index_book":
            index_book(writer)
        indexes[ixname] = ix

    # Main routine
    separator = "=================================================="
    while True:
        ix = indexes.get("index_emails")
        with ix.searcher() as searcher:
            input_user = str(raw_input("Introduzca una palabra del asunto o cuerpo (p.e. contrato): "))
            mparser = MultifieldParser(["subject", "body"], schema=ix.schema)
            myquery = mparser.parse(unicode(input_user))
            print(separator)
            for hit in searcher.search(myquery):
                # read_file(hit.get("path"))
                print("Remitente: " + findNameBySender(indexes, hit.get("sender_email")))
                print("Asunto: " + hit.get("subject"))
                print(separator)
开发者ID:garridev,项目名称:practicas-aii,代码行数:56,代码来源:practica.py
示例4: getIndex
def getIndex(self):
    """Return the opened index for this object's module, building it first
    if its directory does not exist yet."""
    module_dir = self.indexdir + "/" + self.sword_module_name
    if not os.path.exists(module_dir):
        # No index directory yet: let the Indexer build it before opening.
        Indexer(self.sword_module_name, self.Reader).buildIndex()
    return index.open_dir(module_dir)
开发者ID:nano13,项目名称:nvcli,代码行数:11,代码来源:search.py
示例5: init
def init():
    """Open (building on first use) the e-mail and address-book indexes, then
    loop forever over a three-option text menu (A, B or C)."""
    # Schemas for the two indexes stored under index_path.
    schema_email = Schema(
        path=TEXT(stored=True),
        sender_email=TEXT(stored=True),
        recipient_emails=TEXT(stored=True),
        date=DATETIME,
        subject=TEXT(stored=True),
        body=TEXT,
    )
    schema_book = Schema(email=TEXT(stored=True), name=TEXT(stored=True))
    schemas = {"index_emails": schema_email, "index_book": schema_book}

    if not os.path.exists(index_path):
        os.mkdir(index_path)

    indexes = {}
    for ixname, schema in schemas.items():
        # This part could be improved: documents are only indexed when the
        # index does not exist yet. Files that were modified or deleted after
        # indexing are not taken into account, as explained here:
        # @url http://pythonhosted.org/Whoosh/indexing.html#incremental-indexing
        if index.exists_in(index_path, indexname=ixname):
            indexes[ixname] = index.open_dir(index_path, indexname=ixname)
            continue

        # First run for this index: create it, reopen it and fill it.
        index.create_in(index_path, schema, indexname=ixname)
        ix = index.open_dir(index_path, indexname=ixname)
        writer = ix.writer()
        if ixname == "index_emails":
            files = read_dir()
            index_emails(files, writer)
        elif ixname == "index_book":
            index_book(writer)
        indexes[ixname] = ix

    # One handler per menu entry; each asks for its own input and runs the
    # matching search.
    def apartadoA():
        input_user = raw_input("Introduzca una palabra del asunto o cuerpo (p.e. contrato): ")
        findMailByWordInSubjectOrBody(indexes, input_user)

    def apartadoB():
        input_user = raw_input("Buscar emails posteriores a la fecha (con formato YYYYMMDD): ")
        findMailbyDate(indexes, input_user)

    def apartadoC():
        input_user = raw_input("Introduzca palabras spam (p.e. 'Contrato Gracias compraventa'): ")
        findMailBySpamWords(indexes, input_user)

    handlers = {'a': apartadoA, 'b': apartadoB, 'c': apartadoC}

    # Main routine
    while True:
        choice = str(raw_input("Elija el apartado que desea ejecutar (A, B o C): ")).lower()
        handler = handlers.get(choice)
        if handler is None:
            print("Opción no válida.")
        else:
            handler()
开发者ID:garridev,项目名称:practicas-aii,代码行数:52,代码来源:merge_test.py
示例6: update
def update(self, tmp=False):
    """
    Make sure index reflects current backend state, add missing stuff, remove outdated stuff.

    This is intended to be used:
    * after a full rebuild that was done at tmp location
    * after wiki is made read-only or taken offline
    * after the index was moved to the normal index location

    Reason: new revisions that were created after the rebuild started might be missing in new index.

    :param tmp: if true, work on ``self.index_dir_tmp`` instead of ``self.index_dir``
    :returns: index changed (bool) -- true when revisions had to be added to
              or deleted from the ALL_REVS index
    """
    index_dir = self.index_dir_tmp if tmp else self.index_dir
    index_all = open_dir(index_dir, indexname=ALL_REVS)
    try:
        # NOTE: self.backend iterator gives (mountpoint, revid) tuples, which is NOT
        # the same as (name, revid), thus we do the set operations just on the revids.
        # first update ALL_REVS index:
        revids_mountpoints = dict((revid, mountpoint) for mountpoint, revid in self.backend)
        backend_revids = set(revids_mountpoints)
        with index_all.searcher() as searcher:
            ix_revids_names = dict((doc[REVID], doc[NAME]) for doc in searcher.all_stored_fields())
        revids_mountpoints.update(ix_revids_names)  # this is needed for stuff that was deleted from storage
        ix_revids = set(ix_revids_names)
        add_revids = backend_revids - ix_revids
        del_revids = ix_revids - backend_revids
        # The documented contract is a bool; the original returned whichever
        # of the two sets was non-empty (or an empty set).
        changed = bool(add_revids or del_revids)
        add_revids = [(revids_mountpoints[revid], revid) for revid in add_revids]
        del_revids = [(revids_mountpoints[revid], revid) for revid in del_revids]
        self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, add_revids, 'add')
        self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, del_revids, 'delete')

        backend_latest_names_revids = set(self._find_latest_names_revids(index_all))
    finally:
        index_all.close()

    index_latest = open_dir(index_dir, indexname=LATEST_REVS)
    try:
        # now update LATEST_REVS index:
        with index_latest.searcher() as searcher:
            ix_revids = set(doc[REVID] for doc in searcher.all_stored_fields())
        backend_latest_revids = set(revid for name, revid in backend_latest_names_revids)
        upd_revids = backend_latest_revids - ix_revids
        upd_revids = [(revids_mountpoints[revid], revid) for revid in upd_revids]
        self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, upd_revids, 'update')
        # del_revids was computed against ALL_REVS above; the same revisions
        # are removed from the LATEST_REVS index.
        self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, del_revids, 'delete')
    finally:
        index_latest.close()
    return changed
开发者ID:pombredanne,项目名称:moin2,代码行数:49,代码来源:indexing.py
示例7: batch_query
def batch_query(querypath, indexpath):
    """Run every topic found in ``querypath`` against the index and write the
    ranked hits to ``output.txt``.

    Topics are the sections delimited by ``<top>`` ... ``</top>``; each
    topic's description and title are combined into one OR-grouped query on
    the ``content`` field. Up to 1000 hits per topic are written, one per
    line, as ``topicnum, "Q0", <second stored value>, rank, score, "jack"``
    separated by tabs.

    :param querypath: path of the topics file to read
    :param indexpath: directory of the Whoosh index to search
    """
    ix = index.open_dir(indexpath)
    with open(querypath, 'r') as fh:
        with open('output.txt', 'w') as out_file:
            rawdata = fh.read()
            document = get_section2(rawdata, "<top>", "</top>")
            # The searcher and parser do not depend on the topic, so they are
            # built once instead of once per topic.
            with ix.searcher() as searcher:
                parser = qparser.QueryParser("content", schema=ix.schema,
                                             group=qparser.OrGroup)
                for d in document:
                    topicnum = get_section(d, "<num> Number:", "<title>")[0].strip(" ").strip("\n")
                    title = get_section(d, "<title>", "<desc>")[0].strip(" ").strip("\n")
                    desc = get_section(d, "<desc>", "<narr>")[0].replace("Description:", "").strip(" ")
                    narr = get_section(d, "<narr>", "</top>")[0].replace("Narrative:", "").strip(" ")
                    print("%s %s %s %s" % (topicnum, title, desc, narr))

                    query = parser.parse(desc + " " + title)
                    results = searcher.search(query, limit=1000)
                    # Iterate over the hits that actually came back. The
                    # original indexed results[0..999] unconditionally and
                    # raised IndexError for topics with fewer matches.
                    for hit in results:
                        if hit.rank < 2:
                            # Same debug output as before: the two best hits.
                            print(hit)
                        # NOTE(review): values()[1] picks the second stored
                        # field by position -- presumably the document id;
                        # confirm against the index schema.
                        out_file.write("%s\tQ0\t%s\t%s\t%s\t jack\n" % (
                            topicnum, hit.values()[1], hit.rank + 1, hit.score))
开发者ID:bgshin,项目名称:irqa,代码行数:29,代码来源:bquery.py
示例8: query
def query(indexpath):
    """Search the ``content`` field of the index at ``indexpath`` for the
    fixed word "test" and print the first hit."""
    ix = index.open_dir(indexpath)
    with ix.searcher() as searcher:
        content_parser = QueryParser("content", ix.schema)
        hits = searcher.search(content_parser.parse("test"))
        first_hit = hits[0]
        print(first_hit)
开发者ID:bgshin,项目名称:irqa,代码行数:7,代码来源:bquery.py
示例9: run
def run(self):
    """Main loop of the log index/search process.

    Opens the index in ``self.indexdir`` (creating the directory and/or the
    index when missing), then handles ``(type, data)`` messages received
    from ``self.index_p`` until the pipe reports EOF or a KeyboardInterrupt
    arrives. On the way out the buffer is dumped and the searcher and index
    are closed.
    """
    # open index
    self.buffer = deque(maxlen=BUFFERLINES)
    if not exists(self.indexdir):
        makedirs(self.indexdir)
        self.ix = create_in(self.indexdir, SCHEMA)
    elif exists_in(self.indexdir):
        self.ix = open_dir(self.indexdir)
    else:
        self.ix = create_in(self.indexdir, SCHEMA)
    self.qp = QueryParser("content", self.ix.schema)
    self.searcher = self.ix.searcher()
    index_p = self.index_p
    try:
        while True:
            try:
                # check index_p
                try:
                    # ``kind`` rather than ``type`` so the builtin is not shadowed.
                    kind, data = index_p.recv()
                except EOFError:
                    break
                try:
                    if kind == QUERY:
                        self._processSearch(data)
                    elif kind == LOG:
                        self._processLog(data)
                    elif kind == RENAME:
                        self._processRename(data)
                    else:
                        prnt("Unexpected data in logindexsearch.")
                except Exception:
                    # Was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt raised while a message was being
                    # processed and so defeated the handler below.
                    print_exc()
                    prnt("EXCEPTION in logindexsearch process.")
            except KeyboardInterrupt:
                break
    finally:
        # Flush and release resources even if recv() fails unexpectedly.
        self._dumpBuffer(self.buffer)
        self.searcher.close()
        self.ix.close()
开发者ID:Clam-,项目名称:pyBurlyBot,代码行数:32,代码来源:pbm_logindexsearch.py
示例10: indexer
def indexer(self, create=True):
    """Prepare ``self.writer`` (and ``self._procdoc``) for a benchmark run.

    The index lives in ``<options.dir>/<options.indexname>_whoosh``; it is
    created from the spec's schema when ``create`` is true and opened
    otherwise.
    """
    opts = self.options
    spec = self.bench.spec
    schema = spec.whoosh_schema()

    path = os.path.join(opts.dir, "%s_whoosh" % opts.indexname)
    if not os.path.exists(path):
        os.mkdir(path)
    ix = index.create_in(path, schema) if create else index.open_dir(path)

    # Optional pool class, looked up from the name given in the options.
    poolclass = find_object(opts.pool) if opts.pool else None
    kwargs = {
        "limitmb": int(opts.limitmb),
        "poolclass": poolclass,
        "dir": opts.tempdir,
        "procs": int(opts.procs),
        "batchsize": int(opts.batch),
    }

    if opts.expw:
        # Imported only when this writer is requested, as in the original.
        from whoosh.filedb.multiproc import MultiSegmentWriter
        self.writer = MultiSegmentWriter(ix, **kwargs)
    else:
        self.writer = ix.writer(**kwargs)

    # Optional per-document hook supplied by the benchmark spec.
    self._procdoc = None
    if hasattr(spec, "process_document_whoosh"):
        self._procdoc = spec.process_document_whoosh
开发者ID:MapofLife,项目名称:MOL,代码行数:28,代码来源:bench.py
示例11: __init__
def __init__(self, idx_dir):
    """Remember ``idx_dir`` and open the index stored there; when the
    directory does not exist yet, make it and call ``self.create()``."""
    self.idx_dir = idx_dir
    if os.path.exists(idx_dir):
        self.ix = open_dir(idx_dir)
        return
    # First use: nothing on disk yet.
    os.mkdir(idx_dir)
    self.create()
开发者ID:Armagedoom,项目名称:leo-editor,代码行数:7,代码来源:leofts.py
示例12: __init__
def __init__(self, db_path):
    """Open the index in ``db_path`` (creating it from ``self.schema`` when
    none exists) and set up a query parser on the ``text`` field."""
    ensuredir(db_path)
    already_indexed = index.exists_in(db_path)
    self.index = (
        index.open_dir(db_path)
        if already_indexed
        else index.create_in(db_path, schema=self.schema)
    )
    self.qparser = QueryParser('text', self.schema)
开发者ID:lehmannro,项目名称:sphinx-mirror,代码行数:7,代码来源:whooshsearch.py
示例13: update_index
def update_index(self, offering):
    """
    Update the document of a concrete offering in the search index

    :param offering: the offering whose index document is rewritten; its
        ``pk``, ``owner_organization.pk``, ``name``, ``rating``, ``state``
        and creation/publication date are read
    :raises Exception: if the index directory is missing or empty
    """
    if not os.path.exists(self._index_path) or not os.listdir(self._index_path):
        raise Exception('The index does not exist')

    # Build every field value before taking the writer lock, so a failure
    # while aggregating or converting cannot leave the index locked.
    text = self._aggregate_text(offering)
    purchasers_text = self._aggregate_purchasers(offering)

    # Offerings that are only uploaded have no publication date yet.
    if offering.state == 'uploaded':
        in_date = offering.creation_date
    else:
        in_date = offering.publication_date

    document = dict(
        id=unicode(offering.pk),
        owner=unicode(offering.owner_organization.pk),
        content=unicode(text),
        name=unicode(offering.name),
        popularity=Decimal(offering.rating),
        date=in_date,
        state=unicode(offering.state),
        purchaser=purchasers_text
    )

    # ``ix`` rather than ``index`` so the whoosh module name is not shadowed.
    ix = open_dir(self._index_path)
    # As a context manager the writer commits on success and cancels
    # (releasing its lock) if update_document raises.
    with ix.writer() as index_writer:
        index_writer.update_document(**document)
开发者ID:jartieda,项目名称:wstore,代码行数:33,代码来源:search_engine.py
示例14: _get_index
def _get_index(cli_ctx):
    """Return the opened index at ``INDEX_PATH``, creating it first when the
    path does not exist."""
    from whoosh import index as whoosh_index

    index_missing = not os.path.exists(INDEX_PATH)
    if index_missing:
        # create index if it does not exist already
        _create_index(cli_ctx)
    return whoosh_index.open_dir(INDEX_PATH)
开发者ID:sptramer,项目名称:azure-cli,代码行数:7,代码来源:custom.py
示例15: perform_category_query
def perform_category_query(query_words):
    """
    Query the product category index with the supplied words.

    Returns a ``(categories, suggestion)`` pair: ``categories`` maps each
    matching category to the list of its matching sub-categories, and
    ``suggestion`` is the corrected query string, or ``None`` when the
    correction is the same as the input.
    """
    ix = open_dir(settings.WHOOSH_INDEX_DIR)
    parsed = QueryParser("sub_category", ix.schema).parse(query_words)

    categories = {}
    suggestion = None
    with ix.searcher() as searcher:
        for hit in searcher.search(parsed):
            # Group the sub-categories under their parent category.
            categories.setdefault(hit['category'], []).append(hit['sub_category'])

        corrected = searcher.correct_query(parsed, query_words)
        corrected_query = corrected.query
        if hasattr(corrected_query, 'text') and corrected_query.text != query_words:
            suggestion = corrected.string
    return categories, suggestion
开发者ID:UKTradeInvestment,项目名称:navigator,代码行数:26,代码来源:search.py
示例16: search_toc
def search_toc(words, index_path):
    """Search the ``toc-index`` index for titles matching ``words``.

    The words are stripped of diacritics, whitespace-normalised, expanded by
    ``suffix_query`` and AND-grouped against the ``title`` field.

    :param words: the user's search text
    :param index_path: directory holding the ``toc-index`` index
    :return: a JSON string ``{"count": <number of hits>, "result": [...]}``
        where each result has ``title``, ``serial`` and ``topicid``
    """
    words = _strip_diacritics(words)
    # Collapse runs of whitespace. The original computed this value but
    # discarded it, so the normalisation never took effect.
    words = " ".join(words.split())
    ix = open_dir(index_path, indexname="toc-index")
    parser = qparser.QueryParser("title", ix.schema, group=qparser.AndGroup)
    words = suffix_query(words)  # prefix and suffix
    query = parser.parse(words)
    final_result = []
    with ix.searcher() as searcher:
        results = searcher.search(query)  # int(page_number), pagelen=10
        total_count = len(results)
        for hit in results:
            title = hit["title"]
            parent_title = hit["parent_title"]
            # Show "parent : title" when the entry has a parent.
            full_title = shorten(title, 10)
            if len(parent_title) > 0:
                full_title = shorten(parent_title, 10) + " : " + shorten(title, 10)
            final_result.append(
                {
                    "title": full_title,
                    "serial": hit["serial"].encode("utf-8"),  # "None" in case it is not a leaf
                    "topicid": hit["topicid"].encode("utf-8"),
                }
            )
    return json.dumps({"count": total_count, "result": final_result}, ensure_ascii=False)
开发者ID:abdeldayemalimadany,项目名称:Margea,代码行数:27,代码来源:search.py
示例17: search
def search(self, ix=None):
    """Build ``self.query`` from this object's source, target and project
    filters, and open ``self.searcher`` on the index.

    ``ix`` defaults to the index stored in ``self.dir_name``.
    """
    if ix is None:
        ix = open_dir(self.dir_name)
    self.searcher = ix.searcher()

    fields = []
    qs = ''

    # Each term is wrapped in parentheses so that operators such as OR typed
    # into the source text cannot spill over into the target part.
    source = self.source
    if source is not None and len(source) > 0:
        qs += u' source:({0})'.format(source)
        fields.append("source")

    target = self.target
    if target is not None and len(target) > 0:
        qs += u' target:({0})'.format(target)
        fields.append("target")

    project = self.project
    if project is not None and project != 'tots' and len(project) > 0:
        if project == 'softcatala':
            qs += u' softcatala:true'
            fields.append("softcatala")
        else:
            if ',' in project:
                # Several projects: quote each one and OR them together.
                quoted = ["'{0}'".format(name) for name in project.split(',')]
                val = ' OR '.join(quoted)
                qs += u' project:({0})'.format(val)
            else:
                qs += u' project:(\'{0}\')'.format(project)
            fields.append("project")

    self.query = MultifieldParser(fields, ix.schema).parse(qs)
开发者ID:Softcatala,项目名称:translation-memory-tools,代码行数:35,代码来源:search.py
示例18: delete_from_index
def delete_from_index(oblist):
    """Remove the index documents of every object in ``oblist``.

    Each object's ``search_mapping()['idref']`` identifies the document to
    delete; all deletions are committed together.

    :param oblist: iterable of objects exposing ``search_mapping()``
    """
    ix = index.open_dir(app.config['SEARCH_INDEX_PATH'])
    # As a context manager the writer commits on success and cancels
    # (releasing its lock) if search_mapping() or the delete raises; the
    # original left the lock held in that case.
    with ix.writer() as writer:
        for item in oblist:
            mapping = item.search_mapping()
            writer.delete_by_term('idref', mapping['idref'])
开发者ID:abhinaykumar,项目名称:hasjob,代码行数:7,代码来源:search.py
示例19: search_index
def search_index(search_model, query, fields=None, limit=None):
    """Search the index of ``search_model`` and return the hits.

    ``+`` and ``|`` in the query are rewritten to `` AND `` / `` OR ``
    before parsing. A query that cannot be parsed or searched yields an
    empty list instead of raising.

    :param search_model: object providing ``get_path()`` and ``fields``
    :param query: the user's query text
    :param fields: field names to search; falls back to ``search_model.fields``
    :param limit: maximum number of hits; falls back to
        ``settings.DJOOSH_SEARCH_LIMIT`` or 100
    :return: the search results, or ``[]``
    """
    ix = index.open_dir(search_model.get_path())
    fields = fields or search_model.fields
    hits = []
    query = smart_unicode(query)
    limit = limit or getattr(settings, 'DJOOSH_SEARCH_LIMIT', 100)
    if query and fields:
        query = query.replace('+', ' AND ').replace('|', ' OR ')
        parser = qparser.MultifieldParser(fields, schema=ix.schema)
        # Parse once, inside the guard. The original also parsed just before
        # the try block, so a malformed query raised despite the handler.
        try:
            qry = parser.parse(query)
        except Exception:
            qry = None
        if qry:
            # NOTE(review): the searcher is not closed here, presumably
            # because the returned results are still read by the caller --
            # confirm.
            searcher = ix.searcher()
            try:
                hits = searcher.search(qry, limit=limit)
            except Exception:
                hits = []
    ix.close()
    return hits
开发者ID:MechanisM,项目名称:djoosh,代码行数:27,代码来源:utils.py
示例20: update_index
def update_index(search_model, obj=None, created=True):
    """Write one object, or every object of the model, into the search index.

    A document that fails to be written is logged and skipped so the
    remaining objects are still indexed.

    :param search_model: object providing ``get_path()``, ``fields`` and ``model``
    :param obj: single object to index; when falsy, all
        ``search_model.model.objects`` are indexed
    :param created: selects ``update_document`` when true and
        ``add_document`` when false
    """
    import logging
    log = logging.getLogger(__name__)

    ixpath = search_model.get_path()
    ix = index.open_dir(ixpath)
    objects = [obj] if obj else search_model.model.objects.all()

    writer = ix.writer()
    # NOTE(review): created=True mapping to update_document (and False to
    # add_document) looks inverted, but the default full-reindex call relies
    # on update_document, so the mapping is kept as is -- confirm intent.
    write_document = writer.update_document if created else writer.add_document
    for item in objects:
        fields = dict(
            (field, smart_unicode(getattr(item, field, '')))
            for field in search_model.fields
        )
        try:
            write_document(**fields)
        except Exception:
            # Still best-effort, but no longer silent (was ``except: pass``).
            log.exception("Could not index %r", item)
    writer.commit()
    ix.close()
开发者ID:MechanisM,项目名称:djoosh,代码行数:28,代码来源:utils.py
注:本文中的whoosh.index.open_dir函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论