• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python index.open_dir函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中whoosh.index.open_dir函数的典型用法代码示例。如果您正苦于以下问题:Python open_dir函数的具体用法?Python open_dir怎么用?Python open_dir使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了open_dir函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: searchModule

def searchModule(qq):
    """Search the on-disk index for ``qq`` with three different weightings.

    Nothing is returned; the outcome is published through module globals:

    * ``idx`` - the opened index,
    * ``results`` and ``results_bm25`` - results from searchers created
      without an explicit weighting,
    * ``results_tfidf`` - results from a ``scoring.TF_IDF()`` searcher,
    * ``results_tf`` - results from a ``scoring.Frequency()`` searcher,
    * ``time_bm25`` / ``time_tfidf`` / ``time_tf`` - seconds spent creating
      the corresponding searcher and running the search.

    The search only runs when the global ``flag`` equals 1.  Otherwise the
    result globals are reset to empty lists and the timings to ``None``.

    :param qq: raw query text; converted with ``unicode()`` and parsed
        against the ``content`` field.
    """
    global results_bm25, results_tfidf, results_tf, results, idx
    global time_bm25, time_tfidf, time_tf

    # Each global gets its own list.  A chained ``a = b = c = []`` would bind
    # all of them to one shared list object.
    results_bm25, results_tfidf, results_tf, results = [], [], [], []
    time_bm25 = time_tfidf = time_tf = None
    if flag != 1:
        return

    # Pick the index variant from the (stemming, stop-word) settings.
    index_dirs = {
        (True, True): "Indexed/Index_stsw",
        (True, False): "Indexed/Index_st",
        (False, True): "Indexed/Index_sw",
        (False, False): "Indexed/Index",
    }
    idx = open_dir(index_dirs[(doStem != "false", doStop != "false")])

    parser = qparser.QueryParser("content", schema=idx.schema)
    # By default the parser treats the words as if they were connected by AND.
    q = parser.parse(unicode(qq))

    # NOTE(review): the searchers are never closed here; presumably the
    # result objects still need them afterwards - confirm against the callers.
    results = idx.searcher().search(q)
    results.fragmenter.surround = 50

    started = time.time()
    results_bm25 = idx.searcher().search(q)
    time_bm25 = time.time() - started
    results_bm25.fragmenter.surround = 50

    started = time.time()
    results_tfidf = idx.searcher(weighting=scoring.TF_IDF()).search(q)
    time_tfidf = time.time() - started
    results_tfidf.fragmenter.surround = 50

    started = time.time()
    results_tf = idx.searcher(weighting=scoring.Frequency()).search(q)
    time_tf = time.time() - started
    results_tf.fragmenter.surround = 50
开发者ID:abhishekgoyal1994,项目名称:InfoZeal,代码行数:32,代码来源:ndgc.py


示例2: __load__

 def __load__(region=None):
     """Load the search index, creating it when its directory does not exist.

     :param region: which index to load.  ``None`` loads every index listed
         in ``index_dir``; otherwise it must be a key of ``index_dir`` (the
         original note mentions "news" and "blog").
     :return: ``True`` once the requested index/indexes are cached in
         ``Indexer.__index__``; ``False`` when ``region`` is not a key of
         ``index_dir``.
     """
     def _ensure_loaded(name):
         # Open (or create) the index called ``name`` unless already cached.
         if name in Indexer.__index__:
             return
         path = index_dir[name]
         if os.path.exists(path):
             Indexer.__index__[name] = index.open_dir(path, indexname=name)
         else:
             os.makedirs(path)
             Indexer.__index__[name] = index.create_in(path, schema, indexname=name)

     if region:
         if region in Indexer.__index__:
             return True
         if region not in index_dir:
             return False
         _ensure_loaded(region)
         return True

     # Load every configured index.  The loop must run to completion: returning
     # from inside it would leave all regions but the first one unloaded.
     for reg in index_dir:
         _ensure_loaded(reg)
     return True
开发者ID:DMGbupt,项目名称:wechat-crawler,代码行数:29,代码来源:indexer.py


示例3: init

def init():
    """Open (or build) the e-mail and address-book indexes, then search forever.

    Two indexes named ``index_emails`` and ``index_book`` live in
    ``index_path``.  An index that does not exist yet is created and filled
    through ``index_emails`` / ``index_book``.  Afterwards the function loops
    without end: it prompts for a word, searches the ``subject`` and ``body``
    fields of the e-mail index and prints sender name and subject of each hit.
    """
    email_schema = Schema(
        path=TEXT(stored=True),
        sender_email=TEXT(stored=True),
        recipient_emails=TEXT,
        date=DATETIME,
        subject=TEXT(stored=True),
        body=TEXT,
    )
    book_schema = Schema(email=TEXT(stored=True), name=TEXT(stored=True))
    schema_by_name = {"index_emails": email_schema, "index_book": book_schema}

    if not os.path.exists(index_path):
        os.mkdir(index_path)

    indexes = {}
    for ixname, ix_schema in schema_by_name.items():
        # This part could be improved: documents are only indexed when the
        # index does not exist yet.  It does not notice indexed files that
        # were modified or deleted, as explained here:
        #     http://pythonhosted.org/Whoosh/indexing.html#incremental-indexing
        if index.exists_in(index_path, indexname=ixname):
            indexes[ixname] = index.open_dir(index_path, indexname=ixname)
            continue

        # Fresh index: create it, reopen it and feed it.
        index.create_in(index_path, ix_schema, indexname=ixname)
        ix = index.open_dir(index_path, indexname=ixname)
        writer = ix.writer()
        if ixname == "index_emails":
            index_emails(read_dir(), writer)
        elif ixname == "index_book":
            index_book(writer)
        indexes[ixname] = ix

    # Main routine
    separator = "=================================================="
    while True:
        email_ix = indexes.get("index_emails")
        with email_ix.searcher() as searcher:
            typed = str(raw_input("Introduzca una palabra del asunto o cuerpo (p.e. contrato): "))
            parser = MultifieldParser(["subject", "body"], schema=email_ix.schema)
            hits = searcher.search(parser.parse(unicode(typed)))

            print(separator)
            for hit in hits:
                print("Remitente: " + findNameBySender(indexes, hit.get("sender_email")))
                print("Asunto: " + hit.get("subject"))
                print(separator)
开发者ID:garridev,项目名称:practicas-aii,代码行数:56,代码来源:practica.py


示例4: getIndex

 def getIndex(self):
     """Return the opened index of this module, building it first if needed.

     The index directory is ``self.indexdir + "/" + self.sword_module_name``.
     When that path does not exist, an ``Indexer`` for the module is asked to
     build the index before it is opened.
     """
     index_path = self.indexdir + "/" + self.sword_module_name
     if not os.path.exists(index_path):
         # Nothing on disk yet: build the index before opening it.
         Indexer(self.sword_module_name, self.Reader).buildIndex()
     return index.open_dir(index_path)
开发者ID:nano13,项目名称:nvcli,代码行数:11,代码来源:search.py


示例5: init

def init():
    """Open (or build) the e-mail and address-book indexes, then run the menu.

    Two indexes named ``index_emails`` and ``index_book`` live in
    ``index_path``.  An index that does not exist yet is created and filled
    through ``index_emails`` / ``index_book``.  Afterwards the function loops
    without end, asking the user to pick section A, B or C and dispatching
    to ``findMailByWordInSubjectOrBody``, ``findMailbyDate`` or
    ``findMailBySpamWords`` with the text typed at the follow-up prompt.
    """
    email_schema = Schema(
        path=TEXT(stored=True),
        sender_email=TEXT(stored=True),
        recipient_emails=TEXT(stored=True),
        date=DATETIME,
        subject=TEXT(stored=True),
        body=TEXT,
    )
    book_schema = Schema(email=TEXT(stored=True), name=TEXT(stored=True))
    schema_by_name = {"index_emails": email_schema, "index_book": book_schema}

    if not os.path.exists(index_path):
        os.mkdir(index_path)

    indexes = {}
    for ixname, ix_schema in schema_by_name.items():
        # This part could be improved: documents are only indexed when the
        # index does not exist yet.  It does not notice indexed files that
        # were modified or deleted, as explained here:
        #     http://pythonhosted.org/Whoosh/indexing.html#incremental-indexing
        if index.exists_in(index_path, indexname=ixname):
            indexes[ixname] = index.open_dir(index_path, indexname=ixname)
            continue

        # Fresh index: create it, reopen it and feed it.
        index.create_in(index_path, ix_schema, indexname=ixname)
        ix = index.open_dir(index_path, indexname=ixname)
        writer = ix.writer()
        if ixname == "index_emails":
            index_emails(read_dir(), writer)
        elif ixname == "index_book":
            index_book(writer)
        indexes[ixname] = ix

    def section_a():
        word = raw_input("Introduzca una palabra del asunto o cuerpo (p.e. contrato): ")
        findMailByWordInSubjectOrBody(indexes, word)

    def section_b():
        date_text = raw_input("Buscar emails posteriores a la fecha (con formato YYYYMMDD): ")
        findMailbyDate(indexes, date_text)

    def section_c():
        spam_words = raw_input("Introduzca palabras spam (p.e. 'Contrato Gracias compraventa'): ")
        findMailBySpamWords(indexes, spam_words)

    handlers = {'a': section_a, 'b': section_b, 'c': section_c}

    # Main routine
    while True:
        choice = str(raw_input("Elija el apartado que desea ejecutar (A, B o C): ")).lower()
        handler = handlers.get(choice)
        if handler is None:
            print("Opción no válida.")
        else:
            handler()
开发者ID:garridev,项目名称:practicas-aii,代码行数:52,代码来源:merge_test.py


示例6: update

    def update(self, tmp=False):
        """
        Make sure index reflects current backend state, add missing stuff, remove outdated stuff.

        This is intended to be used:
        * after a full rebuild that was done at tmp location
        * after wiki is made read-only or taken offline
        * after the index was moved to the normal index location

        Reason: new revisions that were created after the rebuild started might be missing in new index.

        :param tmp: if true, work on the index at self.index_dir_tmp instead of self.index_dir
        :returns: index changed (bool) - True if revisions had to be added to or
                  deleted from the ALL_REVS index
        """
        index_dir = self.index_dir_tmp if tmp else self.index_dir
        index_all = open_dir(index_dir, indexname=ALL_REVS)
        try:
            # NOTE: self.backend iterator gives (mountpoint, revid) tuples, which is NOT
            # the same as (name, revid), thus we do the set operations just on the revids.
            # first update ALL_REVS index:
            revids_mountpoints = dict((revid, mountpoint) for mountpoint, revid in self.backend)
            backend_revids = set(revids_mountpoints)
            with index_all.searcher() as searcher:
                ix_revids_names = dict((doc[REVID], doc[NAME]) for doc in searcher.all_stored_fields())
            revids_mountpoints.update(ix_revids_names) # this is needed for stuff that was deleted from storage
            ix_revids = set(ix_revids_names)
            add_revids = backend_revids - ix_revids
            del_revids = ix_revids - backend_revids
            # must be computed before the sets get replaced by lists below;
            # bool() so callers really get a bool and not one of the sets
            changed = bool(add_revids or del_revids)
            add_revids = [(revids_mountpoints[revid], revid) for revid in add_revids]
            del_revids = [(revids_mountpoints[revid], revid) for revid in del_revids]
            self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, add_revids, 'add')
            self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, del_revids, 'delete')

            # computed while index_all is still open, used for the LATEST_REVS update below
            backend_latest_names_revids = set(self._find_latest_names_revids(index_all))
        finally:
            index_all.close()
        index_latest = open_dir(index_dir, indexname=LATEST_REVS)
        try:
            # now update LATEST_REVS index:
            with index_latest.searcher() as searcher:
                ix_revids = set(doc[REVID] for doc in searcher.all_stored_fields())
            backend_latest_revids = set(revid for name, revid in backend_latest_names_revids)
            upd_revids = backend_latest_revids - ix_revids
            upd_revids = [(revids_mountpoints[revid], revid) for revid in upd_revids]
            self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, upd_revids, 'update')
            # del_revids is the (mountpoint, revid) list built for ALL_REVS above
            self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, del_revids, 'delete')
        finally:
            index_latest.close()
        return changed
开发者ID:pombredanne,项目名称:moin2,代码行数:49,代码来源:indexing.py


示例7: batch_query

def batch_query(querypath, indexpath):
    """Run every topic found in ``querypath`` against the index at ``indexpath``.

    The query file is split into ``<top>...</top>`` blocks.  For each block
    the topic number, title, description and narrative are extracted, the
    description plus title is parsed as an OR query on the ``content`` field,
    and up to 1000 hits are appended to ``output.txt`` (in the current working
    directory) as tab-separated lines::

        <topicnum> Q0 <second stored value of the hit> <rank + 1> <score>  jack

    Only the hits actually returned are written, so a topic with fewer than
    1000 matches no longer fails with an ``IndexError``.

    :param querypath: path of the topics file to read.
    :param indexpath: directory of the index to search.
    """
    ix = index.open_dir(indexpath)
    # Parser and searcher do not depend on the topic, so create them once.
    parser = qparser.QueryParser("content", schema=ix.schema,
                                 group=qparser.OrGroup)

    with open(querypath, 'r') as fh:
        rawdata = fh.read()

    with open('output.txt', 'w') as out_file:
        with ix.searcher() as searcher:
            for d in get_section2(rawdata, "<top>", "</top>"):
                topicnum = get_section(d, "<num> Number:", "<title>")[0].strip(" ").strip("\n")
                title = get_section(d, "<title>", "<desc>")[0].strip(" ").strip("\n")
                desc = get_section(d, "<desc>", "<narr>")[0].replace("Description:", "").strip(" ")
                narr = get_section(d, "<narr>", "</top>")[0].replace("Narrative:", "").strip(" ")

                print("%s %s %s %s" % (topicnum, title, desc, narr))

                query = parser.parse(desc + " " + title)
                results = searcher.search(query, limit=1000)

                for position, hit in enumerate(results):
                    if position < 2:
                        # Progress/debug output: show the two best hits.
                        print(hit)
                    out_file.write("%s\tQ0\t%s\t%s\t%s\t jack\n" % (
                        topicnum, list(hit.values())[1], hit.rank + 1, hit.score))
开发者ID:bgshin,项目名称:irqa,代码行数:29,代码来源:bquery.py


示例8: query

def query(indexpath):
    """Search the ``content`` field of the index at ``indexpath`` for "test".

    Prints the first hit of the result.
    """
    ix = index.open_dir(indexpath)
    parsed = QueryParser("content", ix.schema).parse("test")
    with ix.searcher() as searcher:
        hits = searcher.search(parsed)
        print(hits[0])
开发者ID:bgshin,项目名称:irqa,代码行数:7,代码来源:bquery.py


示例9: run

	def run(self):
		"""Open the index, then serve messages from ``self.index_p`` until it closes.

		The index in ``self.indexdir`` is opened, or created with ``SCHEMA``
		when the directory or the index is missing.  Each message received is a
		``(kind, data)`` pair dispatched on ``kind``: ``QUERY`` goes to
		``_processSearch``, ``LOG`` to ``_processLog`` and ``RENAME`` to
		``_processRename``.  The loop ends on ``EOFError`` from ``recv()`` or on
		``KeyboardInterrupt``.  On the way out the buffer is dumped and the
		searcher and index are closed, even if an unexpected error ends the loop.
		"""
		self.buffer = deque(maxlen=BUFFERLINES)
		# open index
		if not exists(self.indexdir):
			makedirs(self.indexdir)
			self.ix = create_in(self.indexdir, SCHEMA)
		elif exists_in(self.indexdir):
			self.ix = open_dir(self.indexdir)
		else:
			self.ix = create_in(self.indexdir, SCHEMA)
		self.qp = QueryParser("content", self.ix.schema)
		self.searcher = self.ix.searcher()
		index_p = self.index_p
		try:
			while True:
				try:
					# check index_p
					try:
						msg_type, data = index_p.recv()
					except EOFError:
						break
					try:
						if msg_type == QUERY:
							self._processSearch(data)
						elif msg_type == LOG:
							self._processLog(data)
						elif msg_type == RENAME:
							self._processRename(data)
						else:
							prnt("Unexpected data in logindexsearch.")
					except Exception:
						# A failing message must not kill the process.  Only
						# Exception is caught so that KeyboardInterrupt still
						# reaches the handler below and stops the loop.
						print_exc()
						prnt("EXCEPTION in logindexsearch process.")
				except KeyboardInterrupt:
					break
		finally:
			try:
				self._dumpBuffer(self.buffer)
			finally:
				self.searcher.close()
				self.ix.close()
开发者ID:Clam-,项目名称:pyBurlyBot,代码行数:32,代码来源:pbm_logindexsearch.py


示例10: indexer

    def indexer(self, create=True):
        """Prepare ``self.writer`` for the benchmark index.

        The index lives in ``<options.dir>/<options.indexname>_whoosh``; the
        directory is created when missing.  The writer is a
        ``MultiSegmentWriter`` when ``options.expw`` is set and the index's
        own writer otherwise.  ``self._procdoc`` is set to the spec's
        ``process_document_whoosh`` attribute when the spec has one, else
        ``None``.

        :param create: create a new index in the directory when true,
            otherwise open the index already stored there.
        """
        spec = self.bench.spec
        opts = self.options

        schema = spec.whoosh_schema()
        index_path = os.path.join(opts.dir, "%s_whoosh" % opts.indexname)
        if not os.path.exists(index_path):
            os.mkdir(index_path)

        ix = index.create_in(index_path, schema) if create else index.open_dir(index_path)

        poolclass = find_object(opts.pool) if opts.pool else None

        writer_args = {
            "limitmb": int(opts.limitmb),
            "poolclass": poolclass,
            "dir": opts.tempdir,
            "procs": int(opts.procs),
            "batchsize": int(opts.batch),
        }

        if opts.expw:
            from whoosh.filedb.multiproc import MultiSegmentWriter
            self.writer = MultiSegmentWriter(ix, **writer_args)
        else:
            self.writer = ix.writer(**writer_args)

        if hasattr(spec, "process_document_whoosh"):
            self._procdoc = spec.process_document_whoosh
        else:
            self._procdoc = None
开发者ID:MapofLife,项目名称:MOL,代码行数:28,代码来源:bench.py


示例11: __init__

 def __init__(self, idx_dir):
     """Remember ``idx_dir`` and attach to the index stored there.

     An existing directory is opened as an index and kept in ``self.ix``.
     A missing directory is created and ``self.create()`` is called instead.
     """
     self.idx_dir = idx_dir
     if os.path.exists(idx_dir):
         self.ix = open_dir(idx_dir)
         return
     # First use: make the directory, then let create() set things up.
     os.mkdir(idx_dir)
     self.create()
开发者ID:Armagedoom,项目名称:leo-editor,代码行数:7,代码来源:leofts.py


示例12: __init__

 def __init__(self, db_path):
     """Attach to the index in ``db_path`` and prepare a query parser.

     ``ensuredir`` is called on ``db_path`` first.  An index already stored
     there is opened; otherwise a new one is created with ``self.schema``.
     The parser in ``self.qparser`` targets the ``text`` field.
     """
     ensuredir(db_path)
     if not index.exists_in(db_path):
         self.index = index.create_in(db_path, schema=self.schema)
     else:
         self.index = index.open_dir(db_path)
     self.qparser = QueryParser('text', self.schema)
开发者ID:lehmannro,项目名称:sphinx-mirror,代码行数:7,代码来源:whooshsearch.py


示例13: update_index

    def update_index(self, offering):
        """
        Update the document of a concrete offering in the search index.

        :param offering: the offering to (re)index.  Its ``pk``, ``name``,
            ``state``, ``rating``, ``owner_organization.pk`` and either
            ``creation_date`` (state ``'uploaded'``) or ``publication_date``
            (any other state) are written to the document.
        :raises Exception: if the index directory is missing or empty.
        """

        if not os.path.exists(self._index_path) or not os.listdir(self._index_path):
            raise Exception('The index does not exist')

        # Build every field value before asking for the writer, so a failure
        # here cannot leave an open writer behind.
        text = self._aggregate_text(offering)
        purchasers_text = self._aggregate_purchasers(offering)

        if offering.state == 'uploaded':
            in_date = offering.creation_date
        else:
            in_date = offering.publication_date

        index = open_dir(self._index_path)

        # Used as a context manager the writer commits on success and cancels
        # when update_document raises, instead of being left uncommitted.
        with index.writer() as index_writer:
            index_writer.update_document(
                id=unicode(offering.pk),
                owner=unicode(offering.owner_organization.pk),
                content=unicode(text),
                name=unicode(offering.name),
                popularity=Decimal(offering.rating),
                date=in_date,
                state=unicode(offering.state),
                purchaser=purchasers_text
            )
开发者ID:jartieda,项目名称:wstore,代码行数:33,代码来源:search_engine.py


示例14: _get_index

def _get_index(cli_ctx):
    """Return the opened index stored at ``INDEX_PATH``.

    When nothing exists at ``INDEX_PATH`` yet, ``_create_index(cli_ctx)`` is
    called before the index is opened.
    """
    from whoosh import index as whoosh_index

    index_missing = not os.path.exists(INDEX_PATH)
    if index_missing:
        # create index if it does not exist already
        _create_index(cli_ctx)
    return whoosh_index.open_dir(INDEX_PATH)
开发者ID:sptramer,项目名称:azure-cli,代码行数:7,代码来源:custom.py


示例15: perform_category_query

def perform_category_query(query_words):
    """
    Perform a query using the supplied words on the product category index.

    The words are parsed against the ``sub_category`` field of the index in
    ``settings.WHOOSH_INDEX_DIR``.

    :param query_words: the query text.
    :return: a ``(categories, suggestion)`` tuple.  ``categories`` maps each
        hit's ``category`` to the list of matching ``sub_category`` values.
        ``suggestion`` is the corrected query string when the corrected query
        has a ``text`` attribute that differs from ``query_words``, otherwise
        ``None``.
    """
    ix = open_dir(settings.WHOOSH_INDEX_DIR)
    parsed = QueryParser("sub_category", ix.schema).parse(query_words)

    by_category = {}
    did_you_mean = None

    with ix.searcher() as searcher:
        for hit in searcher.search(parsed):
            by_category.setdefault(hit['category'], []).append(hit['sub_category'])

        correction = searcher.correct_query(parsed, query_words)
        if hasattr(correction.query, 'text'):
            if correction.query.text != query_words:
                did_you_mean = correction.string

    return by_category, did_you_mean
开发者ID:UKTradeInvestment,项目名称:navigator,代码行数:26,代码来源:search.py


示例16: search_toc

def search_toc(words, index_path):
    """Search the ``toc-index`` index for titles matching ``words``.

    The words are passed through ``_strip_diacritics``, whitespace runs are
    collapsed to single spaces, and ``suffix_query`` is applied before the
    text is parsed as an AND query on the ``title`` field.

    :param words: the text typed by the user.
    :param index_path: directory containing the ``toc-index`` index.
    :return: a JSON string ``{"count": <number of results>, "result": [...]}``
        where every entry has ``title`` (shortened, prefixed with the
        shortened parent title when there is one), ``serial`` and ``topicid``.
    """
    words = _strip_diacritics(words)
    # Collapse multiple spaces.  The result has to be assigned back: strings
    # are immutable, so a bare join expression would change nothing.
    words = " ".join(words.split())
    ix = open_dir(index_path, indexname="toc-index")
    parser = qparser.QueryParser("title", ix.schema, group=qparser.AndGroup)
    words = suffix_query(words)  # prefix and suffix
    query = parser.parse(words)
    final_result = []
    with ix.searcher() as searcher:
        results = searcher.search(query)  # int(page_number), pagelen=10
        total_count = len(results)
        for hit in results:
            title = hit["title"]
            parent_title = hit["parent_title"]
            full_title = shorten(title, 10)
            if len(parent_title) > 0:
                full_title = shorten(parent_title, 10) + " : " + full_title

            final_result.append(
                {
                    "title": full_title,
                    "serial": hit["serial"].encode("utf-8"),  # "None" in case it is not a leaf
                    "topicid": hit["topicid"].encode("utf-8"),
                }
            )

    return json.dumps({"count": total_count, "result": final_result}, ensure_ascii=False)
开发者ID:abdeldayemalimadany,项目名称:Margea,代码行数:27,代码来源:search.py


示例17: search

    def search(self, ix=None):
        """Build ``self.query`` and ``self.searcher`` from the search criteria.

        A query string is assembled from ``self.source``, ``self.target`` and
        ``self.project`` (each skipped when ``None`` or empty; a project of
        ``'tots'`` is skipped too) and parsed with a ``MultifieldParser`` over
        the fields that were used.

        :param ix: an already opened index; when ``None`` the index in
            ``self.dir_name`` is opened.
        """
        if ix is None:
            ix = open_dir(self.dir_name)

        self.searcher = ix.searcher()

        def has_text(value):
            return value is not None and len(value) > 0

        clauses = []
        fields = []

        # Parentheses keep operators such as OR typed in the source text
        # from leaking into the target clause.
        if has_text(self.source):
            clauses.append(u' source:({0})'.format(self.source))
            fields.append("source")

        if has_text(self.target):
            clauses.append(u' target:({0})'.format(self.target))
            fields.append("target")

        project = self.project
        if project is not None and project != 'tots' and len(project) > 0:
            if project == 'softcatala':
                clauses.append(u' softcatala:true')
                fields.append("softcatala")
            else:
                if ',' in project:
                    # Several projects: quote each one and OR them together.
                    quoted = ' OR '.join("'{0}'".format(name) for name in project.split(','))
                    clauses.append(u' project:({0})'.format(quoted))
                else:
                    clauses.append(u' project:(\'{0}\')'.format(project))
                fields.append("project")

        qs = ''.join(clauses)
        self.query = MultifieldParser(fields, ix.schema).parse(qs)
开发者ID:Softcatala,项目名称:translation-memory-tools,代码行数:35,代码来源:search.py


示例18: delete_from_index

def delete_from_index(oblist):
    """Remove the documents of the given objects from the search index.

    The index is the one stored in ``app.config['SEARCH_INDEX_PATH']``.
    Every document whose ``idref`` term equals the ``idref`` of an object's
    ``search_mapping()`` is deleted.

    :param oblist: iterable of objects offering ``search_mapping()``, which
        must return a mapping with an ``idref`` key.
    """
    ix = index.open_dir(app.config['SEARCH_INDEX_PATH'])
    # Used as a context manager the writer commits on success and cancels
    # when an item fails, instead of being left uncommitted.
    with ix.writer() as writer:
        for item in oblist:
            writer.delete_by_term('idref', item.search_mapping()['idref'])
开发者ID:abhinaykumar,项目名称:hasjob,代码行数:7,代码来源:search.py


示例19: search_index

def search_index(search_model, query, fields=None, limit=None):
    """Search the index of ``search_model`` and return the hits.

    ``+`` and ``|`` in the query are rewritten to `` AND `` and `` OR ``
    before it is parsed with a ``MultifieldParser``.

    :param search_model: object providing ``get_path()`` (the index
        directory) and ``fields`` (the default fields to search).
    :param query: the query text; converted with ``smart_unicode``.
    :param fields: fields to search; falls back to ``search_model.fields``
        when empty or ``None``.
    :param limit: maximum number of hits; falls back to
        ``settings.DJOOSH_SEARCH_LIMIT`` and then to 100.
    :return: the search results, or an empty list when the query or the
        fields are empty, the query cannot be parsed, or the search fails.
    """
    ix = index.open_dir(search_model.get_path())
    try:
        fields = fields or search_model.fields
        query = smart_unicode(query)
        limit = limit or getattr(settings, 'DJOOSH_SEARCH_LIMIT', 100)

        if not (query and fields):
            return []

        query = query.replace('+', ' AND ').replace('|', ' OR ')
        parser = qparser.MultifieldParser(fields, schema=ix.schema)

        # Parse exactly once, inside the guard: an unparsable query yields
        # "no hits" rather than an exception.
        try:
            qry = parser.parse(query)
        except Exception:
            return []
        if not qry:
            return []

        # NOTE(review): on success the searcher stays open; presumably the
        # returned results still read from it - confirm against the callers.
        searcher = ix.searcher()
        try:
            return searcher.search(qry, limit=limit)
        except Exception:
            searcher.close()
            return []
    finally:
        ix.close()
开发者ID:MechanisM,项目名称:djoosh,代码行数:27,代码来源:utils.py


示例20: update_index

def update_index(search_model, obj=None, created=True):
    """Write one object, or every object of the model, to the search index.

    Each document consists of the ``search_model.fields`` attributes of the
    object, converted with ``smart_unicode`` (missing attributes become the
    empty string).  Writing is best effort: a document that cannot be written
    is logged and skipped, and the remaining ones are still committed.

    :param search_model: object providing ``get_path()``, ``fields`` and
        ``model``.
    :param obj: the single object to index; when falsy, all of
        ``search_model.model.objects.all()`` are indexed.
    :param created: when true documents are written with
        ``update_document``, otherwise with ``add_document``.
    """
    import logging
    logger = logging.getLogger(__name__)

    ix = index.open_dir(search_model.get_path())
    try:
        objects = [obj] if obj else search_model.model.objects.all()

        writer = ix.writer()
        # NOTE(review): the flag-to-method mapping is kept exactly as it was,
        # but created=True -> update_document / created=False -> add_document
        # looks inverted - confirm against the callers.
        write_document = writer.update_document if created else writer.add_document

        try:
            for instance in objects:
                document = dict(
                    (field, smart_unicode(getattr(instance, field, '')))
                    for field in search_model.fields
                )
                try:
                    write_document(**document)
                except Exception:
                    logger.exception("Could not index %r", instance)
        except Exception:
            # Anything else aborts the batch; drop the writer before re-raising.
            writer.cancel()
            raise
        writer.commit()
    finally:
        ix.close()
开发者ID:MechanisM,项目名称:djoosh,代码行数:28,代码来源:utils.py



注:本文中的whoosh.index.open_dir函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python index.writer函数代码示例发布时间:2022-05-26
下一篇:
Python index.full_prepare函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap