• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python xapian.sortable_serialise函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中xapian.sortable_serialise函数的典型用法代码示例。如果您正苦于以下问题:Python sortable_serialise函数的具体用法是什么?Python sortable_serialise怎么用?有哪些Python sortable_serialise的使用例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了sortable_serialise函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: index_document

    def index_document(self, document, properties):
        """Write the sortable value slots for an entry, then index its terms.

        The timestamp and title slots are always written.  The filesize and
        creation_time slots are written only when the property is present;
        a value that cannot be converted is reported at debug level and the
        slot is left unset.  Afterwards the document is handed to
        ``set_document`` and the properties are indexed through
        ``_index_known`` and ``_index_unknown`` on a copy of the mapping.
        """
        timestamp = float(properties['timestamp'])
        document.add_value(_VALUE_TIMESTAMP,
                           xapian.sortable_serialise(timestamp))

        title = properties.get('title', '').strip()
        document.add_value(_VALUE_TITLE, title)

        if 'filesize' in properties:
            # Conversion and storage share one guard so that any
            # ValueError/TypeError simply skips the slot.
            try:
                document.add_value(
                    _VALUE_FILESIZE,
                    xapian.sortable_serialise(int(properties['filesize'])))
            except (ValueError, TypeError):
                logging.debug('Invalid value for filesize property: %s',
                              properties['filesize'])

        if 'creation_time' in properties:
            try:
                document.add_value(
                    _VALUE_CREATION_TIME,
                    xapian.sortable_serialise(
                        float(properties['creation_time'])))
            except (ValueError, TypeError):
                logging.debug('Invalid value for creation_time property: %s',
                              properties['creation_time'])

        self.set_document(document)

        # Index from a plain dict copy of the properties.
        properties = dict(properties)
        self._index_known(document, properties)
        self._index_unknown(document, properties)
开发者ID:ers-devs,项目名称:sugar-datastore,代码行数:25,代码来源:indexstore-with-ers.py


示例2: test_value_stats

def test_value_stats():
    """Check value frequency and bound statistics for used and unused slots."""
    dbpath = "db_test_value_stats"
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    # Ten documents, each carrying one numeric value in slot 1 only.
    for number in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    # Slot 0 was never written: no frequency and empty bounds.
    expect(db.get_value_freq(0), 0)
    expect(db.get_value_lower_bound(0), "")
    expect(db.get_value_upper_bound(0), "")

    # Slot 1 holds all ten values; the bounds are the serialised 0 and 9.
    expect(db.get_value_freq(1), 10)
    expect(db.get_value_lower_bound(1), xapian.sortable_serialise(0))
    expect(db.get_value_upper_bound(1), xapian.sortable_serialise(9))

    # Slot 2 is unused as well.
    expect(db.get_value_freq(2), 0)
    expect(db.get_value_lower_bound(2), "")
    expect(db.get_value_upper_bound(2), "")

    db.close()
    shutil.rmtree(dbpath)
开发者ID:wangeguo,项目名称:xapian,代码行数:25,代码来源:pythontest2.py


示例3: __call__

 def __call__(self, begin, end):
     """
     Construct a tuple for value range processing.

     `begin` -- a string in the format '<field_name>:[low_range]'
                If 'low_range' is omitted, assume the smallest possible value.
     `end` -- a string in the format '[high_range|*]'.  If '*', assume
              the highest possible value.

     Return a tuple of three strings: (column, low, high).  When no schema
     field is named `field_name` the loop finishes without returning, so
     the result is None.
     """
     colon = begin.find(':')
     field_name = begin[:colon]
     begin = begin[colon + 1:]
     for field_dict in self.sb.schema:
         if field_dict['field_name'] != field_name:
             continue

         field_type = field_dict['type']
         is_numeric = field_type in ('long', 'float')
         is_date = field_type in ('date', 'datetime')

         if not begin:
             if field_type == 'text':
                 begin = u'a' # TODO: A better way of getting a min text value?
             elif is_numeric:
                 begin = float('-inf')
             elif is_date:
                 begin = u'00010101000000'

         # Checked independently of the lower bound: a range that is open
         # on both sides ('field:' with '*') must still get a real upper
         # bound, otherwise float('*') below raises ValueError.
         if end == '*':
             if field_type == 'text':
                 end = u'z' * 100 # TODO: A better way of getting a max text value?
             elif is_numeric:
                 end = float('inf')
             elif is_date:
                 end = u'99990101000000'

         if is_numeric:
             # Numeric bounds are compared in their serialised form.
             begin = xapian.sortable_serialise(float(begin))
             end = xapian.sortable_serialise(float(end))
         return field_dict['column'], str(begin), str(end)
开发者ID:rob-b,项目名称:Grab,代码行数:34,代码来源:xapian_backend.py


示例4: __call__

 def __call__(self, doc):
     """Return the serialised dampened rating of the application in `doc`.

     The application is looked up by the app and package names stored for
     the document; when the review loader has no stats for it, a rating
     of 0 is serialised instead.
     """
     appname = self.db.get_appname(doc)
     pkgname = self.db.get_pkgname(doc)
     stats = self.review_loader.get_review_stats(
         Application(appname, pkgname))
     import xapian
     rating = stats.dampened_rating if stats else 0
     return xapian.sortable_serialise(rating)
开发者ID:Alberto-Beralix,项目名称:Beralix,代码行数:8,代码来源:database.py


示例5: get_msg_terms

def get_msg_terms(db=None, msg=None):
    """Collect the stored data, index terms and sortable values for a message.

    `db`  -- object whose ``place.getnode`` resolves a place id; only used
             when the message has places.
    `msg` -- the message; its content, author, date and places are read.

    Return a dict with three keys:
      "doc_data"   -- the message content, plus the address of the first
                      place when there is one
      "doc_terms"  -- list of terms: stemmed words, author, month, place
                      and coordinate-range terms
      "doc_values" -- list of {"field": ..., "value": ...} dicts holding
                      serialised coordinates and creation date

    Both parameters default to None for signature compatibility, but `msg`
    is dereferenced unconditionally, so it must be supplied.
    """
    # Open question from the original author: which data should be shown
    # for a hit?  Maybe it should be stored as JSON or another serialised
    # structure instead of plain text.
    doc_data = msg.content
    doc_values = []

    stemmer = xapian.Stem("finnish")
    word_pattern = re.compile(r'\b[a-zA-ZäöåüÄÖÅÜÉÈÁÀéèáà]{3,35}\b')

    def stemmed_words(text):
        """Return the stems of the non-stopword words found in `text`."""
        return [stemmer(match.group(0))
                for match in word_pattern.finditer(to_lower_case(text))
                if not is_stopword(match.group(0))]

    doc_terms = stemmed_words(msg.content)
    doc_terms.append("_commented-by_" + msg.author)

    if msg.date:
        # First seven characters of the date string, used as a bucket term.
        doc_terms.append("_c_" + str(msg.date)[:7])

    if msg.places:
        # Only the first place of the message is indexed.
        place = db.place.getnode(msg.places[0])
        doc_terms.extend(get_place_terms(place=place))
        doc_terms.extend(stemmed_words(place.address))

        doc_data += "  " + place.address

        doc_terms.extend("_glatrange_" + term
                         for term in get_latlng_range(place.lat))
        doc_terms.extend("_glngrange_" + term
                         for term in get_latlng_range(place.lng))

        doc_values.append({"field": XAPIAN_X_COORD_FIELD,
                           "value": xapian.sortable_serialise(float(place.lat))})
        doc_values.append({"field": XAPIAN_Y_COORD_FIELD,
                           "value": xapian.sortable_serialise(float(place.lng))})

    # Checked again here so the creation value follows the coordinates.
    if msg.date:
        doc_values.append({"field": XAPIAN_CREATED_FIELD,
                           "value": xapian.sortable_serialise(
                               float(msg.date.serialise()))})

    return {"doc_data": doc_data,
            "doc_terms": doc_terms,
            "doc_values": doc_values}
开发者ID:fillarikanava,项目名称:old-fillarikanava,代码行数:58,代码来源:indexer.py


示例6: index

def index(datapath, dbpath):
    """Index the records yielded by ``parse_states(datapath)`` into `dbpath`.

    For every record the name, description and motto are indexed both with
    a prefix ('S', 'XD', 'XM') and without one, and these value slots are
    filled when the data is available:

      1 -- serialised integer built from the first four characters of
           'admitted'
      2 -- the raw 'admitted' string
      3 -- serialised 'population'
      4 -- "<midlat>,<midlon>" formatted with ``%f``

    The whole record is stored as JSON document data, and the document is
    written with ``replace_document`` under the term "Q" + order, so an
    existing document with that term is replaced rather than duplicated.
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_states(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        # Pick out the fields we're going to index.
        name = fields.get('name', u'')
        description = fields.get('description', u'')
        motto = fields.get('motto', u'')
        admitted = fields.get('admitted', None)
        population = fields.get('population', None)
        order = fields.get('order', u'')

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)

        # index each field with a suitable prefix
        termgenerator.index_text(name, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')
        termgenerator.index_text(motto, 1, 'XM')

        # Index fields without prefixes for general search.
        termgenerator.index_text(name)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)
        termgenerator.increase_termpos()
        termgenerator.index_text(motto)

        # Add document values.
        if admitted is not None:
            doc.add_value(1, xapian.sortable_serialise(int(admitted[:4])))
            doc.add_value(2, admitted) # YYYYMMDD
        if population is not None:
            doc.add_value(3, xapian.sortable_serialise(population))
### Start of example code.
        # Unlike the lookups above these use plain indexing, so a record
        # without 'midlat'/'midlon' keys raises KeyError.
        midlat = fields['midlat']
        midlon = fields['midlon']
        # NOTE(review): the truthiness test also skips a coordinate of 0;
        # presumably fine for this data set -- confirm.
        if midlat and midlon:
            doc.add_value(4, "%f,%f" % (midlat, midlon))
### End of example code.

        # Store all the fields for display purposes.
        doc.set_data(json.dumps(fields))

        # We use the identifier to ensure each object ends up in the
        # database only once no matter how many times we run the
        # indexer.
        idterm = u"Q" + order
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
开发者ID:Gedimar,项目名称:xapian-erlang-docs,代码行数:56,代码来源:index_values_with_geo.py


示例7: make_value

def make_value(s, term):
    """Convert the string `s` to a sortable serialised number for `term`.

    'mtime' strings are parsed with ``time.strptime`` (default format) and
    turned into a timestamp, 'rating' strings are read as floats, and
    every other term, 'year' included, is read as an integer.
    """
    if term == 'mtime':
        number = time.mktime(time.strptime(s))
    elif term == 'rating':
        number = float(s)
    else:
        # 'year' and all remaining terms are plain integers.
        number = int(s)
    return xapian.sortable_serialise(number)
开发者ID:albins,项目名称:music-tools,代码行数:12,代码来源:xapian_music.py


示例8: create_index

def create_index(filename,databasePath):
  print "begin read",filename
  if not os.path.exists(databasePath):
    os.makedirs(databasePath)
  database = xapian.WritableDatabase(databasePath, xapian.DB_CREATE_OR_OPEN)
  stemmer=xapian.Stem('english')
  rex=re.compile(r'[0-9]+|[a-zA-Z]+|[\x80-\xff3]{3}')
  lines=open(filename).readlines()
  processed=0
  len_file=len(lines)
  print filename,"read end"
  time_begin=time.time()
  for line in lines:
    try:
      line=line.encode('utf-8')
    except:
      continue
    line_items=line.split('\t')
    document = xapian.Document()
    freq_sortable=xapian.sortable_serialise(float(line_items[3]))
    click_sortable=xapian.sortable_serialise(float(line_items[4]))
    document.add_value(FREQ,freq_sortable)
    document.add_value(CLICK,click_sortable)
    document.add_value(DATE,line_items[1])
    document.set_data(line_items[0])
    terms=rex.findall(line_items[0])
    for term in terms:
      if len(term) > MAX_TERM_LENGTH:
        document.add_term(stemmer(term[:MAX_TERM_LENGTH]))
      else:
        document.add_term(stemmer(term))
    database.add_document(document)
    processed+=1
    del line
    del line_items
    del document
    del freq_sortable
    del click_sortable
    del terms

    if processed%100000==0:
      end=time.time()
      speed=100000/float(end-time_begin)
      print "="*40
      print filename
      print "speed:\t",speed
      print "percent:\t%s %%" %(100.0*(processed/float(len_file)))
      print "time remain:\t %s hours" %( (len_file-processed)/(speed*3600))
      time_begin=time.time()
  
  gc.collect()
  os.system("rm -rf %s" % filename)
  print filename,"end"
开发者ID:gwtale,项目名称:onebox-scripts,代码行数:53,代码来源:mt_generate_index.py


示例9: index

def index(datapath, dbpath):
    """Index the records yielded by ``parse_csv_file(datapath)`` into `dbpath`.

    The title and description of every record are indexed with a prefix
    ('S', 'XD') and without one, and the record is stored as JSON document
    data.  Two numeric value slots are filled when numbers can be
    extracted with ``numbers_from_string``:

      0 -- the largest number found in 'MEASUREMENTS'
      1 -- the first number found in 'DATE_MADE'

    The document is written with ``replace_document`` under the term
    "Q" + id_NUMBER, so an existing document with that term is replaced
    rather than duplicated.
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        # Pick out the fields we're going to index.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, 'S')
        termgenerator.index_text(description, 1, 'XD')

        # Index fields without prefixes for general search.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        # Store all the fields for display purposes.
        # NOTE(review): the 'encoding' keyword is accepted by Python 2's
        # json.dumps only -- confirm the target interpreter.
        doc.set_data(json.dumps(fields, encoding='utf8'))

### Start of example code.
        # parse the two values we need
        measurements = fields.get('MEASUREMENTS', u'')
        if measurements != u'':
            numbers = numbers_from_string(measurements)
            if len(numbers) > 0:
                doc.add_value(0, xapian.sortable_serialise(max(numbers)))

        date_made = fields.get('DATE_MADE', u'')
        years = numbers_from_string(date_made)
        if len(years) > 0:
            doc.add_value(1, xapian.sortable_serialise(years[0]))
### End of example code.

        # We use the identifier to ensure each object ends up in the
        # database only once no matter how many times we run the
        # indexer.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
开发者ID:PhoenixZhao,项目名称:xapian-docsprint,代码行数:51,代码来源:index_ranges.py


示例10: _encode_simple_value

def _encode_simple_value(field_cls, value):
    """Encode `value` for storage according to its field class.

    Integer and Decimal fields are stored in sortable serialised form;
    every other field class encodes the value itself.
    """
    if issubclass(field_cls, Integer):
        # FIXME this doesn't work with the big integers
        return sortable_serialise(value)

    if issubclass(field_cls, Decimal):
        # FIXME: the decimal is converted to float, so precision is lost
        return sortable_serialise(float(value))

    if type(value) is datetime:
        # Datetimes are normalized to UTC first, so searching works.
        return field_cls.encode(value.astimezone(fixed_offset(0)))

    # A common field or a new field
    return field_cls.encode(value)
开发者ID:hforge,项目名称:itools,代码行数:14,代码来源:catalog.py


示例11: dict2doc

def dict2doc(y):
    """Build and return the Xapian document for the bookmark dict `y`.

    `y` must contain 'url' and 'tags'; 'title', 'notes', 'created' and
    'archived' are optional.  Note that `y['tags']` is updated in place:
    'archived' is appended when an archive exists for the URL, and
    duplicate tags are dropped while keeping their order.
    """
    doc = xapian.Document()
    indexer.set_document(doc)

    url = y['url']
    full_id = urlid(url)
    short_id = full_id[:8]
    doc.add_boolean_term(P['id'] + full_id)
    # The id and short id are also added as unprefixed/stemmed terms to
    # make it easier to select bookmarks from search results.
    for plain_term in (full_id, short_id, 'Z' + full_id, 'Z' + short_id):
        doc.add_boolean_term(plain_term)

    doc.add_value(VALUE_URL, url)

    # One site term per hostname suffix, from the full name downwards.
    host = urlparse(url).hostname
    if host:
        labels = host.split('.')
        for start, _label in enumerate(labels):
            doc.add_boolean_term(P['site'] + '.'.join(labels[start:]))

    archive_path = get_archive_path(full_id)
    if archive_path:
        y['tags'].append('archived')

    # Remove duplicate tags, preserving order.
    tags = list(OrderedDict.fromkeys(y['tags']))
    y['tags'] = tags
    doc.add_value(VALUE_TAGS, u'\x1f'.join(tags))
    for tag in tags:
        doc.add_boolean_term(P['tag'] + tag)

    if 'title' in y:
        title = y['title']
        doc.add_value(VALUE_TITLE, title)
        index_text(title, 'title')

    if 'notes' in y:
        notes = y['notes']
        doc.set_data(notes)
        index_text(notes, 'notes')

    # A missing creation time defaults to the current UTC time.
    created = y.get('created', arrow.utcnow()).timestamp
    doc.add_value(VALUE_CREATED, xapian.sortable_serialise(created))

    if archive_path:
        archived = y.get('archived', arrow.utcnow()).timestamp
        doc.add_value(VALUE_ARCHIVED, xapian.sortable_serialise(archived))
        index_archive(doc, archive_path)

    return doc
开发者ID:ttaylordev,项目名称:z,代码行数:50,代码来源:jot.py


示例12: normalize_range

def normalize_range(begin, end):
    """Convert the bounds of a range query to strings.

    A float bound is stored in sortable serialised form, any other
    non-None bound is passed through ``str``, and None is kept as None.
    Return the pair (begin, end).
    """
    def _as_text(bound):
        if bound is None:
            return None
        if isinstance(bound, float):
            return xapian.sortable_serialise(float(bound))
        return str(bound)

    return _as_text(begin), _as_text(end)
开发者ID:everydo,项目名称:zapian,代码行数:15,代码来源:query.py


示例13: add_product

    def add_product(self, product, database_path=None):
        """Adds product to repository.

        product - Product to be added to database.  Its title and
            description are indexed as text, its attribute dict is stored
            as JSON document data and its price fills value slot 0.
        database_path - Path of the database where product is added.
            Default: None.  When repository has been created with many
            database paths then database_path must be defined.

        The document is stored under the term "Q" + product.url with
        replace_document, so an existing document with that term is
        replaced.  Raises AssertionError when several databases exist and
        no database_path is given.
        """
        if len(self._databases) > 1:
            assert database_path is not None, \
                "With many databases you must identify the database where product is added"

        # Set up a TermGenerator that we'll use in indexing.
        termgenerator = xapian.TermGenerator()
        termgenerator.set_stemmer(self._create_stem())

        # We make a document and tell the term generator to use this.
        doc = xapian.Document()
        termgenerator.set_document(doc)
        termgenerator.index_text(unicode(product.title))
        termgenerator.index_text(unicode(product.description))
        doc.set_data(unicode(json.dumps(product.__dict__)))
        doc.add_value(0, xapian.sortable_serialise(float(product.price)))

        idterm = "Q" + product.url
        doc.add_boolean_term(idterm)

        # Fall back to the default database unless a path was given.
        db = self._db
        if database_path:
            db = self._databases[database_path]

        db.replace_document(idterm, doc)
开发者ID:mlackman,项目名称:productrepository,代码行数:30,代码来源:productrepository.py


示例14: convert

    def convert(self, field_value):
        """
        Return the index value (used for sorting) for `field_value`.

        None is returned unchanged.  Otherwise the result depends on the
        content type reported by ``_get_content_type`` and is passed
        through ``smart_str``:
          numeric  -- sortable serialised form
          boolean  -- 't' or 'f'
          datetime -- year, month, day, hour, minute and second
                      concatenated, each two digits wide except the year
          other    -- the value itself
        """
        if field_value is None:
            return None

        content_type = self._get_content_type(field_value)

        if self._is_float_or_interger(content_type):
            value = xapian.sortable_serialise(field_value)
        elif isinstance(content_type, (models.BooleanField, bool)):
            # Boolean fields are stored as 't' or 'f'
            value = 't' if field_value else 'f'
        elif isinstance(content_type, (models.DateTimeField, datetime.datetime)):
            # Same layout as strftime('%Y%m%d%H%M%S'), built by hand.
            parts = (field_value.year, field_value.month, field_value.day,
                     field_value.hour, field_value.minute,
                     field_value.second)
            value = '%d%02d%02d%02d%02d%02d' % parts
        else:
            value = field_value

        return smart_str(value)
开发者ID:mpmendespt,项目名称:dre,代码行数:27,代码来源:indexer.py


示例15: index

def index(contacts, database, prefixes):
    """Index the contacts configuration into a Xapian database.

    contacts -- passed to ``config``, which yields (person, data) pairs
                where data is an iterable of (prefix, content) pairs
    database -- path of the database, created when missing
    prefixes -- path of the file that receives, as a JSON list, every
                prefix that was seen

    The first character of a prefix decides how its content is stored:
    a character found in ``digits[:5]`` selects that value slot for the
    serialised integer, one found in ``digits[5:]`` selects that slot for
    the raw content, and anything else is indexed as text.
    """
    c = config(contacts)

    db = xapian.WritableDatabase(database, xapian.DB_CREATE_OR_OPEN)

    seen_prefixes = set()
    for person, data in c:
        doc = xapian.Document()
        termgenerator.set_document(doc)
        termgenerator.index_text(person, 1, u'id')

        for prefix, content in data:
            lead = prefix[0]
            if lead in digits[:5]:
                doc.add_value(int(lead),
                              xapian.sortable_serialise(int(content)))
            elif lead in digits[5:]:
                doc.add_value(int(lead), content)
            else:
                # Indexed both under the 'X'-prefixed name and as free text.
                termgenerator.index_text(content, 1, u'X' + prefix)
                termgenerator.index_text(content)
                termgenerator.increase_termpos()
            seen_prefixes.add(prefix)

        person_term = u'Q' + person
        doc.add_boolean_term(person_term)
        doc.set_data(person)
        db.replace_document(person_term, doc)

    with open(prefixes, 'wb') as fp:
        json.dump(list(seen_prefixes), fp)
开发者ID:tlevine,项目名称:cbuh,代码行数:28,代码来源:index.py


示例16: _add_value

        def _add_value(doc, slotnum, value):
            """Store `value` in slot `slotnum` of `doc`.

            Floats are stored in sortable serialised form; everything
            else is stored as its ``str`` representation.
            """
            if not isinstance(value, float):
                doc.add_value(int(slotnum), str(value))
                return

            serialised = xapian.sortable_serialise(float(value))
            doc.add_value(int(slotnum), serialised)
开发者ID:everydo,项目名称:zapian,代码行数:7,代码来源:api.py


示例17: _encode_simple_value

def _encode_simple_value(field_cls, value):
    """Encode `value` for storage according to its field class.

    Integer fields are overloaded to use the sortable serialised form;
    every other field class encodes the value itself.
    """
    if not issubclass(field_cls, Integer):
        # A common field or a new field
        return field_cls.encode(value)

    # XXX warning: this doesn't work with the big integers!
    return sortable_serialise(value)
开发者ID:kennym,项目名称:itools,代码行数:7,代码来源:utils.py


示例18: add

    def add(self, msg):
        """Index the mail `msg` and return the result of ``add_document``.

        The sender (address plus optional name), the body and the subject
        are indexed under their own prefixes; the body text also includes
        sender and subject.  The date fills SLOT_DATE in sortable form and
        the decoded strong id fills SLOT_STRONG_ID.  Nothing is indexed,
        and None is returned, when no body can be extracted.
        """
        sender = msg.from_addr() + ' '
        if msg.from_name():
            sender += msg.from_name()

        with msg.open() as fp:
            parsed = email.message_from_file(fp)
            body = self._get_body(parsed)

        if body is None:
            return

        # Sender and subject are searchable through the body text too.
        body += u' ' + sender
        body += u' ' + (msg.subject() or u'')

        doc = xapian.Document()
        self.term_gen.set_document(doc)

        timestamp = mua.py.unix_dt(msg.date())
        doc.add_value(SLOT_DATE, xapian.sortable_serialise(timestamp))
        doc.add_value(SLOT_STRONG_ID, mua.py.unb64(msg.strong_id()))

        self.term_gen.index_text(sender, 1, PREFIX_FROM)
        self.term_gen.index_text(body, 1, PREFIX_BODY)
        self.term_gen.index_text(msg.subject() or u'', 1, PREFIX_SUBJECT)
        return self.db.add_document(doc)
开发者ID:pombredanne,项目名称:mua,代码行数:26,代码来源:indexer.py


示例19: _marshal_value

def _marshal_value(value):
    """
    Private utility that prepares a Python value for a Xapian value slot.

    Integers (``int`` or ``long``) are returned in sortable serialised
    form; any other value is returned unchanged.
    """
    if not isinstance(value, (int, long)):
        return value
    return xapian.sortable_serialise(value)
开发者ID:bleachyin,项目名称:xapian_weibo,代码行数:7,代码来源:xapian_backend.py


示例20: __call__

 def __call__(self, doc):
     """Return a sortable key for the distance of `doc` from Washington, DC.

     Value slot 4 of the document is read as a comma-separated pair of
     floats (the middle of the state) and its distance to the fixed
     reference point is serialised.
     """
     washington = (38.012, -77.037)
     raw_coords = doc.get_value(4)
     coords = map(float, raw_coords.split(","))
     distance = support.distance_between_coords(coords, washington)
     return xapian.sortable_serialise(distance)
开发者ID:xapian,项目名称:xapian-docsprint,代码行数:7,代码来源:search_sorting3.py



注:本文中的xapian.sortable_serialise函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python xapian.sortable_unserialise函数代码示例发布时间:2022-05-26
下一篇:
Python xapian.inmemory_open函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap