本文整理汇总了Python中xapian.sortable_serialise函数的典型用法代码示例。如果您正苦于以下问题:Python sortable_serialise函数的具体用法?Python sortable_serialise怎么用?Python sortable_serialise使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sortable_serialise函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: index_document
def index_document(self, document, properties):
    """Fill the value slots of *document* and index its properties.

    The timestamp and title slots are always written; ``properties`` must
    therefore contain a ``'timestamp'`` entry convertible to float.  The
    optional ``'filesize'`` and ``'creation_time'`` entries are stored as
    sortable numbers; a value that cannot be converted is logged at debug
    level and skipped.  Afterwards the document is handed to
    ``set_document`` and a copy of the properties is passed to
    ``_index_known`` and ``_index_unknown``.
    """
    timestamp_blob = xapian.sortable_serialise(float(properties['timestamp']))
    document.add_value(_VALUE_TIMESTAMP, timestamp_blob)

    title = properties.get('title', '').strip()
    document.add_value(_VALUE_TITLE, title)

    if 'filesize' in properties:
        try:
            size_blob = xapian.sortable_serialise(int(properties['filesize']))
            document.add_value(_VALUE_FILESIZE, size_blob)
        except (ValueError, TypeError):
            logging.debug('Invalid value for filesize property: %s',
                          properties['filesize'])

    if 'creation_time' in properties:
        try:
            ctime_blob = xapian.sortable_serialise(
                float(properties['creation_time']))
            document.add_value(_VALUE_CREATION_TIME, ctime_blob)
        except (ValueError, TypeError):
            logging.debug('Invalid value for creation_time property: %s',
                          properties['creation_time'])

    self.set_document(document)

    # The indexing helpers receive their own copy of the mapping.
    properties = dict(properties)
    self._index_known(document, properties)
    self._index_unknown(document, properties)
开发者ID:ers-devs,项目名称:sugar-datastore,代码行数:25,代码来源:indexstore-with-ers.py
示例2: test_value_stats
def test_value_stats():
    """Check value frequency and bound statistics on a ten-document database.
    """
    dbpath = "db_test_value_stats"
    db = xapian.chert_open(dbpath, xapian.DB_CREATE_OR_OVERWRITE)

    # Every document carries exactly one serialised number, in slot 1.
    for number in (6, 9, 4.5, 4.4, 4.6, 2, 1, 4, 3, 0):
        doc = xapian.Document()
        doc.add_value(1, xapian.sortable_serialise(number))
        db.add_document(doc)

    # (slot, expected frequency, expected lower bound, expected upper bound);
    # slots 0 and 2 were never written to.
    smallest = xapian.sortable_serialise(0)
    largest = xapian.sortable_serialise(9)
    expected_stats = (
        (0, 0, "", ""),
        (1, 10, smallest, largest),
        (2, 0, "", ""),
    )
    for slot, freq, lower, upper in expected_stats:
        expect(db.get_value_freq(slot), freq)
        expect(db.get_value_lower_bound(slot), lower)
        expect(db.get_value_upper_bound(slot), upper)

    db.close()
    shutil.rmtree(dbpath)
开发者ID:wangeguo,项目名称:xapian,代码行数:25,代码来源:pythontest2.py
示例3: __call__
def __call__(self, begin, end):
    """
    Construct a tuple for value range processing.

    `begin` -- a string in the format '<field_name>:[low_range]'.
               If 'low_range' is omitted, assume the smallest possible value.
    `end` -- a string in the format '[high_range|*]'.  If '*', assume
             the highest possible value.

    Return a tuple of three strings: (column, low, high).  When no entry of
    ``self.sb.schema`` has the requested field name the loop falls through
    and ``None`` is returned implicitly.
    """
    colon = begin.find(':')
    field_name = begin[:colon]
    begin = begin[colon + 1:]

    for field_dict in self.sb.schema:
        if field_dict['field_name'] != field_name:
            continue

        field_type = field_dict['type']
        is_numeric = field_type == 'long' or field_type == 'float'
        is_date = field_type == 'date' or field_type == 'datetime'

        if not begin:
            if field_type == 'text':
                begin = u'a'  # TODO: A better way of getting a min text value?
            elif is_numeric:
                begin = float('-inf')
            elif is_date:
                begin = u'00010101000000'

        # Checked independently of `begin`: a fully open range ('field:..*')
        # must resolve both bounds.  With the former `elif`, '*' was left
        # untouched whenever the lower bound was omitted too.
        if end == '*':
            if field_type == 'text':
                end = u'z' * 100  # TODO: A better way of getting a max text value?
            elif is_numeric:
                end = float('inf')
            elif is_date:
                end = u'99990101000000'

        if is_numeric:
            # Numeric bounds are compared in their serialised form.
            begin = xapian.sortable_serialise(float(begin))
            end = xapian.sortable_serialise(float(end))

        return field_dict['column'], str(begin), str(end)
开发者ID:rob-b,项目名称:Grab,代码行数:34,代码来源:xapian_backend.py
示例4: __call__
def __call__(self, doc):
    """Return the sortable key for *doc*: its dampened rating, or 0.

    The application is looked up from the document's app and package names;
    when the review loader has no stats for it, 0 is serialised instead.
    """
    app = Application(self.db.get_appname(doc), self.db.get_pkgname(doc))
    stats = self.review_loader.get_review_stats(app)
    # xapian is imported at call time, exactly as the original did.
    import xapian
    rating = stats.dampened_rating if stats else 0
    return xapian.sortable_serialise(rating)
开发者ID:Alberto-Beralix,项目名称:Beralix,代码行数:8,代码来源:database.py
示例5: get_msg_terms
def get_msg_terms(db=None, msg=None):
    """Collect the index data, terms and values for one message.

    db  -- database handle; only ``db.place.getnode`` is used, and only
           when the message has places.
    msg -- message providing ``content``, ``author``, ``date`` and
           ``places``.

    Returns a dict with three keys:
      "doc_data"   -- the message content, plus the address of the first
                      place when there is one;
      "doc_terms"  -- stemmed content words, author and date marker terms,
                      and (with a place) place terms, stemmed address words
                      and latitude/longitude range terms;
      "doc_values" -- list of {"field": ..., "value": ...} dicts holding
                      sortable-serialised coordinates and creation date.
    """
    # Compiled once and shared by the content and address passes.
    word_pattern = re.compile(r'\b[a-zA-ZäöåüÄÖÅÜÉÈÁÀéèáà]{3,35}\b')
    stemmer = xapian.Stem("finnish")

    def stemmed_words(text):
        """Return the stems of the non-stopword words found in *text*."""
        stems = []
        for match in word_pattern.finditer(to_lower_case(text)):
            word = match.group(0)
            if is_stopword(word):
                continue
            stems.append(stemmer(word))
        return stems

    # This is pretty important: what data to be shown from the thing?
    # Maybe should be parsed into json already? Or serialise a hash somehow?
    doc_data = msg.content
    doc_values = []

    doc_terms = stemmed_words(msg.content)
    doc_terms.append("_commented-by_" + msg.author)
    if msg.date:
        # Only the first seven characters of the date's string form are kept.
        doc_terms.append("_c_" + str(msg.date)[:7])

    if msg.places:
        # Only the first place of the message is indexed.
        place = db.place.getnode(msg.places[0])
        doc_terms.extend(get_place_terms(place=place))
        doc_terms.extend(stemmed_words(place.address))
        doc_data += " " + place.address
        doc_terms.extend("_glatrange_" + term
                         for term in get_latlng_range(place.lat))
        doc_terms.extend("_glngrange_" + term
                         for term in get_latlng_range(place.lng))
        doc_values.append({"field": XAPIAN_X_COORD_FIELD,
                           "value": xapian.sortable_serialise(float(place.lat))})
        doc_values.append({"field": XAPIAN_Y_COORD_FIELD,
                           "value": xapian.sortable_serialise(float(place.lng))})

    if msg.date:
        created = xapian.sortable_serialise(float(msg.date.serialise()))
        doc_values.append({"field": XAPIAN_CREATED_FIELD, "value": created})

    return {"doc_data": doc_data,
            "doc_terms": doc_terms,
            "doc_values": doc_values}
开发者ID:fillarikanava,项目名称:old-fillarikanava,代码行数:58,代码来源:indexer.py
示例6: index
def index(datapath, dbpath):
    """Index every record from ``parse_states(datapath)`` into *dbpath*.

    Each record becomes one document: name, description and motto are
    indexed both with a prefix and unprefixed, the admission year/date,
    population and "lat,lon" midpoint go into value slots 1-4, and the
    whole record is stored as JSON document data.  Documents are keyed by
    the term 'Q' + order, so re-running the indexer replaces them.
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # One stemming TermGenerator is reused for every document.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_states(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        name = fields.get('name', u'')
        description = fields.get('description', u'')
        motto = fields.get('motto', u'')
        admitted = fields.get('admitted', None)
        population = fields.get('population', None)
        order = fields.get('order', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Prefixed terms, so each field can be searched on its own.
        for text, prefix in ((name, 'S'), (description, 'XD'), (motto, 'XM')):
            termgenerator.index_text(text, 1, prefix)

        # Unprefixed terms for general search; the term position is bumped
        # between fields.
        termgenerator.index_text(name)
        for text in (description, motto):
            termgenerator.increase_termpos()
            termgenerator.index_text(text)

        # Document values.
        if admitted is not None:
            admitted_year = int(admitted[:4])
            doc.add_value(1, xapian.sortable_serialise(admitted_year))
            doc.add_value(2, admitted)  # YYYYMMDD
        if population is not None:
            doc.add_value(3, xapian.sortable_serialise(population))
        ### Start of example code.
        midlat = fields['midlat']
        midlon = fields['midlon']
        if midlat and midlon:
            doc.add_value(4, "%f,%f" % (midlat, midlon))
        ### End of example code.

        # Store all the fields for display purposes.
        doc.set_data(json.dumps(fields))

        # The identifier term guarantees a single document per object no
        # matter how many times the indexer runs.
        idterm = u"Q" + order
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
开发者ID:Gedimar,项目名称:xapian-erlang-docs,代码行数:56,代码来源:index_values_with_geo.py
示例7: make_value
def make_value(s, term):
    """Parse the string *s* for field *term* and return it sortable-serialised.

    'mtime' is parsed with ``time.strptime`` (default format) and turned
    into a timestamp, 'rating' is read as a float, and 'year' as well as
    any other term is read as an integer.
    """
    if term == 'mtime':
        # This is in a date string format due to serialization.
        number = time.mktime(time.strptime(s))
    elif term == 'rating':
        number = float(s)
    else:
        # 'year' and every remaining term hold plain integers.
        number = int(s)
    return xapian.sortable_serialise(number)
开发者ID:albins,项目名称:music-tools,代码行数:12,代码来源:xapian_music.py
示例8: create_index
def create_index(filename, databasePath):
    """Index a tab-separated file into a Xapian database, then delete the file.

    filename     -- path of the input file.  Each line is split on tabs:
                    column 0 is the text that is tokenised and stored as
                    document data, column 1 goes into the DATE slot, and
                    columns 3 and 4 are read as floats into the FREQ and
                    CLICK slots.
    databasePath -- database directory; created when missing.

    Lines that cannot be encoded to UTF-8 are skipped.  Progress figures
    are printed every 100000 indexed lines.
    """
    print("begin read %s" % (filename,))
    if not os.path.exists(databasePath):
        os.makedirs(databasePath)
    database = xapian.WritableDatabase(databasePath, xapian.DB_CREATE_OR_OPEN)
    stemmer = xapian.Stem('english')
    # NOTE(review): the '3' inside the last character class looks
    # unintended; it is kept because removing it would change tokenisation.
    rex = re.compile(r'[0-9]+|[a-zA-Z]+|[\x80-\xff3]{3}')

    # The handle is closed as soon as the lines are in memory.
    with open(filename) as source:
        lines = source.readlines()
    len_file = len(lines)
    print("%s read end" % (filename,))

    processed = 0
    time_begin = time.time()
    for line in lines:
        try:
            line = line.encode('utf-8')
        except UnicodeError:
            # Only encoding problems are skipped; anything else (including
            # KeyboardInterrupt) propagates.
            continue
        line_items = line.split('\t')

        document = xapian.Document()
        document.add_value(FREQ, xapian.sortable_serialise(float(line_items[3])))
        document.add_value(CLICK, xapian.sortable_serialise(float(line_items[4])))
        document.add_value(DATE, line_items[1])
        document.set_data(line_items[0])
        for term in rex.findall(line_items[0]):
            # Slicing is a no-op for terms already within the limit.
            document.add_term(stemmer(term[:MAX_TERM_LENGTH]))
        database.add_document(document)
        processed += 1

        if processed % 100000 == 0:
            end = time.time()
            speed = 100000 / float(end - time_begin)
            print("=" * 40)
            print(filename)
            print("speed:\t%s" % (speed,))
            print("percent:\t%s %%" % (100.0 * (processed / float(len_file))))
            print("time remain:\t %s hours" % ((len_file - processed) / (speed * 3600)))
            time_begin = time.time()
            gc.collect()

    # Remove the consumed input without going through a shell: the old
    # "rm -rf %s" command broke on (and could be abused through) file names
    # containing spaces or shell metacharacters.  As with `rm -f`, a failure
    # to delete is not treated as an error.
    try:
        os.remove(filename)
    except OSError:
        pass
    print("%s end" % (filename,))
开发者ID:gwtale,项目名称:onebox-scripts,代码行数:53,代码来源:mt_generate_index.py
示例9: index
def index(datapath, dbpath):
    """Index every record from ``parse_csv_file(datapath)`` into *dbpath*.

    Title and description are indexed with and without prefixes, the whole
    record is stored as JSON document data, the largest number found in
    MEASUREMENTS goes into value slot 0 and the first number found in
    DATE_MADE into slot 1.  Documents are keyed by 'Q' + id_NUMBER.
    """
    # Create or open the database we're going to be writing to.
    db = xapian.WritableDatabase(dbpath, xapian.DB_CREATE_OR_OPEN)

    # One stemming TermGenerator is reused for every document.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(xapian.Stem("en"))

    for fields in parse_csv_file(datapath):
        # 'fields' is a dictionary mapping from field name to value.
        description = fields.get('DESCRIPTION', u'')
        title = fields.get('TITLE', u'')
        identifier = fields.get('id_NUMBER', u'')

        doc = xapian.Document()
        termgenerator.set_document(doc)

        # Prefixed terms first ...
        for text, prefix in ((title, 'S'), (description, 'XD')):
            termgenerator.index_text(text, 1, prefix)

        # ... then unprefixed ones for general search.
        termgenerator.index_text(title)
        termgenerator.increase_termpos()
        termgenerator.index_text(description)

        # Store all the fields for display purposes.
        doc.set_data(json.dumps(fields, encoding='utf8'))

        ### Start of example code.
        # parse the two values we need
        measurements = fields.get('MEASUREMENTS', u'')
        if measurements != u'':
            numbers = numbers_from_string(measurements)
            if len(numbers) > 0:
                largest = max(numbers)
                doc.add_value(0, xapian.sortable_serialise(largest))

        years = numbers_from_string(fields.get('DATE_MADE', u''))
        if len(years) > 0:
            first_year = years[0]
            doc.add_value(1, xapian.sortable_serialise(first_year))
        ### End of example code.

        # The identifier term guarantees a single document per object no
        # matter how many times the indexer runs.
        idterm = u"Q" + identifier
        doc.add_boolean_term(idterm)
        db.replace_document(idterm, doc)
开发者ID:PhoenixZhao,项目名称:xapian-docsprint,代码行数:51,代码来源:index_ranges.py
示例10: _encode_simple_value
def _encode_simple_value(field_cls, value):
    """Encode *value* for storage according to its field class.

    Integer and Decimal fields are stored sortable-serialised; every other
    field class encodes the value itself, after datetimes have been
    normalised to UTC.
    """
    if issubclass(field_cls, Integer):
        # FIXME this doesn't work with the big integers
        return sortable_serialise(value)
    if issubclass(field_cls, Decimal):
        # FIXME: We convert decimal->float so we lost precision
        as_float = float(value)
        return sortable_serialise(as_float)

    # Datetimes: normalize to UTC, so searching works
    if type(value) is datetime:
        utc = fixed_offset(0)
        value = value.astimezone(utc)

    # A common field or a new field
    return field_cls.encode(value)
开发者ID:hforge,项目名称:itools,代码行数:14,代码来源:catalog.py
示例11: dict2doc
def dict2doc(y):
    """Build and return the xapian.Document for the bookmark dict *y*.

    ``y`` must contain 'url' and 'tags'; 'title', 'notes', 'created' and
    'archived' are optional.  Note that ``y['tags']`` is modified: when an
    archive exists for the URL the tag 'archived' is appended, and the list
    is then replaced by a de-duplicated copy.
    """
    doc = xapian.Document()
    indexer.set_document(doc)

    url = y['url']
    uid = urlid(url)
    sid = uid[:8]
    doc.add_boolean_term(P['id'] + uid)

    # add the id and short id as unprefixed/stemmed terms to
    # make it easier to select bookmarks from search results
    for idterm in (uid, sid, 'Z' + uid, 'Z' + sid):
        doc.add_boolean_term(idterm)

    doc.add_value(VALUE_URL, url)

    # add hostname parts as site terms: every dot-separated suffix of the
    # hostname becomes one term
    hostname = urlparse(url).hostname
    if hostname:
        labels = hostname.split('.')
        for start, _ in enumerate(labels):
            doc.add_boolean_term(P['site'] + '.'.join(labels[start:]))

    archive_path = get_archive_path(uid)
    if archive_path:
        y['tags'].append('archived')

    # remove duplicate tags, preserving order
    y['tags'] = list(OrderedDict.fromkeys(y['tags']))
    doc.add_value(VALUE_TAGS, u'\x1f'.join(y['tags']))
    for tag in y['tags']:
        doc.add_boolean_term(P['tag'] + tag)

    if 'title' in y:
        doc.add_value(VALUE_TITLE, y['title'])
        index_text(y['title'], 'title')

    if 'notes' in y:
        doc.set_data(y['notes'])
        index_text(y['notes'], 'notes')

    # Missing timestamps default to the current UTC time.
    created = y.get('created', arrow.utcnow()).timestamp
    doc.add_value(VALUE_CREATED, xapian.sortable_serialise(created))

    if archive_path:
        archived = y.get('archived', arrow.utcnow()).timestamp
        doc.add_value(VALUE_ARCHIVED, xapian.sortable_serialise(archived))
        index_archive(doc, archive_path)

    return doc
开发者ID:ttaylordev,项目名称:z,代码行数:50,代码来源:jot.py
示例12: normalize_range
def normalize_range(begin, end):
    """Convert the bounds of a range query to their string form.

    Each bound is handled on its own: ``None`` stays ``None``, a float is
    sortable-serialised, and anything else is passed through ``str``.
    Returns the pair ``(begin, end)``.
    """
    def as_bound(bound):
        # Shared conversion for the lower and the upper bound.
        if bound is None:
            return None
        if isinstance(bound, float):
            return xapian.sortable_serialise(float(bound))
        return str(bound)

    return as_bound(begin), as_bound(end)
开发者ID:everydo,项目名称:zapian,代码行数:15,代码来源:query.py
示例13: add_product
def add_product(self, product, database_path=None):
    """Adds product to repository.

    product - Product to be added to database. Its title and description
              are indexed as text, its attribute dict is stored as JSON
              document data and its price is stored in value slot 0.
    database_path - Path of the database where product is added. Default: None
                    When repository has been created with many database
                    paths then database_path must be defined.

    The document is keyed by 'Q' + product.url, so adding a product with
    the same url again replaces the earlier document.
    """
    if len(self._databases) > 1:
        # Identity check: `!= None` would go through the argument's own
        # comparison operators.
        assert database_path is not None, \
            "With many databases you must identify the database where product is added"

    # Set up a TermGenerator that we'll use in indexing.
    termgenerator = xapian.TermGenerator()
    termgenerator.set_stemmer(self._create_stem())

    # We make a document and tell the term generator to use this.
    doc = xapian.Document()
    termgenerator.set_document(doc)
    termgenerator.index_text(unicode(product.title))
    termgenerator.index_text(unicode(product.description))

    doc.set_data(unicode(json.dumps(product.__dict__)))
    doc.add_value(0, xapian.sortable_serialise(float(product.price)))

    idterm = "Q" + product.url
    doc.add_boolean_term(idterm)

    # Fall back to the default database when no path was given.
    db = self._db
    if database_path:
        db = self._databases[database_path]
    db.replace_document(idterm, doc)
开发者ID:mlackman,项目名称:productrepository,代码行数:30,代码来源:productrepository.py
示例14: convert
def convert(self, field_value):
    """
    Generates index values (for sorting) for given field value and its content type.

    ``None`` is returned unchanged; every other result goes through
    ``smart_str``.
    """
    if field_value is None:
        return None

    content_type = self._get_content_type(field_value)

    if self._is_float_or_interger(content_type):
        value = xapian.sortable_serialise(field_value)
    elif isinstance(content_type, (models.BooleanField, bool)):
        # Boolean fields are stored as 't' or 'f'
        value = 't' if field_value else 'f'
    elif isinstance(content_type, (models.DateTimeField, datetime.datetime)):
        # DateTime fields are stored as %Y%m%d%H%M%S (better sorting);
        # the string is assembled field by field rather than via strftime.
        stamp = field_value
        value = '%d%02d%02d%02d%02d%02d' % (stamp.year, stamp.month,
                                            stamp.day, stamp.hour,
                                            stamp.minute, stamp.second)
    else:
        # Any other content type is indexed as it is.
        value = field_value

    return smart_str(value)
开发者ID:mpmendespt,项目名称:dre,代码行数:27,代码来源:indexer.py
示例15: index
def index(contacts, database, prefixes):
# Index the (person, data) pairs produced by config(contacts) into the Xapian
# database at `database`, then dump the collected prefixes as a JSON list to
# the file named by `prefixes`.
# NOTE(review): the indentation of this block was lost in SOURCE, so the
# nesting of the statements below (in particular whether `p.add(prefix)`
# belongs to the `else` branch or to the inner loop) cannot be confirmed here.
# NOTE(review): `termgenerator` and `digits` are not defined in this block;
# presumably a module-level xapian.TermGenerator and string.digits -- confirm.
c = config(contacts)
db = xapian.WritableDatabase(database, xapian.DB_CREATE_OR_OPEN)
# Set of prefixes seen while indexing; written out at the end.
p = set()
for person, data in c:
doc = xapian.Document()
termgenerator.set_document(doc)
termgenerator.index_text(person, 1, u'id')
for prefix, content in data:
# The first character of the prefix selects the treatment: a digit found in
# digits[:5] stores the content as a sortable-serialised integer in the value
# slot numbered by that digit, ...
if prefix[0] in digits[:5]:
doc.add_value(int(prefix[0]), xapian.sortable_serialise(int(content)))
# ... a digit found in digits[5:] stores the content unchanged in that slot, ...
elif prefix[0] in digits[5:]:
doc.add_value(int(prefix[0]), content)
# ... and anything else is indexed as text, both under 'X' + prefix and
# unprefixed.
else:
termgenerator.index_text(content, 1, u'X' + prefix)
termgenerator.index_text(content)
termgenerator.increase_termpos()
p.add(prefix)
# 'Q' + person is the unique id term, so re-indexing replaces the document.
doc.add_boolean_term(u'Q' + person)
doc.set_data(person)
db.replace_document(u'Q' + person, doc)
with open(prefixes, 'wb') as fp:
json.dump(list(p), fp)
开发者ID:tlevine,项目名称:cbuh,代码行数:28,代码来源:index.py
示例16: _add_value
def _add_value(doc, slotnum, value):
    """Store *value* in slot ``int(slotnum)`` of *doc*.

    Floats are stored sortable-serialised; every other value is stored as
    its ``str`` form.
    """
    if not isinstance(value, float):
        doc.add_value(int(slotnum), str(value))
        return
    serialised = xapian.sortable_serialise(float(value))
    doc.add_value(int(slotnum), serialised)
开发者ID:everydo,项目名称:zapian,代码行数:7,代码来源:api.py
示例17: _encode_simple_value
def _encode_simple_value(field_cls, value):
    """Encode *value* for storage according to its field class.

    Integer fields are overloaded to use the sortable serialisation; any
    other field class encodes the value itself.
    """
    if not issubclass(field_cls, Integer):
        # A common field or a new field
        return field_cls.encode(value)
    # XXX warning: this doesn't work with the big integers!
    return sortable_serialise(value)
开发者ID:kennym,项目名称:itools,代码行数:7,代码来源:utils.py
示例18: add
def add(self, msg):
    """Index *msg* and return the result of ``self.db.add_document``.

    The sender, body and subject are indexed under their own prefixes; the
    body text additionally includes the sender and the subject.  The
    message date and strong id are stored in value slots.  Returns ``None``
    without indexing anything when no body can be extracted.
    """
    sender = msg.from_addr() + ' '
    if msg.from_name():
        sender += msg.from_name()

    with msg.open() as fp:
        mail = email.message_from_file(fp)

    body = self._get_body(mail)
    if body is None:
        return

    # Searching the body also matches the sender and the subject.
    body += u' ' + sender
    body += u' ' + (msg.subject() or u'')

    doc = xapian.Document()
    self.term_gen.set_document(doc)

    timestamp = mua.py.unix_dt(msg.date())
    doc.add_value(SLOT_DATE, xapian.sortable_serialise(timestamp))
    strong_id = mua.py.unb64(msg.strong_id())
    doc.add_value(SLOT_STRONG_ID, strong_id)

    self.term_gen.index_text(sender, 1, PREFIX_FROM)
    self.term_gen.index_text(body, 1, PREFIX_BODY)
    self.term_gen.index_text(msg.subject() or u'', 1, PREFIX_SUBJECT)

    return self.db.add_document(doc)
开发者ID:pombredanne,项目名称:mua,代码行数:26,代码来源:indexer.py
示例19: _marshal_value
def _marshal_value(value):
    """
    Private utility method that prepares Python values for Xapian value slots.

    Integers (``int`` and ``long``) are sortable-serialised; any other
    value is returned unchanged.
    """
    if not isinstance(value, (int, long)):
        return value
    return xapian.sortable_serialise(value)
开发者ID:bleachyin,项目名称:xapian_weibo,代码行数:7,代码来源:xapian_backend.py
示例20: __call__
def __call__(self, doc):
    """Return a sortable string for the distance from Washington, DC.

    The comma-separated numbers stored in value slot 4 of *doc* (the middle
    of the state) are compared with the fixed Washington coordinates.
    """
    washington = (38.012, -77.037)
    coords = [float(part) for part in doc.get_value(4).split(",")]
    distance = support.distance_between_coords(coords, washington)
    return xapian.sortable_serialise(distance)
开发者ID:xapian,项目名称:xapian-docsprint,代码行数:7,代码来源:search_sorting3.py
注:本文中的xapian.sortable_serialise函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论