本文整理汇总了Python中whoosh.util.byte_to_length函数的典型用法代码示例。如果您正苦于以下问题：Python byte_to_length函数的具体用法？Python byte_to_length怎么用？Python byte_to_length使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了byte_to_length函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: _read_header
def _read_header(self, dbfile, doccount):
    """Read the header from ``dbfile`` and fill in the per-field tables.

    The header is read in order: 4 magic bytes, an int version (must be 1),
    the saved document count, the number of fields, and then one record per
    field (name, total, min-length byte, max-length byte).

    :param dbfile: file-like object exposing ``read``, ``read_int``,
        ``read_uint``, ``read_ushort``, ``read_string``, ``read_long``,
        ``read_byte`` and ``tell``.
    :param doccount: expected number of documents, or None to accept the
        count stored in the file.
    """
    magic = dbfile.read(4)
    assert magic == self.magic
    file_version = dbfile.read_int()
    assert file_version == 1

    # The stored count must agree with the caller's count when one is given
    stored_count = dbfile.read_uint()
    if doccount is None:
        doccount = stored_count
    assert stored_count == doccount, ("read=%s argument=%s"
                                      % (stored_count, doccount))
    self._count = doccount

    num_fields = dbfile.read_ushort()
    for index in xrange(num_fields):
        name = dbfile.read_string().decode('utf-8')
        self.totals[name] = dbfile.read_long()
        self.minlens[name] = byte_to_length(dbfile.read_byte())
        self.maxlens[name] = byte_to_length(dbfile.read_byte())
        # Offset relative to the end of the header; made absolute below
        self.starts[name] = index * doccount

    # Shift every per-field offset by the header length
    header_end = dbfile.tell()
    for name in self.starts:
        self.starts[name] += header_end
开发者ID:BenSchwab,项目名称:portfolio,代码行数:25,代码来源:whoosh2.py
示例2: from_file
def from_file(file, stringids=False):
    """Read a block header at the current position of ``file``.

    :param file: file-like object exposing ``tell``, ``read``,
        ``read_string`` and ``read_uint``.
    :param stringids: if true, the maximum ID is read as a UTF-8 encoded
        string; otherwise it is read as an unsigned int.
    :returns: a ``BlockInfo`` built from the header fields plus the offset
        at which the block's data starts.
    """
    here = file.tell()

    encoded_header = file.read(BlockInfo._struct.size)
    header = BlockInfo._struct.unpack(encoded_header)
    (flags, _, _, nextoffset, idslen, weightslen, postcount, maxweight,
     maxwol, _, minlength) = header

    if not flags:
        # No flags: the first 8 bytes of the header are re-read as a long.
        # Struct-style unpack functions return a tuple, so take its single
        # element rather than storing the tuple itself as the offset.
        nextoffset = unpack_long(encoded_header[:8])[0]
    else:
        # Otherwise the stored value is relative to the start of the header
        nextoffset = here + nextoffset

    assert postcount > 0
    minlength = byte_to_length(minlength)

    if stringids:
        maxid = utf8decode(file.read_string())[0]
    else:
        maxid = file.read_uint()

    dataoffset = file.tell()
    return BlockInfo(flags=flags, nextoffset=nextoffset,
                     postcount=postcount, maxweight=maxweight,
                     maxwol=maxwol, maxid=maxid, minlength=minlength,
                     dataoffset=dataoffset, idslen=idslen,
                     weightslen=weightslen)
开发者ID:gbdu,项目名称:wikidpad,代码行数:27,代码来源:filepostings.py
示例3: doc_field_length
def doc_field_length(self, docnum, fieldname, default=0):
    """Return the length recorded for ``fieldname`` in document ``docnum``.

    ``default`` is returned when the field has no entry in ``self.lengths``
    or when ``docnum`` is at or past the end of the field's array.
    """
    try:
        codes = self.lengths[fieldname]
    except KeyError:
        return default

    if docnum < len(codes):
        return byte_to_length(codes[docnum])
    return default
开发者ID:BenSchwab,项目名称:portfolio,代码行数:8,代码来源:whoosh2.py
示例4: load_old_lengths
def load_old_lengths(obj, dbfile, doccount):
    """Load per-field length arrays from ``dbfile`` into ``obj``.

    For each field, ``obj.lengths[fieldname]`` receives an array of
    ``doccount`` unsigned bytes and ``obj.totals[fieldname]`` the sum of
    the decoded lengths.

    ``dbfile`` is always closed before returning, including when a read
    fails part-way through.

    :param obj: object with dict-like ``lengths`` and ``totals`` attributes.
    :param dbfile: file-like object exposing ``read_ushort``,
        ``read_string``, ``read_array`` and ``close``.
    :param doccount: number of length bytes stored per field.
    """
    try:
        fieldcount = dbfile.read_ushort()  # Number of fields
        for _ in range(fieldcount):
            fieldname = dbfile.read_string().decode("utf-8")
            codes = dbfile.read_array("B", doccount)
            obj.lengths[fieldname] = codes
            # Old format didn't store totals, so fake it by adding up the
            # decoded codes
            obj.totals[fieldname] = sum(byte_to_length(b) for b in codes)
    finally:
        dbfile.close()
开发者ID:adamhorner,项目名称:Yaki,代码行数:9,代码来源:legacy.py
示例5: from_file
def from_file(cls, postfile, postingsize, stringids=False):
    """Build a block by reading its header from ``postfile``.

    The fixed-size header is unpacked with ``cls._struct``; the maximum ID
    follows it, read with ``load`` when ``stringids`` is true and as an
    unsigned int otherwise. ``dataoffset`` is the file position after that.
    """
    block_start = postfile.tell()
    block = cls(postingsize, stringids=stringids)
    block.postfile = postfile

    raw_header = postfile.read(cls._struct.size)
    fields = cls._struct.unpack(raw_header)

    # The first header field is relative to the start of the block
    block.nextoffset = block_start + fields[0]
    block.cmp = fields[1]
    block.count = fields[2]
    block.idcode = fields[3]
    block.idslen = fields[5]
    block.wtslen = fields[6]
    block.maxweight = fields[7]
    block.maxlength = byte_to_length(fields[11])
    block.minlength = byte_to_length(fields[12])

    if stringids:
        block.maxid = load(postfile)
    else:
        block.maxid = postfile.read_uint()
    block.dataoffset = postfile.tell()
    return block
开发者ID:BenSchwab,项目名称:portfolio,代码行数:18,代码来源:legacy.py
示例6: test_block
def test_block():
    """Append four postings to a block, check its statistics, then write it
    out and check that the block read back reports the same values.
    """
    storage = RamStorage()
    postfile = storage.create_file("postfile")

    block = current(postfile, 0)
    for docid, weight, length in [(0, 1.0, 1), (1, 2.0, 2),
                                  (2, 12.0, 6), (5, 6.5, 420)]:
        block.append(docid, weight, '', length)

    # Lengths are stored as byte codes, so the maximum is the round-tripped
    # value of 420 rather than 420 itself
    expected_max = byte_to_length(length_to_byte(420))

    assert block
    assert_equal(len(block), 4)
    assert_equal(list(block.ids), [0, 1, 2, 5])
    assert_equal(list(block.weights), [1.0, 2.0, 12.0, 6.5])
    assert_equal(block.values, None)
    assert_equal(block.min_length(), 1)
    assert_equal(block.max_length(), expected_max)
    assert_equal(block.max_weight(), 12.0)
    assert_equal(block.max_wol(), 2.0)

    terminfo = FileTermInfo()
    terminfo.add_block(block)
    assert_equal(terminfo.weight(), 21.5)
    assert_equal(terminfo.doc_frequency(), 4)
    assert_equal(terminfo.min_length(), 1)
    assert_equal(terminfo.max_length(), expected_max)
    assert_equal(terminfo.max_weight(), 12.0)
    assert_equal(terminfo.max_wol(), 2.0)

    block.write(compression=3)
    postfile.close()

    postfile = storage.open_file("postfile")
    reloaded = current.from_file(postfile, 0)
    reloaded.read_ids()
    assert_equal(list(reloaded.ids), [0, 1, 2, 5])
    reloaded.read_weights()
    assert_equal(list(reloaded.weights), [1.0, 2.0, 12.0, 6.5])
    reloaded.read_values()
    # NOTE(review): this checks the original block, not the reloaded one;
    # possibly ``reloaded.values`` was intended -- confirm before changing
    assert_equal(block.values, None)
    assert_equal(reloaded.min_length(), 1)
    assert_equal(reloaded.max_length(), expected_max)
    assert_equal(reloaded.max_weight(), 12.0)
    assert_equal(reloaded.max_wol(), 2.0)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:44,代码来源:test_quality.py
示例7: from_string
def from_string(cls, s):
    """Decode a serialized term-info byte string into an instance of ``cls``.

    The first byte selects the layout: values below 2 mean a fixed struct
    header followed by the postings (a packed long when the byte is 0,
    otherwise a pickle); anything else is the old format, a pickled tuple
    of one, two or three items.

    NOTE(review): ``loads`` is presumably ``pickle.loads``; unpickling is
    only safe on trusted data -- confirm where ``s`` comes from.
    """
    assert isinstance(s, bytes_type)

    # Indexing gives a 1-char str on Python 2 and an int on Python 3
    first = s[0]
    hbyte = ord(first) if isinstance(s, string_type) else first

    if hbyte >= 2:
        # Old format was encoded as a variable length pickled tuple
        legacy = loads(s + b("."))
        size = len(legacy)
        if size == 1:
            weight = docfreq = 1
            postings = legacy[0]
        elif size == 2:
            weight = docfreq = legacy[1]
            postings = legacy[0]
        else:
            weight, postings, docfreq = legacy

        # Fake values for stats which weren't stored before
        minlen_code, maxlen_code = 1, 255
        maxweight = 999999999
        minid = maxid = -1
    else:
        layout = cls.struct
        body_start = layout.size + 1
        # Weight, Doc freq, min len, max len, max w, unused, min ID, max ID
        (weight, docfreq, minlen_code, maxlen_code, maxweight, _,
         minid, maxid) = layout.unpack(s[1:body_start])
        if minid == NO_ID:
            minid = None
        if maxid == NO_ID:
            maxid = None

        # Postings
        encoded_postings = s[body_start:]
        if hbyte == 0:
            postings = unpack_long(encoded_postings)[0]
        else:
            postings = loads(encoded_postings + b("."))

    obj = cls(weight, docfreq, byte_to_length(minlen_code),
              byte_to_length(maxlen_code), maxweight, minid, maxid)
    obj.postings = postings
    return obj
开发者ID:adamhorner,项目名称:Yaki,代码行数:43,代码来源:base.py
示例8: _minmax
def _minmax(self, fieldname, op, cache):
    """Return ``op`` applied to the field's length codes, decoded, with
    memoization in ``cache``.

    :param fieldname: key into ``self.lengths`` and ``cache``.
    :param op: callable applied to the field's sequence of codes (e.g.
        ``min`` or ``max``).
    :param cache: dict mapping field names to previously computed results;
        updated in place on a miss.
    :returns: the cached or newly computed value; 0 when the field has no
        length codes.
    """
    if fieldname not in cache:
        codes = self.lengths[fieldname]
        cache[fieldname] = byte_to_length(op(codes)) if codes else 0
    return cache[fieldname]
开发者ID:BenSchwab,项目名称:portfolio,代码行数:11,代码来源:whoosh2.py
示例9: test_lowlevel_block_writing
def test_lowlevel_block_writing():
    """Write eight postings through a ``FilePostingWriter`` with a block
    limit of 4 and check the statistics on the returned term info.
    """
    storage = RamStorage()
    postfile = storage.create_file("postfile")
    writer = FilePostingWriter(postfile, blocklimit=4)
    fmt = formats.Frequency()
    writer.start(fmt)

    # (document id, weight, field length)
    postings = [(0, 1.0, 1), (1, 2.0, 2), (2, 12.0, 6), (5, 6.5, 420),
                (11, 1.5, 1), (12, 2.5, 2), (26, 100.5, 21), (50, 8.0, 1020)]
    for docid, weight, length in postings:
        writer.write(docid, weight, fmt.encode(weight), length)
    terminfo = writer.finish()

    # Lengths go through the byte encoding, so compare against the
    # round-tripped values rather than the raw ones
    rounded_max = byte_to_length(length_to_byte(1020))
    rounded_21 = byte_to_length(length_to_byte(21))

    assert_equal(terminfo.weight(), 134.0)
    assert_equal(terminfo.doc_frequency(), 8)
    assert_equal(terminfo.min_length(), 1)
    assert_equal(terminfo.max_length(), rounded_max)
    assert_equal(terminfo.max_weight(), 100.5)
    assert_equal(terminfo.max_wol(), 100.5 / rounded_21)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:23,代码来源:test_quality.py
示例10: from_file
def from_file(cls, postfile, postingsize, stringids=False):
    """Build a block from a header stored as an offset plus a pickled tuple.

    Reads an unsigned int (added to the position after it to give
    ``nextoffset``), then a pickled sequence whose items are assigned to
    the attributes named in ``cls.infokeys``. The ``minlength`` and
    ``maxlength`` items are passed through ``byte_to_length`` first.
    """
    block = cls(postingsize, stringids=stringids)
    block.postfile = postfile

    # The delta is measured from the position just after the delta itself
    delta = postfile.read_uint()
    block.nextoffset = postfile.tell() + delta

    info = postfile.read_pickle()
    block.dataoffset = postfile.tell()

    length_keys = ("minlength", "maxlength")
    for attr, stored in zip(cls.infokeys, info):
        decoded = byte_to_length(stored) if attr in length_keys else stored
        setattr(block, attr, decoded)
    return block
开发者ID:BenSchwab,项目名称:portfolio,代码行数:15,代码来源:whoosh2.py
示例11: test_many_lengths
def test_many_lengths():
    """Index one document per word, each repeating its word ``(i + 1) ** 6``
    times, and check the term's min/max length equal the round-tripped
    byte encoding of that count.
    """
    domain = u("alfa bravo charlie delta echo foxtrot golf hotel").split()
    lengths = [(position + 1) ** 6 for position in range(len(domain))]

    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    writer = ix.writer()
    for word, length in zip(domain, lengths):
        writer.add_document(text=" ".join([word] * length))
    writer.commit()

    searcher = ix.searcher()
    for word, length in zip(domain, lengths):
        target = byte_to_length(length_to_byte(length))
        terminfo = searcher.term_info("text", word)
        assert_equal(terminfo.min_length(), target)
        assert_equal(terminfo.max_length(), target)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:16,代码来源:test_indexing.py
示例12: test_lengths
def test_lengths():
    """Check ``doc_field_length`` for a field that is never written (``f1``)
    and one written with known token counts (``f2``).
    """
    from itertools import cycle, islice

    schema = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                           f2=fields.KEYWORD(stored=True, scorable=True))
    lengths = [10, 20, 2, 102, 45, 3, 420, 2]
    tokens = u("ABCDEFG")

    with TempIndex(schema, "testlengths") as ix:
        writer = ix.writer()
        for length in lengths:
            # Repeat the tokens cyclically to get exactly ``length`` of them
            words = islice(cycle(tokens), length)
            writer.add_document(f2=u(" ").join(words))
        writer.commit()

        with ix.reader() as dr:
            docnums = xrange(len(lengths))

            # f1 was never given a value, so every length is 0
            f1_lengths = [dr.doc_field_length(n, "f1") for n in docnums]
            assert_equal(f1_lengths, [0] * len(lengths))

            # f2 lengths come back as the round-tripped byte encoding
            f2_lengths = [dr.doc_field_length(n, "f2") for n in docnums]
            expected = [byte_to_length(length_to_byte(l)) for l in lengths]
            assert_equal(f2_lengths, expected)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:17,代码来源:test_indexing.py
示例13: add_field_length
def add_field_length(self, docnum, fieldname, length):
    """Record ``length`` for ``fieldname`` in document ``docnum``.

    Adds the raw length to the field's running total, updates the field's
    min/max using the length as it will read back after byte encoding, and
    stores the byte code at index ``docnum`` of the field's array, padding
    the array with zeros when it is too short.
    """
    self._fieldlength_totals[fieldname] += length

    code = length_to_byte(length)
    # Min/max track the value that decoding the stored byte will produce
    rounded = byte_to_length(code)

    mins = self._fieldlength_mins
    if rounded < mins.get(fieldname, 999999999):
        mins[fieldname] = rounded
    maxes = self._fieldlength_maxes
    if rounded > maxes.get(fieldname, 0):
        maxes[fieldname] = rounded

    if fieldname not in self.length_arrays:
        self.length_arrays[fieldname] = array("B")
    codes = self.length_arrays[fieldname]

    # Grow the array so that index ``docnum`` exists
    shortfall = docnum - len(codes) + 1
    if shortfall > 0:
        codes.extend([0] * shortfall)
    codes[docnum] = code
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:20,代码来源:pools.py
示例14: append
def append(self, id, weight, valuestring, dfl):
    """Add one posting to the block and update the block's statistics.

    :param id: identifier appended to ``self.ids``.
    :param weight: weight appended to ``self.weights``; also raises
        ``self._maxweight`` when larger.
    :param valuestring: encoded value; when truthy it is appended to
        ``self.values`` (created on first use).
    :param dfl: field length for this posting; when truthy the min/max
        length byte codes and the maximum weight-over-length are updated.
    """
    self.ids.append(id)
    self.weights.append(weight)
    if weight > self._maxweight:
        self._maxweight = weight

    if valuestring:
        if self.values is None:
            self.values = []
        self.values.append(valuestring)

    if dfl:
        length_byte = length_to_byte(dfl)
        if self._minlength is None or length_byte < self._minlength:
            self._minlength = length_byte
        # _maxlength holds a byte code, so compare code against code.
        # Comparing the raw length instead could replace the stored
        # maximum with a smaller code.
        if length_byte > self._maxlength:
            self._maxlength = length_byte

        # Weight over length uses the length as it will be read back
        wol = weight / byte_to_length(length_byte)
        if wol > self._maxwol:
            self._maxwol = wol
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:20,代码来源:postblocks.py
示例15: from_file
def from_file(cls, postfile, stringids=False):
    """Build a block by reading its fixed-size header from ``postfile``.

    The header is unpacked with ``cls._struct``. The maximum ID follows it,
    read with ``load`` when ``stringids`` is true and as an unsigned int
    otherwise; ``dataoffset`` is the file position after the maximum ID.
    """
    block_start = postfile.tell()
    block = cls(postfile, stringids=stringids)

    raw_header = postfile.read(cls._struct.size)
    fields = cls._struct.unpack(raw_header)

    # The first header field is relative to the start of the block
    block.nextoffset = block_start + fields[0]
    block.compression = fields[1]
    block.postcount = fields[2]
    block.typecode = fields[3]
    block.idslen = fields[5]
    block.weightslen = fields[6]
    block.maxweight = fields[7]
    block.maxwol = fields[8]
    block.minlen = byte_to_length(fields[10])

    block.maxid = load(postfile) if stringids else postfile.read_uint()
    block.dataoffset = postfile.tell()
    return block
开发者ID:20after4,项目名称:Yaki,代码行数:22,代码来源:postblocks.py
示例16: get
def get(self, docnum, fieldname, default=0):
    """Return the decoded length of ``fieldname`` in document ``docnum``.

    ``default`` is returned as-is when the field has no entry in
    ``self.lengths`` or when the stored byte code is zero. It is a length,
    not a byte code, so it is never passed through ``byte_to_length``.

    :param docnum: index into the field's sequence of byte codes.
    :param fieldname: key into ``self.lengths``.
    :param default: value returned when no length is recorded.
    """
    lengths = self.lengths
    if fieldname not in lengths:
        return default

    code = lengths[fieldname][docnum]
    if not code:
        return default
    return byte_to_length(code)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:6,代码来源:filetables.py
示例17: test_length_byte
def test_length_byte():
    """Lengths 0 through 10 must survive a round trip through
    ``length_to_byte`` and ``byte_to_length`` unchanged.
    """
    source = list(range(11))
    roundtripped = [byte_to_length(length_to_byte(n)) for n in source]
    assert_equal(source, roundtripped)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:5,代码来源:test_misc.py
示例18: read_min_and_max_length
def read_min_and_max_length(cls, dbfile, datapos):
    """Return ``(min_length, max_length)`` decoded from two adjacent bytes
    of the record that starts at ``datapos`` in ``dbfile``.
    """
    # The two length bytes sit after 1 byte, a float and an int
    # (presumably a header byte, the weight and the doc frequency --
    # TODO confirm against the record layout)
    min_pos = datapos + 1 + _FLOAT_SIZE + _INT_SIZE
    max_pos = min_pos + 1
    min_length = byte_to_length(dbfile.get_byte(min_pos))
    max_length = byte_to_length(dbfile.get_byte(max_pos))
    return min_length, max_length
开发者ID:adamhorner,项目名称:Yaki,代码行数:5,代码来源:base.py
示例19: max_length
def max_length(self):
    """Return ``self._maxlength`` converted with ``byte_to_length``."""
    code = self._maxlength
    return byte_to_length(code)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:2,代码来源:filetables.py
示例20: min_length
def min_length(self):
    """Return ``self._minlength`` converted with ``byte_to_length``."""
    code = self._minlength
    return byte_to_length(code)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:2,代码来源:filetables.py
注：本文中的whoosh.util.byte_to_length函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论