本文整理汇总了Python中whoosh.compat.dumps函数的典型用法代码示例。如果您正苦于以下问题:Python dumps函数的具体用法?Python dumps怎么用?Python dumps使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dumps函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_pickle_schema
def test_pickle_schema():
    """A schema whose fields share analyzers with charset filters must
    survive pickling, both freshly built and after a reader round-trip."""
    from whoosh import analysis
    from whoosh.support.charset import accent_map
    from whoosh.compat import dumps

    # Analyzer pipeline: stemming followed by accent folding
    ft_ana = (analysis.StemmingAnalyzer()
              | analysis.CharsetFilter(accent_map))
    schema = fields.Schema(
        path=fields.ID(stored=True, unique=True),
        file_mtime=fields.DATETIME(stored=True),
        name=fields.TEXT(stored=False, field_boost=2.0),
        description=fields.TEXT(stored=False, field_boost=1.5,
                                analyzer=ft_ana),
        content=fields.TEXT(analyzer=ft_ana),
    )

    # Sentences chosen so the stemming filter actually fires
    docs = [
        u"The rain in spain falls mainly in the plain",
        u"Plainly sitting on the plain",
        u"Imagine a greatly improved sentence here",
    ]

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for doc in docs:
                w.add_document(description=doc, content=doc)
            # Schema must still pickle (protocol 2) while indexing
            assert dumps(schema, 2)
        with ix.reader() as r:
            # ...and the schema recovered from the reader must too
            assert dumps(r.schema, 2)
开发者ID:rsirres,项目名称:Whoosh,代码行数:35,代码来源:test_fields.py
示例2: test_charset_pickeability
def test_charset_pickeability():
    """Analyzers and tokenizers built around a charset map must pickle."""
    from whoosh.support import charset
    charmap = charset.charset_table_to_dict(charset.default_charset)

    # Filter attached to a standard analyzer pipeline
    pipeline = analysis.StandardAnalyzer() | analysis.CharsetFilter(charmap)
    _ = dumps(pipeline, -1)

    # Bare charset tokenizer
    tokenizer = analysis.CharsetTokenizer(charmap)
    _ = dumps(tokenizer, -1)
开发者ID:gsadikin,项目名称:whoosh,代码行数:8,代码来源:test_analysis.py
示例3: _write_block
def _write_block(self, last=False):
    """Flush the buffered postings block to the postings file.

    Writes the codec magic header before the first block, records the
    block's statistics on the term info, then writes a signed length,
    a pickled info tuple, and the (optionally compressed) pickled data.

    :param last: if True, the block length is written negated to mark
        this as the final block of the posting list.
    """
    # If this is the first block, write a small header first
    if not self._blockcount:
        self._postfile.write(WHOOSH3_HEADER_MAGIC)

    # Add this block's statistics to the terminfo object, which tracks the
    # overall statistics for all term postings
    self._terminfo.add_block(self)

    # Minify the IDs, weights, and values, and pickle them as one tuple
    data = (self._mini_ids(), self._mini_weights(), self._mini_values())
    databytes = dumps(data)

    # BUG FIX: the original set comp = 0 for pickles under 20 bytes and
    # then unconditionally overwrote it with self._compression, so the
    # "don't bother compressing tiny blocks" shortcut never took effect.
    comp = 0 if len(databytes) < 20 else self._compression
    if comp:
        databytes = zlib.compress(databytes, comp)

    # Make a tuple of block info. The posting reader can check this info
    # and decide whether to skip the block without having to decompress the
    # full block data:
    # - Number of postings in block
    # - Last ID in block
    # - Maximum weight in block
    # - Compression level
    # - Minimum length byte
    # - Maximum length byte
    ids = self._ids
    infobytes = dumps((len(ids), ids[-1], self._maxweight, comp,
                       length_to_byte(self._minlength),
                       length_to_byte(self._maxlength),
                       ))

    # Write block length; a negative number marks the last block
    postfile = self._postfile
    blocklength = len(infobytes) + len(databytes)
    if last:
        blocklength *= -1
    postfile.write_int(blocklength)

    # Write block info, then block data
    postfile.write(infobytes)
    postfile.write(databytes)

    self._blockcount += 1
    # Reset block buffer
    self._new_block()
开发者ID:32footsteps,项目名称:SpecialCollectionsProject,代码行数:54,代码来源:whoosh3.py
示例4: to_string
def to_string(self):
    """Serialize this term info to bytes: fixed-size packed statistics
    followed by either inlined pickled postings or an on-disk pointer,
    the whole thing prefixed with a one-byte inlined/pointer flag.
    """
    # Encode the lengths as 0-255 values
    ml = 0 if self._minlength is None else length_to_byte(self._minlength)
    xl = length_to_byte(self._maxlength)
    # Convert None values to the out-of-band NO_ID constant so they can be
    # stored as unsigned ints
    mid = NO_ID if self._minid is None else self._minid
    xid = NO_ID if self._maxid is None else self._maxid
    # Pack the term info into bytes
    st = self.struct.pack(self._weight, self._df, ml, xl, self._maxweight,
                          0, mid, xid)
    if isinstance(self.postings, tuple):
        # Postings are inlined - dump them using the pickle protocol.
        # NOTE(review): [2:-1] strips the 2-byte protocol-2 header and the
        # trailing STOP opcode; presumably the reader re-adds them before
        # unpickling — verify against the matching from_string.
        isinlined = 1
        st += dumps(self.postings, -1)[2:-1]
    else:
        # Append postings pointer as long to end of term info bytes
        isinlined = 0
        # It's possible for a term info to not have a pointer to postings
        # on disk, in which case postings will be None. Convert a None
        # value to -1 so it can be stored as a long.
        p = -1 if self.postings is None else self.postings
        st += pack_long(p)
    # Prepend byte indicating whether the postings are inlined to the term
    # info bytes
    return pack("B", isinlined) + st
开发者ID:adamhorner,项目名称:Yaki,代码行数:29,代码来源:base.py
示例5: encode
def encode(self, positions):
    """Delta-encode an ascending list of token positions and return the
    count packed as an unsigned int followed by the trimmed pickle of
    the deltas."""
    deltas = []
    previous = 0
    for position in positions:
        deltas.append(position - previous)
        previous = position
    # [2:-1] trims the pickle protocol header and STOP opcode
    payload = dumps(deltas, -1)[2:-1]
    return pack_uint(len(deltas)) + payload
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:7,代码来源:formats.py
示例6: word_values
def word_values(self, value, analyzer, **kwargs):
    """Tokenize *value* and, for each distinct term, yield a tuple of
    (term, frequency, boosted summed boost, encoded posting value) where
    the encoded value packs the count, the boosted sum, and delta-encoded
    (pos, startchar, charlen, boost) records."""
    boost_factor = self.field_boost
    occurrences = defaultdict(list)
    # Ask the analyzer for positions, character offsets, and boosts
    kwargs["positions"] = True
    kwargs["chars"] = True
    kwargs["boosts"] = True
    for token in tokens(value, analyzer, kwargs):
        occurrences[token.text].append((token.pos, token.startchar,
                                        token.endchar, token.boost))

    for word, poses in iteritems(occurrences):
        # Delta-encode positions and character offsets to keep the
        # pickle small; record each entry's boost and accumulate the sum
        codes = []
        prev_pos = 0
        prev_char = 0
        total_boost = 0
        for pos, startchar, endchar, boost in poses:
            codes.append((pos - prev_pos, startchar - prev_char,
                          endchar - startchar, boost))
            prev_pos = pos
            prev_char = endchar
            total_boost += boost

        weighted = total_boost * boost_factor
        packed = (pack_uint(len(poses)) + pack_float(weighted)
                  + dumps(codes, -1)[2:-1])
        yield (word, len(poses), weighted, packed)
开发者ID:adamhorner,项目名称:Yaki,代码行数:27,代码来源:formats.py
示例7: encode
def encode(self, poslist):
    """Return the position count (packed uint) plus the trimmed pickle of
    the positions delta-encoded against their predecessor."""
    out = []
    anchor = 0
    for position in poslist:
        out.append(position - anchor)
        anchor = position
    # [2:-1] drops the pickle header and trailing STOP byte
    return pack_uint(len(out)) + dumps(out, -1)[2:-1]
开发者ID:BenSchwab,项目名称:portfolio,代码行数:7,代码来源:formats.py
示例8: minimize_values
def minimize_values(postingsize, values, compression=0):
    """Serialize a list of posting values to one byte string.

    Negative *postingsize* means variable-size values (pickled); zero means
    no values at all; positive means fixed-size values joined directly.
    The result is zlib-compressed when *compression* is nonzero and the
    string is non-empty.
    """
    if postingsize < 0:
        # Variable-size values: pickle, dropping the 2-byte protocol header
        string = dumps(values, -1)[2:]
    elif postingsize == 0:
        string = b('')
    else:
        # Fixed-size values are simply concatenated
        string = b('').join(values)

    if compression and string:
        string = compress(string, compression)
    return string
开发者ID:adamhorner,项目名称:Yaki,代码行数:10,代码来源:base.py
示例9: encode
def encode(self, poses):
    """Encode (pos, startchar, endchar, boost) records.

    Returns a tuple of (encoded bytes, summed raw boost). The bytes hold
    the record count, the field-boosted summed boost, and a protocol-2
    pickle of delta-encoded records. Note the second return element is the
    UNboosted sum.
    """
    fb = self.field_boost
    codes = []
    last_pos = 0
    last_char = 0
    boost_total = 0
    for pos, startchar, endchar, boost in poses:
        # Positions and char offsets are delta-encoded; char length is
        # stored instead of the absolute end offset
        codes.append((pos - last_pos, startchar - last_char,
                      endchar - startchar, boost))
        last_pos, last_char = pos, endchar
        boost_total += boost

    encoded = (pack_uint(len(poses)) + pack_float(boost_total * fb)
               + dumps(codes, 2))
    return (encoded, boost_total)
开发者ID:Apophus,项目名称:microblog,代码行数:16,代码来源:formats.py
示例10: append
def append(self, values):
    """Pickle one document's stored-field values and write them to the
    db file, recording a (position, length) pointer in the directory."""
    dbfile = self.dbfile
    name_map = self.name_map

    # Known fields go into their mapped slot; dynamic fields are appended
    # at the end as (fieldname, value) tuples
    row = [None] * len(name_map)
    for fieldname, val in iteritems(values):
        if fieldname in name_map:
            row[name_map[fieldname]] = val
        else:
            row.append((fieldname, val))

    encoded = dumps(row, -1)[2:-1]
    self.length += 1
    self.directory.append(pack_stored_pointer(dbfile.tell(), len(encoded)))
    dbfile.write(encoded)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:18,代码来源:filetables.py
示例11: add
def add(self, vdict):
    """Store one document's field values, registering any field names not
    seen before, and append a (position, length) pointer to the directory."""
    dbfile = self.dbfile
    names = self.names
    name_map = self.name_map

    row = [None] * len(names)
    for key, val in iteritems(vdict):
        if key not in name_map:
            # First time we've seen this field: give it the next slot
            name_map[key] = len(names)
            names.append(key)
            row.append(val)
        else:
            row[name_map[key]] = val

    data = dumps(tuple(row), -1)[2:-1]
    self.length += 1
    self.directory.append(pack_stored_pointer(dbfile.tell(), len(data)))
    dbfile.write(data)
开发者ID:BenSchwab,项目名称:portfolio,代码行数:18,代码来源:whoosh2.py
示例12: to_file
def to_file(self, postfile, compression=3):
    """Write this posting block to *postfile*: a uint offset to the next
    block, then a pickled info tuple, then the minimized ID, weight, and
    value strings."""
    ids = self.ids
    idcode, idstring = minimize_ids(ids, self.stringids, compression)
    wtstring = minimize_weights(self.weights, compression)
    vstring = minimize_values(self.postingsize, self.values, compression)

    # Info tuple lets the reader interpret (or skip) the block
    header = dumps((len(ids), ids[-1], self.maxweight,
                    length_to_byte(self.minlength),
                    length_to_byte(self.maxlength),
                    idcode, compression, len(idstring), len(wtstring)), -1)

    # Offset to next block
    postfile.write_uint(len(header) + len(idstring) + len(wtstring)
                        + len(vstring))
    # Block contents, in fixed order: info, IDs, weights, values
    for chunk in (header, idstring, wtstring, vstring):
        postfile.write(chunk)
开发者ID:BenSchwab,项目名称:portfolio,代码行数:19,代码来源:whoosh2.py
示例13: minimize_ids
def minimize_ids(arry, stringids, compression=0):
    """Pack an ascending array of document IDs into the smallest byte
    string that can hold them.

    :param arry: an ``array`` of IDs (or a sequence of strings if
        *stringids* is true), assumed non-empty and ascending.
    :param stringids: if true, IDs are pickled instead of array-packed.
    :param compression: zlib level; 0 disables compression.
    :returns: a ``(typecode, bytes)`` tuple; typecode is '' for string IDs.
    """
    amax = arry[-1]

    if stringids:
        # String IDs can't live in a typed array; fall back to pickling
        typecode = ''
        string = dumps(arry)
    else:
        # BUG FIX: the original left `typecode` unbound when amax > 65535,
        # raising NameError at the comparison below. Start from the array's
        # own typecode so wide IDs keep their original width.
        typecode = arry.typecode
        if amax <= 255:
            typecode = "B"
        elif amax <= 65535:
            typecode = "H"
        if typecode != arry.typecode:
            arry = array(typecode, iter(arry))
        if not IS_LITTLE:
            # On-disk layout is little-endian
            arry.byteswap()
        # NOTE(review): array.tostring() was removed in Python 3.9;
        # tobytes() is the modern spelling — kept as-is to match the rest
        # of this codec and its Python 2 compatibility layer.
        string = arry.tostring()

    if compression:
        string = compress(string, compression)
    return (typecode, string)
开发者ID:adamhorner,项目名称:Yaki,代码行数:20,代码来源:base.py
示例14: test_pickleability
def test_pickleability():
    """Every concrete Column subclass must round-trip through pickle."""
    # Ignore base classes
    ignore = (columns.Column, columns.WrappedColumn, columns.ListColumn)
    # Required constructor arguments, keyed by class name
    init_args = {"ClampedNumericColumn": (columns.NumericColumn("B"),),
                 "FixedBytesColumn": (5,),
                 "FixedBytesListColumn": (5,),
                 "NumericColumn": ("i",),
                 "PickleColumn": (columns.VarBytesColumn(),),
                 "StructColumn": ("=if", (0, 0.0)),
                 }

    # IDIOM: `c not in ignore` instead of `not c in ignore`
    coltypes = [c for _, c in inspect.getmembers(columns, inspect.isclass)
                if issubclass(c, columns.Column) and c not in ignore]

    for coltype in coltypes:
        args = init_args.get(coltype.__name__, ())
        try:
            inst = coltype(*args)
        except TypeError as e:
            # Modernized from the legacy `sys.exc_info()[1]` capture;
            # re-raise with the offending class named for easier debugging
            raise TypeError("Error instantiating %r: %s" % (coltype, e))
        _ = loads(dumps(inst, -1))
开发者ID:pombredanne,项目名称:whoosh-clone,代码行数:23,代码来源:test_columns.py
示例15: to_bytes
def to_bytes(self):
    """Serialize this term info: a fixed-size packed header (inlined flag,
    weight, doc frequency, length bytes, max weight, min/max IDs) followed
    by either inlined pickled postings or an (offset, length) pointer."""
    inlined = self.is_inlined()

    # Field lengths are stored as single bytes (0-255)
    min_len_byte = (0 if self._minlength is None
                    else length_to_byte(self._minlength))
    max_len_byte = length_to_byte(self._maxlength)

    # None IDs become the out-of-band sentinel 0xffffffff so they can be
    # packed as unsigned ints
    min_id = 0xffffffff if self._minid is None else self._minid
    max_id = 0xffffffff if self._maxid is None else self._maxid

    header = self._struct.pack(inlined, self._weight, self._df,
                               min_len_byte, max_len_byte, self._maxweight,
                               min_id, max_id)

    if inlined:
        # Postings ride along inside the term info as a pickle
        tail = dumps(self._inlined, -1)
    else:
        # Otherwise store where the postings live on disk
        tail = pack_long(self._offset) + pack_int(self._length)

    return header + tail
开发者ID:32footsteps,项目名称:SpecialCollectionsProject,代码行数:24,代码来源:whoosh3.py
示例16: add_field
def add_field(self, fieldname, fieldobj, value, length):
    """Emit a DOCFIELD line for one stored field; a non-None value is
    pickled first, None is passed through unchanged."""
    pickled = None if value is None else dumps(value, -1)
    self._print_line(2, "DOCFIELD", fn=fieldname, v=pickled, len=length)
开发者ID:32footsteps,项目名称:SpecialCollectionsProject,代码行数:4,代码来源:plaintext.py
示例17: add
def add(self, docnum, v):
    """Pickle *v* (or substitute the empty-bytes sentinel for None) and
    forward it to the wrapped column writer."""
    payload = emptybytes if v is None else dumps(v, -1)
    self._child.add(docnum, payload)
开发者ID:32footsteps,项目名称:SpecialCollectionsProject,代码行数:6,代码来源:columns.py
示例18: test_la_pickleability
def test_la_pickleability():
    """A LanguageAnalyzer must survive pickling."""
    _ = dumps(analysis.LanguageAnalyzer("en"), -1)
开发者ID:gsadikin,项目名称:whoosh,代码行数:3,代码来源:test_analysis.py
示例19: write
def write(self, compression=3):
    """Serialize this posting block to the postings file.

    Layout, in order: packed header struct, max-ID string, ID string,
    weight string, value string. Each variable section may be individually
    zlib-compressed.

    :param compression: zlib level for the sections; forced to 0 for tiny
        blocks (<= 4 postings) or when zlib is unavailable.
    """
    postfile = self.postfile
    stringids = self.stringids
    ids = self.ids
    weights = self.weights
    values = self.values
    postcount = len(ids)
    # Compressing very small blocks isn't worth the overhead
    if postcount <= 4 or not can_compress:
        compression = 0
    # Max ID (IDs are ascending, so it's the last one)
    maxid = ids[-1]
    if stringids:
        # [2:] strips the 2-byte pickle protocol header
        maxid_string = dumps(maxid, -1)[2:]
    else:
        maxid_string = pack_uint(maxid)
    # IDs: "s" marks pickled string IDs; otherwise shrink the array
    # typecode to the smallest unsigned type that holds the max ID
    typecode = "I"
    if stringids:
        ids_string = dumps(ids, -1)[2:]
        typecode = "s"
    else:
        if maxid <= 255:
            typecode = "B"
        elif maxid <= 65535:
            typecode = "H"
        if typecode != ids.typecode:
            ids = array(typecode, iter(ids))
        if not IS_LITTLE:
            # On-disk layout is little-endian
            ids.byteswap()
        # NOTE(review): array.tostring() was removed in Python 3.9;
        # tobytes() is the modern equivalent
        ids_string = ids.tostring()
        if compression:
            ids_string = compress(ids_string, compression)
    # Weights: all-default (1.0) weights collapse to an empty string
    if all(w == 1.0 for w in weights):
        weights_string = b('')
    else:
        if not IS_LITTLE:
            # NOTE(review): byteswaps self.weights IN PLACE on big-endian
            # hosts — confirm the buffer isn't reused after this call
            weights.byteswap()
        weights_string = weights.tostring()
    if weights_string and compression:
        weights_string = compress(weights_string, compression)
    # Values: negative postingsize means variable-size (pickled) values,
    # zero means no values, positive means fixed-size values concatenated
    postingsize = self.postingsize
    if postingsize < 0:
        values_string = dumps(values, -1)[2:]
    elif postingsize == 0:
        values_string = b('')
    else:
        values_string = b("").join(values)
    if values_string and compression:
        values_string = compress(values_string, compression)
    # Header: flags bit 0 records whether any section was compressed
    flags = 1 if compression else 0
    blocksize = sum((self._struct.size, len(maxid_string), len(ids_string),
                     len(weights_string), len(values_string)))
    header = self._struct.pack(blocksize, flags, postcount,
                               typecode.encode('latin-1'), 0,
                               len(ids_string), len(weights_string),
                               self.max_weight(), self.max_wol(), 0, 0,
                               self._maxlength, self._minlength or 0)
    # Write the sections in the fixed on-disk order
    postfile.write(header)
    postfile.write(maxid_string)
    postfile.write(ids_string)
    postfile.write(weights_string)
    postfile.write(values_string)
开发者ID:ChimmyTee,项目名称:oh-mainline,代码行数:72,代码来源:postblocks.py
注:本文中的whoosh.compat.dumps函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论