本文整理汇总了Java中org.apache.lucene.util.fst.Util类的典型用法代码示例。如果您正苦于以下问题:Java Util类的具体用法?Java Util怎么用?Java Util使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Util类属于org.apache.lucene.util.fst包,在下文中一共展示了Util类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: finishTerm
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
public void finishTerm(long defaultWeight) throws IOException {
ArrayUtil.timSort(surfaceFormsAndPayload, 0, count);
int deduplicator = 0;
analyzed.append((byte) 0);
analyzed.setLength(analyzed.length() + 1);
analyzed.grow(analyzed.length());
for (int i = 0; i < count; i++) {
analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++);
Util.toIntsRef(analyzed.get(), scratchInts);
SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i];
long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight;
builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload));
}
seenSurfaceForms.clear();
count = 0;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:XAnalyzingSuggester.java
示例2: build
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/** Builds the NormalizeCharMap; call this once you
* are done calling {@link #add}. */
public NormalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRefBuilder scratch = new IntsRefBuilder();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
builder.add(Util.toUTF16(ent.getKey(), scratch),
new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new NormalizeCharMap(map);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:23,代码来源:NormalizeCharMap.java
示例3: parseConversions
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
Map<String,String> mappings = new TreeMap<>();
for (int i = 0; i < num; i++) {
String line = reader.readLine();
String parts[] = line.split("\\s+");
if (parts.length != 3) {
throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
}
if (mappings.put(parts[1], parts[2]) != null) {
throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
}
}
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
IntsRefBuilder scratchInts = new IntsRefBuilder();
for (Map.Entry<String,String> entry : mappings.entrySet()) {
Util.toUTF16(entry.getKey(), scratchInts);
builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
}
return builder.finish();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:25,代码来源:Dictionary.java
示例4: finishTerm
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
// write term meta data into fst
final BlockTermState state = postingsWriter.newTermState();
final FSTTermOutputs.TermData meta = new FSTTermOutputs.TermData();
meta.longs = new long[longsSize];
meta.bytes = null;
meta.docFreq = state.docFreq = stats.docFreq;
meta.totalTermFreq = state.totalTermFreq = stats.totalTermFreq;
postingsWriter.finishTerm(state);
postingsWriter.encodeTerm(meta.longs, metaWriter, fieldInfo, state, true);
final int bytesSize = (int)metaWriter.getFilePointer();
if (bytesSize > 0) {
meta.bytes = new byte[bytesSize];
metaWriter.writeTo(meta.bytes, 0);
metaWriter.reset();
}
builder.add(Util.toIntsRef(text, scratchTerm), meta);
numTerms++;
}
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:FSTTermsWriter.java
示例5: writeFST
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<>(INPUT_TYPE.BYTE1, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v, scratch), ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
meta.writeVLong(ord);
}
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:MemoryDocValuesConsumer.java
示例6: build
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public void build(TermFreqIterator iterator) throws IOException {
BytesRef scratch = new BytesRef();
TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
while ((scratch = iter.next()) != null) {
long cost = iter.weight();
if (previous == null) {
previous = new BytesRef();
} else if (scratch.equals(previous)) {
continue; // for duplicate suggestions, the best weight is actually
// added
}
Util.toIntsRef(scratch, scratchInts);
builder.add(scratchInts, cost);
previous.copyBytes(scratch);
}
fst = builder.finish();
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:WFSTCompletionLookup.java
示例7: build
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/** Builds the NormalizeCharMap; call this once you
* are done calling {@link #add}. */
public NormalizeCharMap build() {
final FST<CharsRef> map;
try {
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRef scratch = new IntsRef();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
builder.add(Util.toUTF16(ent.getKey(), scratch),
new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
} catch (IOException ioe) {
// Bogus FST IOExceptions!! (will never happen)
throw new RuntimeException(ioe);
}
return new NormalizeCharMap(map);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:23,代码来源:NormalizeCharMap.java
示例8: writeFST
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
IntsRef scratch = new IntsRef();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v, scratch), ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
meta.writeVLong(ord);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:Lucene42DocValuesConsumer.java
示例9: writeFST
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
IntsRef scratch = new IntsRef();
long ord = 0;
for (BytesRef v : values) {
builder.add(Util.toIntsRef(v, scratch), ord);
ord++;
}
FST<Long> fst = builder.finish();
if (fst != null) {
fst.save(data);
}
meta.writeVLong(ord);
}
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:19,代码来源:MemoryDocValuesConsumer.java
示例10: build
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public void build(InputIterator iterator) throws IOException {
if (iterator.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
BytesRef scratch = new BytesRef();
InputIterator iter = new WFSTInputIterator(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
while ((scratch = iter.next()) != null) {
long cost = iter.weight();
if (previous == null) {
previous = new BytesRef();
} else if (scratch.equals(previous)) {
continue; // for duplicate suggestions, the best weight is actually
// added
}
Util.toIntsRef(scratch, scratchInts);
builder.add(scratchInts, cost);
previous.copyBytes(scratch);
}
fst = builder.finish();
}
开发者ID:jimaguere,项目名称:Maskana-Gestor-de-Conocimiento,代码行数:27,代码来源:WFSTCompletionLookup.java
示例11: append
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException {
final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<BytesRef> indexEnt;
while((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
}
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:11,代码来源:BlockTreeTermsWriter.java
示例12: seekExact
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public void seekExact(long ord) throws IOException {
// TODO: would be better to make this simpler and faster.
// but we dont want to introduce a bug that corrupts our enum state!
bytesReader.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
BytesRefBuilder scratchBytes = new BytesRefBuilder();
scratchBytes.clear();
Util.toBytesRef(output, scratchBytes);
// TODO: we could do this lazily, better to try to push into FSTEnum though?
in.seekExact(scratchBytes.get());
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:14,代码来源:Lucene42DocValuesProducer.java
示例13: affixFST
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
Builder<IntsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, outputs);
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<String,List<Integer>> entry : affixes.entrySet()) {
Util.toUTF32(entry.getKey(), scratch);
List<Integer> entries = entry.getValue();
IntsRef output = new IntsRef(entries.size());
for (Integer c : entries) {
output.ints[output.length++] = c;
}
builder.add(scratch.get(), output);
}
return builder.finish();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:16,代码来源:Dictionary.java
示例14: incrementToken
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (finiteStrings == null) {
Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input);
if (strings.size() > MAX_PATHS) {
throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS
+ " finite strings are supported");
}
posInc = strings.size();
finiteStrings = strings.iterator();
}
if (finiteStrings.hasNext()) {
posAttr.setPositionIncrement(posInc);
/*
* this posInc encodes the number of paths that this surface form
* produced. Multi Fields have the same surface form and therefore sum up
*/
posInc = 0;
Util.toBytesRef(finiteStrings.next(), bytesAtt.builder()); // now we have UTF-8
if (charTermAttribute != null) {
charTermAttribute.setLength(0);
charTermAttribute.append(bytesAtt.toUTF16());
}
if (payload != null) {
payloadAttr.setPayload(this.payload);
}
return true;
}
return false;
}
开发者ID:baidu,项目名称:Elasticsearch,代码行数:34,代码来源:CompletionTokenStream.java
示例15: seekExact
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public void seekExact(long ord) throws IOException {
// TODO: would be better to make this simpler and faster.
// but we dont want to introduce a bug that corrupts our enum state!
bytesReader.setPosition(0);
fst.getFirstArc(firstArc);
IntsRef output = Util.getByOutput(fst, ord, bytesReader, firstArc, scratchArc, scratchInts);
// TODO: we could do this lazily, better to try to push into FSTEnum though?
in.seekExact(Util.toBytesRef(output, new BytesRefBuilder()));
}
开发者ID:europeana,项目名称:search,代码行数:11,代码来源:MemoryDocValuesProducer.java
示例16: loadCeilFrame
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/** Load frame for target arc(node) on fst, so that
* arc.label >= label and !fsa.reject(arc.label) */
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
FST.Arc<FSTTermOutputs.TermData> arc = frame.fstArc;
arc = Util.readCeilArc(label, fst, top.fstArc, arc, fstReader);
if (arc == null) {
return null;
}
frame.fsaState = fsa.step(top.fsaState, arc.label);
//if (TEST) System.out.println(" loadCeil frame="+frame);
if (frame.fsaState == -1) {
return loadNextFrame(top, frame);
}
return frame;
}
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:FSTTermsReader.java
示例17: finishTerm
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
bufferSkip();
}
// write term meta data into fst
final long longs[] = new long[longsSize];
final long delta = stats.totalTermFreq - stats.docFreq;
if (stats.totalTermFreq > 0) {
if (delta == 0) {
statsOut.writeVInt(stats.docFreq<<1|1);
} else {
statsOut.writeVInt(stats.docFreq<<1|0);
statsOut.writeVLong(stats.totalTermFreq-stats.docFreq);
}
} else {
statsOut.writeVInt(stats.docFreq);
}
BlockTermState state = postingsWriter.newTermState();
state.docFreq = stats.docFreq;
state.totalTermFreq = stats.totalTermFreq;
postingsWriter.finishTerm(state);
postingsWriter.encodeTerm(longs, metaBytesOut, fieldInfo, state, true);
for (int i = 0; i < longsSize; i++) {
metaLongsOut.writeVLong(longs[i] - lastLongs[i]);
lastLongs[i] = longs[i];
}
metaLongsOut.writeVLong(metaBytesOut.getFilePointer() - lastMetaBytesFP);
builder.add(Util.toIntsRef(text, scratchTerm), numTerms);
numTerms++;
lastMetaBytesFP = metaBytesOut.getFilePointer();
}
开发者ID:europeana,项目名称:search,代码行数:35,代码来源:FSTOrdTermsWriter.java
示例18: loadCeilFrame
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
/** Load frame for target arc(node) on fst, so that
* arc.label >= label and !fsa.reject(arc.label) */
Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException {
FST.Arc<Long> arc = frame.arc;
arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader);
if (arc == null) {
return null;
}
frame.state = fsa.step(top.state, arc.label);
//if (TEST) System.out.println(" loadCeil frame="+frame);
if (frame.state == -1) {
return loadNextFrame(top, frame);
}
return frame;
}
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:FSTOrdTermsReader.java
示例19: append
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private void append(Builder<Output> builder, FST<Output> subIndex, long termOrdOffset, IntsRefBuilder scratchIntsRef) throws IOException {
final BytesRefFSTEnum<Output> subIndexEnum = new BytesRefFSTEnum<>(subIndex);
BytesRefFSTEnum.InputOutput<Output> indexEnt;
while ((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
Output output = indexEnt.output;
long blockTermCount = output.endOrd - output.startOrd + 1;
Output newOutput = FST_OUTPUTS.newOutput(output.bytes, termOrdOffset+output.startOrd, output.endOrd-termOrdOffset);
//System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd));
builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput);
}
}
开发者ID:europeana,项目名称:search,代码行数:15,代码来源:OrdsBlockTreeTermsWriter.java
示例20: loadTermsIndex
import org.apache.lucene.util.fst.Util; //导入依赖的package包/类
private void loadTermsIndex() throws IOException {
if (fst == null) {
IndexInput clone = in.clone();
clone.seek(indexStart);
fst = new FST<>(clone, fstOutputs);
clone.close();
/*
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
Util.toDot(fst, w, false, false);
System.out.println("FST INDEX: SAVED to " + dotFileName);
w.close();
*/
if (indexDivisor > 1) {
// subsample
final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
int count = indexDivisor;
while((result = fstEnum.next()) != null) {
if (count == indexDivisor) {
builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
count = 0;
}
count++;
}
fst = builder.finish();
}
}
}
开发者ID:europeana,项目名称:search,代码行数:35,代码来源:VariableGapTermsIndexReader.java
注:本文中的org.apache.lucene.util.fst.Util类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论