本文整理汇总了Java中org.apache.lucene.util.automaton.Automaton类的典型用法代码示例。如果您正苦于以下问题：Java Automaton类的具体用法？Java Automaton怎么用？Java Automaton使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Automaton类属于org.apache.lucene.util.automaton包,在下文中一共展示了Automaton类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getFullPrefixPaths
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Expands the lookup automaton into its Levenshtein (fuzzy) form, passes it through
 * {@code convertAutomaton}, and intersects the result with the suggestion FST.
 *
 * @param prefixPaths prefix paths supplied by the caller; not read by this implementation
 * @param lookupAutomaton automaton built from the text being looked up
 * @param fst FST that the fuzzy automaton is intersected with
 * @return whatever {@code FSTUtil.intersectPrefixPaths} yields for the fuzzy automaton
 * @throws IOException propagated from the calls made here
 */
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths,
    Automaton lookupAutomaton,
    FST<PairOutputs.Pair<Long,BytesRef>> fst) throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  final Automaton fuzzy = toLevenshteinAutomata(lookupAutomaton);
  final Automaton levA = convertAutomaton(fuzzy);

  /* Debugging aid: dump the fuzzy automaton as Graphviz.
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */

  return FSTUtil.intersectPrefixPaths(levA, fst);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:26,代码来源:XFuzzySuggester.java
示例2: topoSortStates
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Lists the states reachable from state 0 in the order they are first discovered by a
 * breadth-first walk over the automaton's transitions.
 *
 * <p>The returned array always has {@code a.getNumStates()} slots; slots beyond the number
 * of reachable states are left at their default value of 0.
 *
 * @param a automaton to walk; state 0 is used as the starting point
 * @return state numbers in discovery order, starting with 0
 */
private int[] topoSortStates(Automaton a) {
  final int[] order = new int[a.getNumStates()];
  final Set<Integer> seen = new HashSet<>();
  final LinkedList<Integer> queue = new LinkedList<>();

  // Seed the walk with state 0.
  queue.add(0);
  seen.add(0);
  order[0] = 0;
  int next = 1;

  // Single reusable cursor for iterating the transitions of each state.
  final Transition scratch = new Transition();
  while (!queue.isEmpty()) {
    final int state = queue.removeFirst();
    int remaining = a.initTransition(state, scratch);
    while (remaining-- > 0) {
      a.getNextTransition(scratch);
      final int dest = scratch.dest;
      // Set.add returns false when the state was already recorded.
      if (seen.add(dest)) {
        queue.add(dest);
        order[next++] = dest;
      }
    }
  }
  return order;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:25,代码来源:XAnalyzingSuggester.java
示例3: toAutomaton
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Turns an analyzed token stream into an automaton, then applies {@code replaceSep} and
 * {@code convertAutomaton} to it, in that order.
 *
 * @param ts token stream to consume
 * @param ts2a converter used to build the initial automaton from {@code ts}
 * @return the automaton after separator replacement and conversion
 * @throws IOException propagated from the calls made here
 */
final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException {
  // Create corresponding automaton: labels are bytes
  // from each analyzed token, with byte 0 used as
  // separator between tokens:
  final Automaton raw = ts2a.toAutomaton(ts);
  final Automaton withSeparators = replaceSep(raw);

  // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
  // assert SpecialOperations.isFinite(automaton);

  // The result may contain more than one path, eg if the analyzer created a
  // graph using SynFilter or WDF.
  return convertAutomaton(withSeparators);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:XAnalyzingSuggester.java
示例4: toLookupAutomaton
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Analyzes {@code key} with the query analyzer and returns the resulting automaton after
 * separator replacement and determinization.
 *
 * @param key text to look up
 * @return determinized lookup automaton
 * @throws IOException propagated from the calls made here
 */
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  Automaton analyzed;
  // Turn tokenstream into automaton; the stream is closed when the block exits.
  try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
    analyzed = getTokenStreamToAutomaton().toAutomaton(ts);
  }

  final Automaton withSeparators = replaceSep(analyzed);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert

  // This automaton should not blow up during determinize:
  return Operations.determinize(withSeparators, Integer.MAX_VALUE);
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:XAnalyzingSuggester.java
示例5: toAutomaton
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Builds the automaton for this include/exclude configuration: the included language
 * minus the excluded language, if any.
 *
 * <p>For each side the pattern field ({@code include} / {@code exclude}) takes precedence
 * over the value set ({@code includeValues} / {@code excludeValues}). With no include
 * configured, the starting point is {@code Automata.makeAnyString()}.
 *
 * @return the resulting automaton
 */
private Automaton toAutomaton() {
  final Automaton included;
  if (include != null) {
    included = include.toAutomaton();
  } else if (includeValues != null) {
    included = Automata.makeStringUnion(includeValues);
  } else {
    included = Automata.makeAnyString();
  }

  if (exclude != null) {
    return Operations.minus(included, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  if (excludeValues != null) {
    return Operations.minus(
        included, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  }
  // Nothing to subtract.
  return included;
}
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:IncludeExclude.java
示例6: initAutomata
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Fills the cached automaton list with Levenshtein automata for every edit distance from
 * the current list size up to and including {@code maxDistance}.
 *
 * <p>The list is returned untouched when it already has more than {@code maxDistance}
 * entries or when {@code maxDistance} exceeds
 * {@code LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE}.
 *
 * @param maxDistance highest edit distance to build an automaton for
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended in place
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  //System.out.println("cached automata size: " + cached.size());

  // Nothing to add: already populated far enough, or distance not supported.
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // The builder sees only the part of the term after the constant prefix.
  final String suffixText =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffixText, transpositions);
  final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

  for (int distance = cached.size(); distance <= maxDistance; distance++) {
    //System.out.println("compute automaton n=" + distance);
    final Automaton levenshtein = builder.toAutomaton(distance, prefix);
    cached.add(new CompiledAutomaton(levenshtein, true, false));
  }
  return cached;
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:19,代码来源:FuzzyTermsEnum.java
示例7: toAutomaton
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Creates an automaton for the given context queries; this automaton is used to find the
 * matching paths in the FST.
 *
 * <p>Each query's automaton is followed by a gap and then prepended to what has been built
 * so far, so the first query in iteration order ends up last in the result.
 *
 * @param preserveSep if set, an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) is
 *     appended to the gap that follows each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return determinized automaton matching the given queries
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
  final Automaton separator = Automata.makeChar(ContextMapping.SEPARATOR);
  // if separators are preserved the fst contains a SEP_LABEL
  // behind each gap. To have a matching automaton, we need to
  // include the SEP_LABEL in the query as well
  final Automaton gap = preserveSep
      ? Operations.concatenate(separator, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL))
      : separator;

  Automaton combined = Automata.makeEmptyString();
  for (ContextQuery query : queries) {
    combined = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, combined));
  }

  // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
  // of strings).
  return Operations.determinize(combined, Integer.MAX_VALUE);
}
开发者ID:baidu,项目名称:Elasticsearch,代码行数:29,代码来源:ContextMapping.java
示例8: getFullPrefixPaths
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Expands the lookup automaton into its Levenshtein (fuzzy) form, passes it through
 * {@code convertAutomaton}, and intersects the result with the suggestion FST.
 *
 * @param prefixPaths prefix paths supplied by the caller; not read by this implementation
 * @param lookupAutomaton automaton built from the text being looked up
 * @param fst FST that the fuzzy automaton is intersected with
 * @return whatever {@code FSTUtil.intersectPrefixPaths} yields for the fuzzy automaton
 * @throws IOException propagated from the calls made here
 */
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
    Automaton lookupAutomaton,
    FST<Pair<Long,BytesRef>> fst) throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  final Automaton fuzzy = toLevenshteinAutomata(lookupAutomaton);
  final Automaton levA = convertAutomaton(fuzzy);

  /* Debugging aid: dump the fuzzy automaton as Graphviz.
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */

  return FSTUtil.intersectPrefixPaths(levA, fst);
}
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:FuzzySuggester.java
示例9: toLookupAutomaton
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Analyzes {@code key} with the query analyzer and returns the resulting automaton after
 * separator replacement and determinization.
 *
 * @param key text to look up
 * @return determinized lookup automaton
 * @throws IOException propagated from the calls made here
 */
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  final TokenStream ts = queryAnalyzer.tokenStream("", key.toString());

  // Turn tokenstream into automaton:
  Automaton analyzed;
  try {
    analyzed = getTokenStreamToAutomaton().toAutomaton(ts);
  } finally {
    // Always release the stream, whether or not the conversion succeeded.
    IOUtils.closeWhileHandlingException(ts);
  }

  final Automaton withSeparators = replaceSep(analyzed);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  return Operations.determinize(withSeparators, DEFAULT_MAX_DETERMINIZED_STATES);
}
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:AnalyzingSuggester.java
示例10: TermAutomatonWeight
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Creates the weight: stores the collaborators, gathers term statistics for every
 * non-null term in {@code idToTerm}, and asks the searcher's similarity to compute
 * {@code stats} from them.
 *
 * @param automaton automaton stored on this weight
 * @param searcher searcher supplying the similarity and the term/collection statistics
 * @param termStates per-term-ID contexts, looked up by the keys of {@code idToTerm}
 * @throws IOException propagated from the calls made here
 */
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map<Integer,TermContext> termStates) throws IOException {
  this.automaton = automaton;
  this.searcher = searcher;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final List<TermStatistics> collected = new ArrayList<>();
  for (Map.Entry<Integer,BytesRef> entry : idToTerm.entrySet()) {
    final BytesRef termBytes = entry.getValue();
    if (termBytes == null) {
      // IDs mapped to a null term contribute no statistics.
      continue;
    }
    final Integer termID = entry.getKey();
    collected.add(searcher.termStatistics(new Term(field, termBytes), termStates.get(termID)));
  }

  final TermStatistics[] termStatsArray = collected.toArray(new TermStatistics[collected.size()]);
  stats = similarity.computeWeight(getBoost(), searcher.collectionStatistics(field), termStatsArray);
}
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:TermAutomatonQuery.java
示例11: setUp
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Prepares {@code jvmLetter}: a run automaton accepting repetitions of any code point
 * that this JVM's {@link Character#isLetter(int)} reports as a letter.
 *
 * @throws Exception propagated from {@code super.setUp()}
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // Two-state automaton: one transition per letter code point from start to end.
  final Automaton oneLetter = new Automaton();
  final int start = oneLetter.createState();
  final int end = oneLetter.createState();
  oneLetter.setAccept(end, true);

  // build an automaton matching this jvm's letter definition
  for (int codePoint = 0; codePoint <= Character.MAX_CODE_POINT; codePoint++) {
    if (!Character.isLetter(codePoint)) {
      continue;
    }
    oneLetter.addTransition(start, end, codePoint);
  }

  jvmLetter = new CharacterRunAutomaton(Operations.repeat(oneLetter));
}
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:TestDuelingAnalyzers.java
示例12: testCustomProvider
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Checks that a {@code RegexpQuery} resolves the named automaton {@code <quickBrown>}
 * through a custom {@code AutomatonProvider} and that the search reports one hit.
 *
 * @throws IOException propagated from the search
 */
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches "quick", "brown" or "bob"
    private Automaton quickBrownAutomaton = Operations.union(
        Arrays.asList(
            Automata.makeString("quick"),
            Automata.makeString("brown"),
            Automata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      // Only the one known name is served; anything else is unknown.
      return name.equals("quickBrown") ? quickBrownAutomaton : null;
    }
  };

  final RegexpQuery namedAutomatonQuery =
      new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, myProvider, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(1, searcher.search(namedAutomatonQuery, 5).totalHits);
}
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:TestRegexpQuery.java
示例13: assertAutomatonHits
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Asserts that an {@code AutomatonQuery} built from {@code automaton} produces the
 * expected hit count under each of four rewrite methods, tried in a fixed order.
 *
 * @param expected hit count that {@code automatonQueryNrHits} must report
 * @param automaton automaton to wrap in the query
 * @throws IOException propagated from running the query
 */
private void assertAutomatonHits(int expected, Automaton automaton)
    throws IOException {
  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);

  // Same query instance, re-run once per rewrite strategy.
  final MultiTermQuery.RewriteMethod[] rewriteMethods = {
    MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
    MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE,
    MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE,
    MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT,
  };
  for (MultiTermQuery.RewriteMethod rewriteMethod : rewriteMethods) {
    query.setRewriteMethod(rewriteMethod);
    assertEquals(expected, automatonQueryNrHits(query));
  }
}
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestAutomatonQuery.java
示例14: assertAutomatonHits
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Asserts that an {@code AutomatonQuery} built from {@code automaton} produces the
 * expected hit count under each of four rewrite methods, tried in a fixed order.
 *
 * @param expected hit count that {@code automatonQueryNrHits} must report
 * @param automaton automaton to wrap in the query
 * @throws IOException propagated from running the query
 */
private void assertAutomatonHits(int expected, Automaton automaton)
    throws IOException {
  final AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);

  // Same query instance, re-run once per rewrite strategy.
  final MultiTermQuery.RewriteMethod[] rewriteMethods = {
    MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
    MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE,
    MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE,
    MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT,
  };
  for (MultiTermQuery.RewriteMethod rewriteMethod : rewriteMethods) {
    query.setRewriteMethod(rewriteMethod);
    assertEquals(expected, automatonQueryNrHits(query));
  }
}
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestAutomatonQueryUnicode.java
示例15: getFullPrefixPaths
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Expands the lookup automaton into its Levenshtein (fuzzy) form and intersects the
 * result with the suggestion FST.
 *
 * @param prefixPaths prefix paths supplied by the caller; not read by this implementation
 * @param lookupAutomaton automaton built from the text being looked up
 * @param fst FST that the fuzzy automaton is intersected with
 * @return whatever {@code FSTUtil.intersectPrefixPaths} yields for the fuzzy automaton
 * @throws IOException propagated from the calls made here
 */
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
    Automaton lookupAutomaton,
    FST<Pair<Long,BytesRef>> fst) throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  final Automaton levA = toLevenshteinAutomata(lookupAutomaton);

  /* Debugging aid: dump the fuzzy automaton as Graphviz.
    Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
    w.write(levA.toDot());
    w.close();
    System.out.println("Wrote LevA to out.dot");
  */

  return FSTUtil.intersectPrefixPaths(levA, fst);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:26,代码来源:FuzzySuggester.java
示例16: toLookupAutomaton
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Analyzes {@code key} with the query analyzer and returns the resulting automaton after
 * {@code replaceSep} and {@code BasicOperations.determinize} have been applied to it.
 *
 * <p>The token stream is closed in a {@code finally} block, so it is released even when
 * building the automaton or calling {@code end()} throws.
 *
 * @param key text to look up
 * @return the lookup automaton
 * @throws IOException propagated from analysis or from closing the token stream
 */
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  final TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()));
  Automaton automaton;
  try {
    automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
    ts.end();
  } finally {
    // Close on every path; previously a failure above leaked the stream.
    ts.close();
  }

  // TODO: we could use the end offset to "guess"
  // whether the final token was a partial token; this
  // would only be a heuristic ... but maybe an OK one.
  // This way we could eg differentiate "net" from "net ",
  // which we can't today...

  // Return values are intentionally not used here, matching the original calls.
  replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  BasicOperations.determinize(automaton);
  return automaton;
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:22,代码来源:AnalyzingSuggester.java
示例17: setUp
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Prepares {@code jvmLetter}: a run automaton accepting repetitions of any code point
 * that this JVM's {@link Character#isLetter(int)} reports as a letter.
 *
 * @throws Exception propagated from {@code super.setUp()}
 */
@Override
public void setUp() throws Exception {
  super.setUp();

  // build an automaton matching this jvm's letter definition
  final State start = new State();
  final State end = new State();
  end.setAccept(true);
  for (int codePoint = 0; codePoint <= Character.MAX_CODE_POINT; codePoint++) {
    if (!Character.isLetter(codePoint)) {
      continue;
    }
    // One single-code-point transition per letter.
    start.addTransition(new Transition(codePoint, codePoint, end));
  }

  final Automaton oneLetter = new Automaton(start);
  oneLetter.reduce();
  jvmLetter = new CharacterRunAutomaton(BasicOperations.repeat(oneLetter));
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:18,代码来源:TestDuelingAnalyzers.java
示例18: initAutomata
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Fills the cached automaton list with Levenshtein automata for every edit distance from
 * the current list size up to and including {@code maxDistance}.
 *
 * <p>The list is returned untouched when it already has more than {@code maxDistance}
 * entries or when {@code maxDistance} exceeds
 * {@code LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE}. When {@code realPrefixLength}
 * is positive, each automaton is preceded by one matching that constant prefix of the term.
 *
 * @param maxDistance highest edit distance to build an automaton for
 * @return the list obtained from {@code dfaAtt.automata()}, possibly extended in place
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  //System.out.println("cached automata size: " + cached.size());

  // Nothing to add: already populated far enough, or distance not supported.
  if (cached.size() > maxDistance
      || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    return cached;
  }

  // The builder sees only the part of the term after the constant prefix.
  final String suffixText =
      UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
  final LevenshteinAutomata builder = new LevenshteinAutomata(suffixText, transpositions);

  for (int distance = cached.size(); distance <= maxDistance; distance++) {
    //System.out.println("compute automaton n=" + distance);
    Automaton levenshtein = builder.toAutomaton(distance);
    // constant prefix
    if (realPrefixLength > 0) {
      final String prefixText = UnicodeUtil.newString(termText, 0, realPrefixLength);
      final Automaton prefix = BasicAutomata.makeString(prefixText);
      levenshtein = BasicOperations.concatenate(prefix, levenshtein);
    }
    cached.add(new CompiledAutomaton(levenshtein, true, false));
  }
  return cached;
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:FuzzyTermsEnum.java
示例19: testCustomProvider
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
public void testCustomProvider() throws IOException {
AutomatonProvider myProvider = new AutomatonProvider() {
// automaton that matches quick or brown
private Automaton quickBrownAutomaton = BasicOperations.union(Arrays
.asList(BasicAutomata.makeString("quick"),
BasicAutomata.makeString("brown"),
BasicAutomata.makeString("bob")));
@Override
public Automaton getAutomaton(String name) {
if (name.equals("quickBrown")) return quickBrownAutomaton;
else return null;
}
};
RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
myProvider);
assertEquals(1, searcher.search(query, 5).totalHits);
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:TestRegexpQuery.java
示例20: testOverlappedTokensLattice
import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Feeds three canned tokens ("abc", "xyz", "def") to {@code TokenStreamToAutomaton} and
 * asserts that the result accepts the same language as the union of "xyz" and
 * {@code join("abc", "def")}.
 *
 * @throws Exception propagated from building the automaton
 */
public void testOverlappedTokensLattice() throws Exception {
  final Token[] tokens = new Token[] {
    token("abc", 1, 1),
    token("xyz", 0, 2),
    token("def", 1, 1),
  };
  final TokenStream ts = new CannedTokenStream(tokens);
  final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);

  // Expected language: the single token "xyz", or "abc" joined with "def".
  final Automaton spanning = BasicAutomata.makeString("xyz");
  final Automaton sequence = join("abc", "def");
  final Automaton expected = BasicOperations.union(spanning, sequence);

  //toDot(actual);
  assertTrue(BasicOperations.sameLanguage(expected, actual));
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:17,代码来源:TestGraphTokenizers.java
注：本文中的org.apache.lucene.util.automaton.Automaton类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论