• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Java Automaton类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中org.apache.lucene.util.automaton.Automaton的典型用法代码示例。如果您正苦于以下问题:Java Automaton类的具体用法?Java Automaton怎么用?Java Automaton使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



Automaton类属于org.apache.lucene.util.automaton包,在下文中一共展示了Automaton类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: getFullPrefixPaths

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(
    List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton,
    FST<PairOutputs.Pair<Long,BytesRef>> fst)
        throws IOException {

    // TODO: right now there's no penalty for fuzzy/edits,
    // ie a completion whose prefix matched exactly what the
    // user typed gets no boost over completions that
    // required an edit, which get no boost over completions
    // requiring two edits.  I suspect a multiplicative
    // factor is appropriate (eg, say a fuzzy match must be at
    // least 2X better weight than the non-fuzzy match to
    // "compete") ... in which case I think the wFST needs
    // to be log weights or something ...

    // Widen the exact lookup automaton into a Levenshtein automaton so that
    // prefixes within the configured edit distance also match, then intersect
    // it with the suggestion FST to collect all candidate paths.
    Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
    return FSTUtil.intersectPrefixPaths(levA, fst);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:26,代码来源:XFuzzySuggester.java


示例2: topoSortStates

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
private int[] topoSortStates(Automaton a) {
  int[] states = new int[a.getNumStates()];
  final Set<Integer> visited = new HashSet<>();
  final LinkedList<Integer> worklist = new LinkedList<>();
  worklist.add(0);
  visited.add(0);
  int upto = 0;
  states[upto] = 0;
  upto++;
  Transition t = new Transition();
  while (worklist.size() > 0) {
    int s = worklist.removeFirst();
    int count = a.initTransition(s, t);
    for (int i=0;i<count;i++) {
      a.getNextTransition(t);
      if (!visited.contains(t.dest)) {
        visited.add(t.dest);
        worklist.add(t.dest);
        states[upto++] = t.dest;
      }
    }
  }
  return states;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:25,代码来源:XAnalyzingSuggester.java


示例3: toAutomaton

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException {
    // Build a byte-labeled automaton from the analyzed tokens (byte 0 is used
    // as the separator between tokens), then normalize the separator label and
    // apply the suggester-specific conversion.
    Automaton result = ts2a.toAutomaton(ts);
    result = convertAutomaton(replaceSep(result));

    // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings
    // assert SpecialOperations.isFinite(result);

    // The automaton may encode multiple paths, e.g. when the analyzer produced
    // a graph via SynFilter or WDF.
    return result;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:XAnalyzingSuggester.java


示例4: toLookupAutomaton

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
    // TODO: is there a Reader from a CharSequence?
    // Analyze the key and turn the resulting token stream into an automaton.
    Automaton result;
    try (TokenStream stream = queryAnalyzer.tokenStream("", key.toString())) {
        result = getTokenStreamToAutomaton().toAutomaton(stream);
    }

    result = replaceSep(result);

    // TODO: we can optimize this somewhat by determinizing
    // while we convert

    // This automaton should not blow up during determinize:
    return Operations.determinize(result, Integer.MAX_VALUE);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:19,代码来源:XAnalyzingSuggester.java


示例5: toAutomaton

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
private Automaton toAutomaton() {
    // Start from the include side: an explicit automaton, a fixed set of
    // values, or "match anything" when neither was configured.
    Automaton result;
    if (include != null) {
        result = include.toAutomaton();
    } else if (includeValues != null) {
        result = Automata.makeStringUnion(includeValues);
    } else {
        result = Automata.makeAnyString();
    }
    // Then subtract the exclude side, if one was configured.
    if (exclude != null) {
        result = Operations.minus(result, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    } else if (excludeValues != null) {
        result = Operations.minus(result, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }
    return result;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:IncludeExclude.java


示例6: initAutomata

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/** Initializes Levenshtein DFAs up to {@code maxDistance}, if possible. */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  //System.out.println("cached automata size: " + cached.size());
  if (cached.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // Build DFAs only over the part of the term after the constant prefix;
    // the prefix is supplied to the builder separately below.
    final String suffix =
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
    final LevenshteinAutomata builder = new LevenshteinAutomata(suffix, transpositions);

    final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
    for (int dist = cached.size(); dist <= maxDistance; dist++) {
      //System.out.println("compute automaton n=" + dist);
      cached.add(new CompiledAutomaton(builder.toAutomaton(dist, prefix), true, false));
    }
  }
  return cached;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:19,代码来源:FuzzyTermsEnum.java


示例7: toAutomaton

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Creates an automaton for the given context queries; the automaton is used
 * to find the matching paths within the FST.
 *
 * @param preserveSep whether to add an additional char
 *        (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 * @return Automaton matching the given queries
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
    // Gap inserted between contexts. When separators are preserved, the FST
    // also stores a SEP_LABEL behind each gap, so the query automaton must
    // include that label as well to match.
    Automaton gap = preserveSep
        ? Operations.concatenate(Automata.makeChar(ContextMapping.SEPARATOR),
                                 Automata.makeChar(XAnalyzingSuggester.SEP_LABEL))
        : Automata.makeChar(ContextMapping.SEPARATOR);

    Automaton result = Automata.makeEmptyString();
    for (ContextQuery query : queries) {
        result = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, result));
    }

    // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
    // of strings).
    return Operations.determinize(result, Integer.MAX_VALUE);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:29,代码来源:ContextMapping.java


示例8: getFullPrefixPaths

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  // Widen the exact lookup automaton into a Levenshtein automaton so that
  // prefixes within the configured edit distance also match, then intersect
  // it with the suggestion FST to collect all candidate paths.
  Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
 
开发者ID:europeana,项目名称:search,代码行数:26,代码来源:FuzzySuggester.java


示例9: toLookupAutomaton

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Analyzes {@code key} and converts the resulting token stream into a
 * determinized automaton used to look up suggestions.
 *
 * NOTE: the stream is closed via IOUtils.closeWhileHandlingException, so an
 * exception thrown while closing is suppressed rather than propagated.
 */
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  Automaton automaton = null;
  TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
  try {
      automaton = getTokenStreamToAutomaton().toAutomaton(ts);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }

  // Normalize the token-separator label.
  automaton = replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  automaton = Operations.determinize(automaton, DEFAULT_MAX_DETERMINIZED_STATES);
  return automaton;
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:AnalyzingSuggester.java


示例10: TermAutomatonWeight

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Builds the weight for a term-automaton query: captures the searcher's
 * similarity and precomputes collection-level statistics for every term that
 * appears in the automaton.
 *
 * NOTE(review): iterates {@code idToTerm} and reads {@code field}, which are
 * declared on the enclosing query class — presumably the term-id-to-bytes
 * mapping for the automaton's labels; confirm against the outer class.
 */
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map<Integer,TermContext> termStates) throws IOException {
  this.automaton = automaton;
  this.searcher = searcher;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();
  // Collect per-term statistics; entries with a null term carry no stats
  // (e.g. "any" transitions) and are skipped.
  List<TermStatistics> allTermStats = new ArrayList<>();
  for(Map.Entry<Integer,BytesRef> ent : idToTerm.entrySet()) {
    Integer termID = ent.getKey();
    if (ent.getValue() != null) {
      allTermStats.add(searcher.termStatistics(new Term(field, ent.getValue()), termStates.get(termID)));
    }
  }

  // Combine term and collection statistics into the scoring weight.
  stats = similarity.computeWeight(getBoost(),
                                   searcher.collectionStatistics(field),
                                   allTermStats.toArray(new TermStatistics[allTermStats.size()]));
}
 
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:TermAutomatonQuery.java


示例11: setUp

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
@Override
public void setUp() throws Exception {
  super.setUp();
  // Automaton accepting exactly one code point that this JVM classifies as a
  // letter (Character.isLetter may differ across JVM/Unicode versions).
  Automaton letter = new Automaton();
  int start = letter.createState();
  int end = letter.createState();
  letter.setAccept(end, true);
  for (int cp = 0; cp <= 0x10FFFF; cp++) {
    if (Character.isLetter(cp)) {
      letter.addTransition(start, end, cp);
    }
  }
  // Zero-or-more letters, compiled into a run automaton for fast matching.
  jvmLetter = new CharacterRunAutomaton(Operations.repeat(letter));
}
 
开发者ID:europeana,项目名称:search,代码行数:18,代码来源:TestDuelingAnalyzers.java


示例12: testCustomProvider

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
public void testCustomProvider() throws IOException {
  // Provider resolving the named automaton <quickBrown> to quick|brown|bob.
  AutomatonProvider provider = new AutomatonProvider() {
    private final Automaton quickBrown = Operations.union(Arrays.asList(
        Automata.makeString("quick"),
        Automata.makeString("brown"),
        Automata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      if (name.equals("quickBrown")) {
        return quickBrown;
      }
      return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
      provider, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(1, searcher.search(query, 5).totalHits);
}
 
开发者ID:europeana,项目名称:search,代码行数:19,代码来源:TestRegexpQuery.java


示例13: assertAutomatonHits

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
private void assertAutomatonHits(int expected, Automaton automaton)
    throws IOException {
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);

  // The hit count must be identical regardless of the rewrite method used.
  MultiTermQuery.RewriteMethod[] rewrites = {
      MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
      MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE,
      MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE,
      MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT,
  };
  for (MultiTermQuery.RewriteMethod rewrite : rewrites) {
    query.setRewriteMethod(rewrite);
    assertEquals(expected, automatonQueryNrHits(query));
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestAutomatonQuery.java


示例14: assertAutomatonHits

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
private void assertAutomatonHits(int expected, Automaton automaton)
    throws IOException {
  AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);

  // The hit count must be identical regardless of the rewrite method used.
  MultiTermQuery.RewriteMethod[] rewrites = {
      MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,
      MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE,
      MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE,
      MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT,
  };
  for (MultiTermQuery.RewriteMethod rewrite : rewrites) {
    query.setRewriteMethod(rewrite);
    assertEquals(expected, automatonQueryNrHits(query));
  }
}
 
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestAutomatonQueryUnicode.java


示例15: getFullPrefixPaths

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
@Override
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
                                                                     Automaton lookupAutomaton,
                                                                     FST<Pair<Long,BytesRef>> fst)
  throws IOException {

  // TODO: right now there's no penalty for fuzzy/edits,
  // ie a completion whose prefix matched exactly what the
  // user typed gets no boost over completions that
  // required an edit, which get no boost over completions
  // requiring two edits.  I suspect a multiplicative
  // factor is appropriate (eg, say a fuzzy match must be at
  // least 2X better weight than the non-fuzzy match to
  // "compete") ... in which case I think the wFST needs
  // to be log weights or something ...

  // Widen the exact lookup automaton into a Levenshtein automaton so that
  // prefixes within the configured edit distance also match, then intersect
  // it with the suggestion FST to collect all candidate paths.
  Automaton levA = toLevenshteinAutomata(lookupAutomaton);
  return FSTUtil.intersectPrefixPaths(levA, fst);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:26,代码来源:FuzzySuggester.java


示例16: toLookupAutomaton

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Analyzes {@code key} and converts the resulting token stream into a
 * determinized automaton used to look up suggestions.
 */
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
  // TODO: is there a Reader from a CharSequence?
  // Turn tokenstream into automaton:
  TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()));
  Automaton automaton;
  // FIX: the stream was previously not closed when toAutomaton()/end() threw,
  // leaking the analyzer's resources; close it in a finally block.
  try {
    automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
    ts.end();
  } finally {
    ts.close();
  }

  // TODO: we could use the end offset to "guess"
  // whether the final token was a partial token; this
  // would only be a heuristic ... but maybe an OK one.
  // This way we could eg differentiate "net" from "net ",
  // which we can't today...

  // Normalize the token-separator label (mutates in place in this API).
  replaceSep(automaton);

  // TODO: we can optimize this somewhat by determinizing
  // while we convert
  BasicOperations.determinize(automaton);
  return automaton;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:22,代码来源:AnalyzingSuggester.java


示例17: setUp

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
@Override
public void setUp() throws Exception {
  super.setUp();
  // Automaton accepting exactly one code point that this JVM classifies as a
  // letter (Character.isLetter may differ across JVM/Unicode versions).
  State start = new State();
  State end = new State();
  end.setAccept(true);
  for (int cp = 0; cp <= 0x10FFFF; cp++) {
    if (Character.isLetter(cp)) {
      start.addTransition(new Transition(cp, cp, end));
    }
  }
  Automaton letters = new Automaton(start);
  letters.reduce();
  // Zero-or-more letters, compiled into a run automaton for fast matching.
  jvmLetter = new CharacterRunAutomaton(BasicOperations.repeat(letters));
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:18,代码来源:TestDuelingAnalyzers.java


示例18: initAutomata

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/** Initializes Levenshtein DFAs up to {@code maxDistance}, if possible. */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
  final List<CompiledAutomaton> cached = dfaAtt.automata();
  //System.out.println("cached automata size: " + cached.size());
  if (cached.size() <= maxDistance
      && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
    // Build DFAs only over the part of the term after the constant prefix.
    final LevenshteinAutomata builder = new LevenshteinAutomata(
        UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength),
        transpositions);

    for (int dist = cached.size(); dist <= maxDistance; dist++) {
      //System.out.println("compute automaton n=" + dist);
      Automaton dfa = builder.toAutomaton(dist);
      if (realPrefixLength > 0) {
        // Prepend the constant prefix so the DFA matches complete terms.
        Automaton prefix = BasicAutomata.makeString(
          UnicodeUtil.newString(termText, 0, realPrefixLength));
        dfa = BasicOperations.concatenate(prefix, dfa);
      }
      cached.add(new CompiledAutomaton(dfa, true, false));
    }
  }
  return cached;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:FuzzyTermsEnum.java


示例19: testCustomProvider

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
public void testCustomProvider() throws IOException {
  // Provider resolving the named automaton <quickBrown> to quick|brown|bob.
  AutomatonProvider provider = new AutomatonProvider() {
    private final Automaton quickBrown = BasicOperations.union(Arrays.asList(
        BasicAutomata.makeString("quick"),
        BasicAutomata.makeString("brown"),
        BasicAutomata.makeString("bob")));

    @Override
    public Automaton getAutomaton(String name) {
      if (name.equals("quickBrown")) {
        return quickBrown;
      }
      return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
      provider);
  assertEquals(1, searcher.search(query, 5).totalHits);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:TestRegexpQuery.java


示例20: testOverlappedTokensLattice

import org.apache.lucene.util.automaton.Automaton; //导入依赖的package包/类
/**
 * Verifies that a token stream with an overlapping token ("xyz" spans the
 * same positions as "abc def") converts to an automaton accepting exactly
 * the union of both paths through the lattice.
 */
public void testOverlappedTokensLattice() throws Exception {

    // token(text, posInc, posLen): "xyz" starts at the same position as "abc"
    // (posInc=0) and spans two positions (posLen=2), overlapping "def" too.
    final TokenStream ts = new CannedTokenStream(
      new Token[] {
        token("abc", 1, 1),
        token("xyz", 0, 2),
        token("def", 1, 1),
      });
    final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
    // Expected language: "xyz" OR "abc def" (join() is a file-local helper
    // that concatenates the strings with the token separator).
    final Automaton a1 = BasicAutomata.makeString("xyz");
    final Automaton a2 = join("abc", "def");
                                                                   
    final Automaton expected = BasicOperations.union(a1, a2);
    //toDot(actual);
    assertTrue(BasicOperations.sameLanguage(expected, actual));
  }
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:17,代码来源:TestGraphTokenizers.java



注:本文中的org.apache.lucene.util.automaton.Automaton类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java Info类代码示例发布时间:2022-05-22
下一篇:
Java RootRegionTracker类代码示例发布时间:2022-05-22
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap