• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Java CharFilter类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中org.apache.lucene.analysis.CharFilter的典型用法代码示例。如果您正苦于以下问题:Java CharFilter类的具体用法?Java CharFilter怎么用?Java CharFilter使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



CharFilter类属于org.apache.lucene.analysis包,在下文中一共展示了CharFilter类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: testNormalizerCharFilter

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Wraps a sample string in an {@code OpenKoreanTextNormalizer}, drains it through a
 * 10-char buffer, and asserts that the collected text equals the expected string.
 */
@Test
public void testNormalizerCharFilter() throws Exception {
    final String rawText = "한국어를 처리하는 예시입니닼ㅋ. 오픈코리안텍스틓ㅎㅎㅎㅎㅎㅎㅎ";
    final String wantedText = "한국어를 처리하는 예시입니다ㅋ. 오픈코리안텍스트ㅎㅎㅎ";

    final CharFilter filtered = new OpenKoreanTextNormalizer(new StringReader(rawText));
    final StringBuilder collected = new StringBuilder();
    final char[] chunk = new char[10];

    // read(char[]) returns -1 once the filter is exhausted.
    for (int count = filtered.read(chunk); count != -1; count = filtered.read(chunk)) {
        collected.append(chunk, 0, count);
    }

    Assert.assertEquals(wantedText, collected.toString());
}
 
开发者ID:open-korean-text,项目名称:elasticsearch-analysis-openkoreantext,代码行数:19,代码来源:OpenKoreanTextNormalizerTest.java


示例2: testDefaultSetting

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Configures an {@code icu_normalizer} char filter with no explicit name or mode and checks
 * that its output matches an {@code nfkc_cf} COMPOSE {@code Normalizer2} applied to the
 * same input, both incrementally and for the complete text.
 */
public void testDefaultSetting() throws Exception {
    final Settings indexSettings = Settings.builder()
        .put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer")
        .build();
    final TestAnalysis testAnalysis =
        createTestAnalysis(new Index("test", "_na_"), indexSettings, new AnalysisICUPlugin());
    final CharFilterFactory factory = testAnalysis.charFilter.get("myNormalizerChar");

    final String source = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    final Normalizer2 reference = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
    final String wholeExpected = reference.normalize(source);

    final CharFilter filtered = (CharFilter) factory.create(new StringReader(source));
    final StringBuilder collected = new StringBuilder();
    final char[] chunk = new char[10];
    for (int count = filtered.read(chunk); count != -1; count = filtered.read(chunk)) {
        collected.append(chunk, 0, count);
        // After each chunk, the text read so far must equal the normalized form of the
        // input prefix ending at the offset that correctOffset reports for the output length.
        final int sourceEnd = filtered.correctOffset(collected.length());
        assertEquals(collected.toString(), reference.normalize(source.substring(0, sourceEnd)));
    }
    assertEquals(wholeExpected, collected.toString());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:22,代码来源:SimpleIcuNormalizerCharFilterTests.java


示例3: testNameAndModeSetting

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Configures an {@code icu_normalizer} char filter with name {@code nfkc} and mode
 * {@code decompose}, then checks that its output matches an {@code nfkc} DECOMPOSE
 * {@code Normalizer2} applied to the same input, both incrementally and for the complete text.
 */
public void testNameAndModeSetting() throws Exception {
    final Settings indexSettings = Settings.builder()
        .put("index.analysis.char_filter.myNormalizerChar.type", "icu_normalizer")
        .put("index.analysis.char_filter.myNormalizerChar.name", "nfkc")
        .put("index.analysis.char_filter.myNormalizerChar.mode", "decompose")
        .build();
    final TestAnalysis testAnalysis =
        createTestAnalysis(new Index("test", "_na_"), indexSettings, new AnalysisICUPlugin());
    final CharFilterFactory factory = testAnalysis.charFilter.get("myNormalizerChar");

    final String source = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
    final Normalizer2 reference = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.DECOMPOSE);
    final String wholeExpected = reference.normalize(source);

    final CharFilter filtered = (CharFilter) factory.create(new StringReader(source));
    final StringBuilder collected = new StringBuilder();
    final char[] chunk = new char[10];
    for (int count = filtered.read(chunk); count != -1; count = filtered.read(chunk)) {
        collected.append(chunk, 0, count);
        // After each chunk, the text read so far must equal the normalized form of the
        // input prefix ending at the offset that correctOffset reports for the output length.
        final int sourceEnd = filtered.correctOffset(collected.length());
        assertEquals(collected.toString(), reference.normalize(source.substring(0, sourceEnd)));
    }
    assertEquals(wholeExpected, collected.toString());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:24,代码来源:SimpleIcuNormalizerCharFilterTests.java


示例4: testNormalization

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Reads an {@code ICUNormalizer2CharFilter} in 10-char chunks and checks, after every chunk
 * and at the end, that the output agrees with applying the same {@code nfkc_cf} COMPOSE
 * normalizer directly to the input.
 */
public void testNormalization() throws IOException {
  final String source = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
  final Normalizer2 reference = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  final String wholeExpected = reference.normalize(source);

  final CharFilter filtered = new ICUNormalizer2CharFilter(new StringReader(source), reference);
  final StringBuilder collected = new StringBuilder();
  final char[] chunk = new char[10];
  for (int count = filtered.read(chunk); count != -1; count = filtered.read(chunk)) {
    collected.append(chunk, 0, count);
    // The partial output must equal the normalized form of the input prefix ending at the
    // offset that correctOffset reports for the current output length.
    final int sourceEnd = filtered.correctOffset(collected.length());
    assertEquals(collected.toString(), reference.normalize(source.substring(0, sourceEnd)));
  }

  assertEquals(wholeExpected, collected.toString());
}
 
开发者ID:europeana,项目名称:search,代码行数:20,代码来源:TestICUNormalizer2CharFilter.java


示例5: newCharFilterChain

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Builds a chain of zero to two randomly chosen char filters on top of {@code reader}.
 *
 * For each link, a constructor is drawn from {@code charfilters} and arguments are generated
 * for it; the draw is repeated while the combination is reported as broken or while
 * {@code createComponent} returns {@code null}.
 *
 * @param random source of randomness for the chain length, constructor choice and arguments
 * @param reader the innermost reader the chain wraps
 * @return a spec holding the outermost reader and the description accumulated while building
 */
private CharFilterSpec newCharFilterChain(Random random, Reader reader) {
  final CharFilterSpec spec = new CharFilterSpec();
  spec.reader = reader;
  final StringBuilder description = new StringBuilder();
  final int chainLength = random.nextInt(3);
  for (int link = 0; link < chainLength; link++) {
    Reader wrapped = null;
    // Keep drawing until a usable filter has been constructed for this link.
    while (wrapped == null) {
      final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
      final Object[] ctorArgs = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
      if (!broken(ctor, ctorArgs)) {
        wrapped = createComponent(ctor, ctorArgs, description);
      }
    }
    spec.reader = wrapped;
  }
  spec.toString = description.toString();
  return spec;
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:TestRandomChains.java


示例6: testIterationMarksWithJapaneseTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Runs text containing iteration marks through a {@code JapaneseIterationMarkCharFilterFactory}
 * initialised with no arguments, tokenizes the result with a {@code JapaneseTokenizerFactory},
 * and checks the emitted tokens.
 */
public void testIterationMarksWithJapaneseTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"};

  final JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory();
  tokenizers.init(Collections.<String, String>emptyMap());
  tokenizers.inform(new StringMockResourceLoader(""));

  final JapaneseIterationMarkCharFilterFactory markFilters = new JapaneseIterationMarkCharFilterFactory();
  markFilters.init(Collections.<String, String>emptyMap());

  final CharFilter marksExpanded = markFilters.create(new StringReader(text));
  final TokenStream tokens = tokenizers.create(marksExpanded);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:17,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例7: testKanjiOnlyIterationMarksWithJapaneseTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Same pipeline as the default iteration-mark test, but with the char filter factory
 * configured with {@code normalizeKanji=true} and {@code normalizeKana=false}; checks the
 * tokens produced for that configuration.
 */
public void testKanjiOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"};

  final JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory();
  tokenizers.init(Collections.<String, String>emptyMap());
  tokenizers.inform(new StringMockResourceLoader(""));

  final Map<String, String> markOptions = new HashMap<String, String>();
  markOptions.put("normalizeKanji", "true");
  markOptions.put("normalizeKana", "false");
  final JapaneseIterationMarkCharFilterFactory markFilters = new JapaneseIterationMarkCharFilterFactory();
  markFilters.init(markOptions);

  final CharFilter marksExpanded = markFilters.create(new StringReader(text));
  final TokenStream tokens = tokenizers.create(marksExpanded);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例8: testKanaOnlyIterationMarksWithJapaneseTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Same pipeline as the default iteration-mark test, but with the char filter factory
 * configured with {@code normalizeKanji=false} and {@code normalizeKana=true}; checks the
 * tokens produced for that configuration.
 */
public void testKanaOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"};

  final JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory();
  tokenizers.init(Collections.<String, String>emptyMap());
  tokenizers.inform(new StringMockResourceLoader(""));

  final Map<String, String> markOptions = new HashMap<String, String>();
  markOptions.put("normalizeKanji", "false");
  markOptions.put("normalizeKana", "true");
  final JapaneseIterationMarkCharFilterFactory markFilters = new JapaneseIterationMarkCharFilterFactory();
  markFilters.init(markOptions);

  final CharFilter marksExpanded = markFilters.create(new StringReader(text));
  final TokenStream tokens = tokenizers.create(marksExpanded);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:19,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例9: testIterationMarksWithKeywordTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Applies a {@code JapaneseIterationMarkCharFilterFactory} built from an empty argument map
 * and feeds the result to a KEYWORD-mode {@code MockTokenizer}, expecting one single token.
 */
public void testIterationMarksWithKeywordTokenizer() throws IOException {
  final String source = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時時馬鹿馬鹿しいところどころミスズ"};

  final JapaneseIterationMarkCharFilterFactory markFilters =
      new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
  final CharFilter marksExpanded = markFilters.create(new StringReader(source));
  final TokenStream tokens = new MockTokenizer(marksExpanded, MockTokenizer.KEYWORD, false);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:europeana,项目名称:search,代码行数:8,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例10: testIterationMarksWithJapaneseTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Runs text containing iteration marks through a {@code JapaneseIterationMarkCharFilterFactory}
 * built from an empty argument map, tokenizes the result with a {@code JapaneseTokenizerFactory},
 * and checks the emitted tokens.
 */
public void testIterationMarksWithJapaneseTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"};

  final JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new HashMap<String,String>());
  tokenizers.inform(new StringMockResourceLoader(""));

  final JapaneseIterationMarkCharFilterFactory markFilters =
      new JapaneseIterationMarkCharFilterFactory(new HashMap<String,String>());
  final CharFilter marksExpanded = markFilters.create(new StringReader(text));

  final TokenStream tokens = tokenizers.create(newAttributeFactory(), marksExpanded);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例11: testKanjiOnlyIterationMarksWithJapaneseTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Tokenizes text after a {@code JapaneseIterationMarkCharFilterFactory} configured with
 * {@code normalizeKanji=true} and {@code normalizeKana=false}, and checks the tokens
 * produced for that configuration.
 */
public void testKanjiOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時時", "馬鹿馬鹿しい", "ところ", "ゞ", "ゝ", "ゝ", "ミス", "ヾ"};

  final JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new HashMap<String,String>());
  tokenizers.inform(new StringMockResourceLoader(""));

  final Map<String, String> markOptions = new HashMap<>();
  markOptions.put("normalizeKanji", "true");
  markOptions.put("normalizeKana", "false");
  final JapaneseIterationMarkCharFilterFactory markFilters =
      new JapaneseIterationMarkCharFilterFactory(markOptions);

  final CharFilter marksExpanded = markFilters.create(new StringReader(text));
  final TokenStream tokens = tokenizers.create(newAttributeFactory(), marksExpanded);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例12: testKanaOnlyIterationMarksWithJapaneseTokenizer

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Tokenizes text after a {@code JapaneseIterationMarkCharFilterFactory} configured with
 * {@code normalizeKanji=false} and {@code normalizeKana=true}, and checks the tokens
 * produced for that configuration.
 */
public void testKanaOnlyIterationMarksWithJapaneseTokenizer() throws IOException {
  final String text = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時々", "馬鹿", "々", "々", "しい", "ところどころ", "ミ", "スズ"};

  final JapaneseTokenizerFactory tokenizers = new JapaneseTokenizerFactory(new HashMap<String,String>());
  tokenizers.inform(new StringMockResourceLoader(""));

  final Map<String, String> markOptions = new HashMap<>();
  markOptions.put("normalizeKanji", "false");
  markOptions.put("normalizeKana", "true");
  final JapaneseIterationMarkCharFilterFactory markFilters =
      new JapaneseIterationMarkCharFilterFactory(markOptions);

  final CharFilter marksExpanded = markFilters.create(new StringReader(text));
  final TokenStream tokens = tokenizers.create(newAttributeFactory(), marksExpanded);
  assertTokenStreamContents(tokens, expectedTokens);
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestJapaneseIterationMarkCharFilterFactory.java


示例13: testKanjiOnly

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Checks the filter output when only kanji repetition marks are enabled
 * (second constructor flag {@code true}, third {@code false}).
 */
public void testKanjiOnly() throws IOException {
  final String source = "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。";
  final String expected = "時時、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。";
  final boolean normalizeKanji = true;
  final boolean normalizeKana = false;

  final CharFilter marksExpanded =
      new JapaneseIterationMarkCharFilter(new StringReader(source), normalizeKanji, normalizeKana);
  assertCharFilterEquals(marksExpanded, expected);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:TestJapaneseIterationMarkCharFilter.java


示例14: testKanaOnly

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Checks the filter output when only kana repetition marks are enabled
 * (second constructor flag {@code false}, third {@code true}).
 */
public void testKanaOnly() throws IOException {
  final String source = "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。";
  final String expected = "時々、おおのさんと一緒にお寿司が食べたいです。abcところどころ。";
  final boolean normalizeKanji = false;
  final boolean normalizeKana = true;

  final CharFilter marksExpanded =
      new JapaneseIterationMarkCharFilter(new StringReader(source), normalizeKanji, normalizeKana);
  assertCharFilterEquals(marksExpanded, expected);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:TestJapaneseIterationMarkCharFilter.java


示例15: testNone

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Checks that the filter output equals its input when both the kanji and the kana
 * repetition-mark flags are {@code false}.
 */
public void testNone() throws IOException {
  final String source = "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。";
  final String expected = "時々、おゝのさんと一緒にお寿司が食べたいです。abcところゞゝゝ。";
  final boolean normalizeKanji = false;
  final boolean normalizeKana = false;

  final CharFilter marksExpanded =
      new JapaneseIterationMarkCharFilter(new StringReader(source), normalizeKanji, normalizeKana);
  assertCharFilterEquals(marksExpanded, expected);
}
 
开发者ID:europeana,项目名称:search,代码行数:10,代码来源:TestJapaneseIterationMarkCharFilter.java


示例16: testTokenStream

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Tokenizes whitespace-separated input through an {@code nfkc} COMPOSE
 * {@code ICUNormalizer2CharFilter} and checks the token texts, their start and end offsets,
 * and the final offset.
 */
public void testTokenStream() throws IOException {
  // '℃', '№', '㈱', '㌘', 'サ'+'<<', 'ソ'+'<<', '㌰'+'<<'
  final String source = "℃ № ㈱ ㌘ ザ ゾ ㌰゙";
  final Normalizer2 nfkcCompose = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);

  final CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(source), nfkcCompose);
  final Tokenizer tokens = new MockTokenizer(normalized, MockTokenizer.WHITESPACE, false);

  final String[] expectedTerms = {"°C", "No", "(株)", "グラム", "ザ", "ゾ", "ピゴ"};
  final int[] expectedStarts = {0, 2, 4, 6, 8, 11, 14};
  final int[] expectedEnds = {1, 3, 5, 7, 10, 13, 16};
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, source.length());
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestICUNormalizer2CharFilter.java


示例17: testTokenStream2

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Splits the output of an {@code nfkc_cf} COMPOSE {@code ICUNormalizer2CharFilter} into
 * 1-grams and checks the token texts, their start and end offsets, and the final offset.
 */
public void testTokenStream2() throws IOException {
  // '㌰', '<<'゙, '5', '℃', '№', '㈱', '㌘', 'サ', '<<', 'ソ', '<<'
  final String source = "㌰゙5℃№㈱㌘ザゾ";
  final Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);

  final CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(source), nfkcCasefold);
  final Tokenizer tokens = new NGramTokenizer(newAttributeFactory(), normalized, 1, 1);

  final String[] expectedTerms =
      {"ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ"};
  final int[] expectedStarts = {0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9};
  final int[] expectedEnds = {1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11};
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, source.length());
}
 
开发者ID:europeana,项目名称:search,代码行数:17,代码来源:TestICUNormalizer2CharFilter.java


示例18: testMassiveLigature

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Normalizes the single code point U+FDFA with an {@code nfkc_cf} COMPOSE
 * {@code ICUNormalizer2CharFilter}, tokenizes on whitespace, and checks the four resulting
 * tokens together with their offsets into the one-char input.
 */
public void testMassiveLigature() throws IOException {
  final String source = "\uFDFA";
  final Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);

  final CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(source), nfkcCasefold);
  final Tokenizer tokens = new MockTokenizer(normalized, MockTokenizer.WHITESPACE, false);

  final String[] expectedTerms = {"صلى", "الله", "عليه", "وسلم"};
  final int[] expectedStarts = {0, 0, 0, 0};
  final int[] expectedEnds = {0, 0, 0, 1};
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, source.length());
}
 
开发者ID:europeana,项目名称:search,代码行数:16,代码来源:TestICUNormalizer2CharFilter.java


示例19: checkOutput

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Runs {@code input} through a {@code PatternReplaceCharFilter} and verifies both the
 * replaced text and the offset mapping back into the original input.
 *
 * The "index-matched" string is built by taking, for every output position, the input
 * character at the offset {@code correctOffset} reports for it, or {@code "-"} when that
 * offset is negative. On any mismatch the inputs and both actual/expected pairs are printed
 * to standard output before the assertions fail.
 *
 * @param input the text fed to the char filter
 * @param pattern the regular expression source passed to {@code pattern(...)}
 * @param replacement the replacement string handed to the filter
 * @param expectedOutput the text the filter is expected to produce
 * @param expectedIndexMatchedOutput the expected index-matched string described above
 * @throws IOException if reading from the filter fails
 */
private void checkOutput(String input, String pattern, String replacement,
    String expectedOutput, String expectedIndexMatchedOutput) throws IOException {
  CharFilter cs = new PatternReplaceCharFilter(pattern(pattern), replacement,
      new StringReader(input));

  StringBuilder output = new StringBuilder();
  // Reader.read() signals end of stream with -1; comparing against 0 would also stop on a
  // legitimate U+0000 character and silently truncate the output.
  for (int chr = cs.read(); chr != -1; chr = cs.read()) {
    output.append((char) chr);
  }

  StringBuilder indexMatched = new StringBuilder();
  for (int i = 0; i < output.length(); i++) {
    final int inputOffset = cs.correctOffset(i);
    if (inputOffset < 0) {
      indexMatched.append("-");
    } else {
      indexMatched.append(input.charAt(inputOffset));
    }
  }

  boolean outputGood = expectedOutput.equals(output.toString());
  boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());

  // Dump everything needed to diagnose a failure before the assertions below trip.
  if (!outputGood || !indexMatchedGood) {
    System.out.println("Pattern : " + pattern);
    System.out.println("Replac. : " + replacement);
    System.out.println("Input   : " + input);
    System.out.println("Output  : " + output);
    System.out.println("Expected: " + expectedOutput);
    System.out.println("Output/i: " + indexMatched);
    System.out.println("Expected: " + expectedIndexMatchedOutput);
    System.out.println();
  }

  assertTrue("Output doesn't match.", outputGood);
  assertTrue("Index-matched output doesn't match.", indexMatchedGood);
}
 
开发者ID:europeana,项目名称:search,代码行数:33,代码来源:TestPatternReplaceCharFilter.java


示例20: testNothingChange

import org.apache.lucene.analysis.CharFilter; //导入依赖的package包/类
/**
 * Feeds text through a {@code PatternReplaceCharFilter} with the pattern
 * {@code (aa)\s+(bb)\s+(cc)} and checks that whitespace tokenization yields the words of
 * the input with the listed start and end offsets.
 */
public void testNothingChange() throws IOException {
  final String source = "this is test.";
  final String[] expectedTerms = {"this", "is", "test."};
  final int[] expectedStarts = {0, 5, 8};
  final int[] expectedEnds = {4, 7, 13};

  final CharFilter replaced = new PatternReplaceCharFilter(
      pattern("(aa)\\s+(bb)\\s+(cc)"), "$1$2$3", new StringReader(source));
  final TokenStream tokens = new MockTokenizer(replaced, MockTokenizer.WHITESPACE, false);

  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, source.length());
}
 
开发者ID:europeana,项目名称:search,代码行数:12,代码来源:TestPatternReplaceCharFilter.java



注:本文中的org.apache.lucene.analysis.CharFilter类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java OFStatsReplyFlagsSerializerVer14类代码示例发布时间:2022-05-22
下一篇:
Java Redirect类代码示例发布时间:2022-05-22
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap