• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

C# Standard.StandardTokenizer类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C#中Lucene.Net.Analysis.Standard.StandardTokenizer的典型用法代码示例。如果您正苦于以下问题:C# StandardTokenizer类的具体用法?C# StandardTokenizer怎么用?C# StandardTokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



StandardTokenizer类属于Lucene.Net.Analysis.Standard命名空间,在下文中一共展示了StandardTokenizer类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C#代码示例。

示例1: TokenStream

 /// <summary>
 /// Builds the n-gram analysis chain: standard tokenization, 2-8 token
 /// shingles joined by spaces, lower-casing, then English stop-word removal.
 /// </summary>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream chain = new StandardTokenizer(Version.LUCENE_30, reader);
     chain = new ShingleMatrixFilter(chain, 2, 8, ' ');
     chain = new LowerCaseFilter(chain);
     return new StopFilter(true, chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 }
开发者ID:kevinobee,项目名称:autohaus,代码行数:7,代码来源:NGramAnalyzer.cs


示例2: CreateComponents

 /// <summary>
 /// Constructs a <seealso cref="StandardTokenizer"/> filtered by a
 /// StandardFilter, a <seealso cref="LowerCaseFilter"/>, a
 /// <seealso cref="StopFilter"/>, and a <seealso cref="SnowballFilter"/>.
 /// </summary>
 public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream chain = new StandardFilter(matchVersion, source);
     bool post31 = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

     // The English-family stemmers expect the possessive 's to be removed first.
     if (post31 && (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins")))
     {
         chain = new EnglishPossessiveFilter(chain);
     }

     // Turkish gets its own lower-case filter; the stemmer expects it.
     if (post31 && name.Equals("Turkish"))
     {
         chain = new TurkishLowerCaseFilter(chain);
     }
     else
     {
         chain = new LowerCaseFilter(matchVersion, chain);
     }

     if (stopSet != null)
     {
         chain = new StopFilter(matchVersion, chain, stopSet);
     }

     chain = new SnowballFilter(chain, name);
     return new TokenStreamComponents(source, chain);
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:30,代码来源:SnowballAnalyzer.cs


示例3: TokenStream

        /// <summary>
        /// Tokenizes with standard European-language rules, builds 2-8 token
        /// shingles separated by spaces, lower-cases the result, and drops
        /// English stop words.
        /// </summary>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            // StandardTokenizer splits words at punctuation (removing it) and at
            // hyphens unless the token contains a number, and keeps e-mail
            // addresses and internet hostnames as single tokens.
            var input = new StandardTokenizer(Version.LUCENE_30, reader);

            // ShingleMatrixFilter constructs token n-grams, e.g.
            // "2010 Audi RS5 Quattro Coupe" => "2010 Audi", "Audi RS5",
            // "RS5 Quattro", "Quattro Coupe".
            var shingles = new ShingleMatrixFilter(
                input,   // stream from which to construct the matrix
                2,       // minimum number of tokens in any shingle
                8,       // maximum number of tokens in any shingle
                ' ');    // separator between token parts of a shingle

            // Normalize token text to lower case.
            var lowerCased = new LowerCaseFilter(shingles);

            // Finally remove English stop words.
            return new StopFilter(true, lowerCased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
开发者ID:alexlapinski,项目名称:autohaus,代码行数:26,代码来源:NGramAnalyzer.cs


示例4: TokenStream

 /// <summary>
 /// Standard tokenization plus StandardFilter, then stop-word removal over
 /// <c>_stopWords</c>. No lower-casing stage, so matching stays case sensitive.
 /// </summary>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream filtered = new StandardFilter(new StandardTokenizer(Version.LUCENE_30, reader));
     return new StopFilter(true, filtered, _stopWords, true);
 }
开发者ID:McBits,项目名称:LanguageLib,代码行数:7,代码来源:CaseSensitiveStandardAnalyzer.cs


示例5: TokenStream

 /// <summary>
 /// Russian analysis chain: standard tokenizer and filter, lower-casing,
 /// then Russian Snowball stemming.
 /// </summary>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream chain = new StandardTokenizer(reader);
     chain = new StandardFilter(chain);
     chain = new LowerCaseFilter(chain);
     return new RuSnowballFilter(chain);
 }
开发者ID:AzarinSergey,项目名称:learn,代码行数:8,代码来源:RuAnalyzer.cs


示例6: TokenStream

 /// <summary>
 /// Constructs a StandardTokenizer filtered by a StandardFilter, a
 /// LowerCaseFilter and a StopFilter over <c>stopSet</c>.
 /// </summary>
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream chain = new StandardTokenizer(reader);
     chain = new StandardFilter(chain);
     chain = new LowerCaseFilter(chain);
     return new StopFilter(chain, stopSet);
 }
开发者ID:ArsenShnurkov,项目名称:beagle-1,代码行数:11,代码来源:StandardAnalyzer.cs


示例7: TokenStream

 /// <summary>
 /// N-gram chain: standard tokenization with tokens capped at 255 chars,
 /// standard filtering, lower-casing, stop-word removal, then 2-6 character
 /// n-grams.
 /// </summary>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     var source = new StandardTokenizer(Version.LUCENE_29, reader) { MaxTokenLength = 255 };
     TokenStream chain = new StandardFilter(source);
     chain = new LowerCaseFilter(chain);
     chain = new StopFilter(false, chain, StandardAnalyzer.STOP_WORDS_SET);
     return new NGramTokenFilter(chain, 2, 6);
 }
开发者ID:j2jensen,项目名称:ravendb,代码行数:9,代码来源:NGramAnalyzer.cs


示例8: TokenStream

 /// <summary>
 /// Croatian chain: standard tokenizer and filter, lower-casing, then
 /// Croatian stemming. No stop-word filtering is applied.
 /// </summary>
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream chain = new StandardFilter(source);
     chain = new LowerCaseFilter(chain);
     return new CroatianStemFilter(chain, stemmer);
 }
开发者ID:bneuhold,项目名称:pb-dev,代码行数:9,代码来源:CroatianAnalyzer.cs


示例9: TokenStream

 /// <summary>
 /// Thai chain: standard tokenizer and filter, Thai word segmentation, then
 /// English stop-word removal.
 /// </summary>
 public override TokenStream TokenStream(String fieldName, TextReader reader)
 {
     TokenStream chain = new StandardFilter(new StandardTokenizer(matchVersion, reader));
     chain = new ThaiWordFilter(chain);
     return new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                           chain, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 }
开发者ID:synhershko,项目名称:lucene.net,代码行数:9,代码来源:ThaiAnalyzer.cs


示例10: TokenStream

 /// <summary>
 /// Constructs a StandardTokenizer filtered by a StandardFilter, a
 /// LowerCaseFilter, a StopFilter over <c>stopTable</c>, and a
 /// SpanishStemFilter.
 /// </summary>
 public override TokenStream TokenStream(string fieldName, TextReader reader)
 {
     TokenStream chain = new StandardTokenizer(Version.LUCENE_24, reader);
     chain = new StandardFilter(chain);
     chain = new LowerCaseFilter(chain);
     chain = new StopFilter(true, chain, stopTable);
     return new SpanishStemFilter(chain);
 }
开发者ID:supayhuasi,项目名称:LaGaceta,代码行数:12,代码来源:SpanishAnalyzer.cs


示例11: TokenStream

 /// <summary>
 /// Persian chain: standard tokenization, lower-casing, Persian
 /// normalization, stop-word removal over <c>_stoptable</c>, then Persian
 /// stemming.
 /// </summary>
 public override TokenStream TokenStream(string fieldname, TextReader reader)
 {
     TokenStream chain = new LowerCaseFilter(new StandardTokenizer(_version, reader));
     chain = new PersianNormalizationFilter(chain);
     chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(_version), chain, _stoptable);
     return new PersianStemFilter(chain);
 }
开发者ID:shahr00z,项目名称:Lucene.Net.Analysis.Fa,代码行数:9,代码来源:PersianAnalyzer.cs


示例12: TokenStream

        /// <summary>
        /// Bulgarian chain: standard tokenizer and filter, lower-casing,
        /// stop-word removal over <c>stoptable</c>, then Bulgarian stemming.
        /// </summary>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            TokenStream chain = new StandardFilter(new StandardTokenizer(matchVersion, reader));
            chain = new LowerCaseFilter(chain);
            chain = new StopFilter(this.enableStopPositionIncrements, chain, stoptable);
            return new BulgarianStemFilter(chain);
        }
开发者ID:KristianKirov,项目名称:PoshBoutique,代码行数:10,代码来源:BulgarianAnalyzer.cs


示例13: TokenStream

        /// <summary>
        /// N-gram chain: standard tokenization with a 255-character token cap,
        /// standard filtering, lower-casing, then 2-255 character n-grams.
        /// </summary>
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            var source = new StandardTokenizer(Version.LUCENE_30, reader) { MaxTokenLength = 255 };
            TokenStream chain = new StandardFilter(source);
            chain = new LowerCaseFilter(chain);
            return new NGramTokenFilter(chain, 2, 255);
        }
开发者ID:jncc,项目名称:topcat,代码行数:10,代码来源:NGramAnalyzer.cs


示例14: TokenStream

 /// <summary>
 /// Constructs a <see cref="StandardTokenizer"/> filtered by a
 /// StandardFilter, a <see cref="LowerCaseFilter"/>, an optional
 /// <see cref="StopFilter"/> (only when <c>stopSet</c> is non-null), and a
 /// SnowballFilter for the language <c>name</c>.
 /// </summary>
 public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
 {
     TokenStream chain = new StandardTokenizer(matchVersion, reader);
     chain = new StandardFilter(chain);
     chain = new LowerCaseFilter(chain);
     if (stopSet != null)
     {
         chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
                                chain, stopSet);
     }
     return new SnowballFilter(chain, name);
 }
开发者ID:synhershko,项目名称:lucene.net,代码行数:14,代码来源:SnowballAnalyzer.cs


示例15: TestElision_

 /// <summary>
 /// Elision of the French articles "l'" and "M'" must strip the article
 /// while leaving unrelated apostrophes (O'brian) intact.
 /// </summary>
 public virtual void TestElision_()
 {
     const string test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
     Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(test));
     CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, AsSet("l", "M"), false);
     TokenFilter filter = new ElisionFilter(tokenizer, articles);
     IList<string> terms = Filter(filter);
     assertEquals("embrouille", terms[4]);
     assertEquals("O'brian", terms[6]);
     assertEquals("enfin", terms[7]);
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:TestElision.cs


示例16: TestHugeDoc

 /// <summary>
 /// The tokenizer must cope with a document that starts with a long run of
 /// whitespace (4094 spaces) and still emit the trailing tokens correctly.
 /// </summary>
 public virtual void TestHugeDoc()
 {
     // new string(char, count) replaces the original StringBuilder + Arrays.Fill.
     string input = new string(' ', 4094) + "testing 1234";
     StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
     BaseTokenStreamTestCase.AssertTokenStreamContents(tokenizer, new string[] { "testing", "1234" });
 }
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:TestStandardAnalyzer.cs


示例17: TokenizingReturnsExpectedTerms

        /// <summary>
        /// Running the acronym-expansion filter over standard-tokenized text
        /// must yield exactly the expected token attributes.
        /// </summary>
        public void TokenizingReturnsExpectedTerms(string text, TokenAttributes[] expected)
        {
            // Arrange
            var stream = new StandardTokenizer(Version.LUCENE_30, new StringReader(text));
            var sut = new ExpandAcronymsFilter(stream, NuGetAcronymExpansionProvider.Instance);

            // Act
            var actual = sut.Tokenize().ToArray();

            // Assert
            Assert.Equal(expected, actual);
        }
开发者ID:NuGet,项目名称:NuGet.Services.Metadata,代码行数:12,代码来源:ExpandAcronymsFilterTests.cs


示例18: TokenStream

        /// <summary>
        /// Search-suggest chain: standard tokenizer and filter, lower-casing,
        /// ASCII folding, English stop-word removal, then front edge n-grams of
        /// length 1-20.
        /// </summary>
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            TokenStream chain = new StandardFilter(new StandardTokenizer(kLuceneVersion, reader));
            chain = new LowerCaseFilter(chain);
            chain = new ASCIIFoldingFilter(chain);
            chain = new StopFilter(false, chain, StopFilter.MakeStopSet(kEnglishStopWords));
            return new EdgeNGramTokenFilter(
                chain, Lucene.Net.Analysis.NGram.EdgeNGramTokenFilter.Side.FRONT, 1, 20);
        }
开发者ID:vishalniit,项目名称:SitecoreAutoUpdateIndexBuilder,代码行数:13,代码来源:AutoSearch.cs


示例19: TokenStream

 /// <summary>
 /// French chain: standard tokenizer and filter, French stop-word removal,
 /// French stemming, then lower-casing and ASCII folding. Lower-casing is
 /// deliberately applied after stemming.
 /// </summary>
 public override TokenStream TokenStream(string fieldName, TextReader reader) {
     TokenStream chain = new StandardFilter(new StandardTokenizer(this._luceneVersion, reader));
     var stopWords = CharArraySet.UnmodifiableSet(
         new CharArraySet((IEnumerable<string>)FRENCH_STOP_WORDS, false));
     chain = new StopFilter(
         StopFilter.GetEnablePositionIncrementsVersionDefault(this._luceneVersion),
         chain,
         stopWords);
     chain = new FrenchStemFilter(chain, CharArraySet.EMPTY_SET);
     // Convert to lowercase after stemming!
     chain = new LowerCaseFilter(chain);
     return new ASCIIFoldingFilter(chain);
 }
开发者ID:Codinlab,项目名称:Lucene.FrenchAnalyser,代码行数:13,代码来源:FrenchAnalyser.cs


示例20: TestElision2

 /// <summary>
 /// Elision of the French articles "l'" and "M'" supplied as a plain
 /// HashSet must strip the article while leaving unrelated apostrophes
 /// (O'brian) intact.
 /// </summary>
 public void TestElision2()
 {
     String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
     Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
     var articles = new HashSet<String> { "l", "M" };
     TokenFilter filter = new ElisionFilter(tokenizer, articles);
     List<string> terms = Filtre(filter);
     Assert.AreEqual("embrouille", terms[4]);
     Assert.AreEqual("O'brian", terms[6]);
     Assert.AreEqual("enfin", terms[7]);
 }
开发者ID:hanabi1224,项目名称:lucene.net,代码行数:13,代码来源:TestElision.cs



注:本文中的Lucene.Net.Analysis.Standard.StandardTokenizer类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C# Documents.Document类代码示例发布时间:2022-05-26
下一篇:
C# Standard.StandardAnalyzer类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap