本文整理汇总了C#中Lucene.Net.Analysis.Standard.StandardTokenizer类的典型用法代码示例。如果您正苦于以下问题:C# StandardTokenizer类的具体用法?C# StandardTokenizer怎么用?C# StandardTokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
StandardTokenizer类属于Lucene.Net.Analysis.Standard命名空间,在下文中一共展示了StandardTokenizer类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C#代码示例。
示例1: TokenStream
/// <summary>
/// Builds the analysis chain: StandardTokenizer -> ShingleMatrixFilter
/// (2..8-token shingles joined by spaces) -> LowerCaseFilter -> StopFilter
/// using the English stop-word set.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    TokenStream stream = new StandardTokenizer(Version.LUCENE_30, reader);
    stream = new ShingleMatrixFilter(stream, 2, 8, ' ');
    stream = new LowerCaseFilter(stream);
    return new StopFilter(true, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
开发者ID:kevinobee,项目名称:autohaus,代码行数:7,代码来源:NGramAnalyzer.cs
示例2: CreateComponents
/// <summary>
/// Constructs a <seealso cref="StandardTokenizer"/> filtered by a
/// <seealso cref="StandardFilter"/>, an optional EnglishPossessiveFilter,
/// a (Turkish- or standard-) <seealso cref="LowerCaseFilter"/>, an optional
/// <seealso cref="StopFilter"/>, and a <seealso cref="SnowballFilter"/>
/// for the stemmer language held in <c>name</c>.
/// </summary>
public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream sink = new StandardFilter(matchVersion, source);

    bool isAtLeast31 = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);

    // English-family stemmers expect the possessive 's to be stripped first.
    if (isAtLeast31 && (name.Equals("English") || name.Equals("Porter") || name.Equals("Lovins")))
    {
        sink = new EnglishPossessiveFilter(sink);
    }

    // Turkish needs its own lower-casing (dotted/dotless 'i'); the Turkish
    // stemmer expects that normalization.
    if (isAtLeast31 && name.Equals("Turkish"))
    {
        sink = new TurkishLowerCaseFilter(sink);
    }
    else
    {
        sink = new LowerCaseFilter(matchVersion, sink);
    }

    if (stopSet != null)
    {
        sink = new StopFilter(matchVersion, sink, stopSet);
    }

    return new TokenStreamComponents(source, new SnowballFilter(sink, name));
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:30,代码来源:SnowballAnalyzer.cs
示例3: TokenStream
/// <summary>
/// Builds the shingle (token n-gram) analysis chain:
/// StandardTokenizer -> ShingleMatrixFilter -> LowerCaseFilter -> StopFilter.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // This should be a good tokenizer for most European-language documents:
    // splits words at punctuation characters (removing punctuation), splits
    // at hyphens unless there's a number in the token, and recognizes email
    // addresses and internet hostnames as single tokens.
    // NOTE: fixed misspelled local "intput" -> "input".
    var input = new StandardTokenizer(Version.LUCENE_30, reader);

    // A ShingleMatrixFilter constructs shingles from a token stream, e.g.
    // "2010 Audi RS5 Quattro Coupe" => "2010 Audi", "Audi RS5", "RS5 Quattro", "Quattro Coupe"
    var shingleMatrixOutput = new ShingleMatrixFilter(
        input, // stream from which to construct the matrix
        2,     // minimum number of tokens in any shingle
        8,     // maximum number of tokens in any shingle
        ' ');  // character placed between token parts in a shingle

    // Normalize token text to lower case, then remove English stop words.
    var lowerCaseFilter = new LowerCaseFilter(shingleMatrixOutput);
    return new StopFilter(true, lowerCaseFilter, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
开发者ID:alexlapinski,项目名称:autohaus,代码行数:26,代码来源:NGramAnalyzer.cs
示例4: TokenStream
/// <summary>
/// Case-preserving chain: StandardTokenizer -> StandardFilter -> StopFilter.
/// Note there is deliberately no LowerCaseFilter, so token casing survives.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    TokenStream filtered = new StandardFilter(new StandardTokenizer(Version.LUCENE_30, reader));
    return new StopFilter(true, filtered, _stopWords, true);
}
开发者ID:McBits,项目名称:LanguageLib,代码行数:7,代码来源:CaseSensitiveStandardAnalyzer.cs
示例5: TokenStream
/// <summary>
/// Russian analysis chain: StandardTokenizer -> StandardFilter ->
/// LowerCaseFilter -> RuSnowballFilter (Russian Snowball stemming).
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    return new RuSnowballFilter(
        new LowerCaseFilter(
            new StandardFilter(
                new StandardTokenizer(reader))));
}
开发者ID:AzarinSergey,项目名称:learn,代码行数:8,代码来源:RuAnalyzer.cs
示例6: TokenStream
/// <summary>Constructs a {@link StandardTokenizer} filtered by a {@link
/// StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}
/// using the configured stop-word set.
/// </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    TokenStream stream = new StandardTokenizer(reader);
    stream = new StandardFilter(stream);
    stream = new LowerCaseFilter(stream);
    return new StopFilter(stream, stopSet);
}
开发者ID:ArsenShnurkov,项目名称:beagle-1,代码行数:11,代码来源:StandardAnalyzer.cs
示例7: TokenStream
/// <summary>
/// N-gram chain: StandardTokenizer (tokens capped at 255 chars) ->
/// StandardFilter -> LowerCaseFilter -> StopFilter -> NGramTokenFilter
/// emitting 2..6-character grams.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    var source = new StandardTokenizer(Version.LUCENE_29, reader) { MaxTokenLength = 255 };
    TokenStream chain = new StandardFilter(source);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(false, chain, StandardAnalyzer.STOP_WORDS_SET);
    return new NGramTokenFilter(chain, 2, 6);
}
开发者ID:j2jensen,项目名称:ravendb,代码行数:9,代码来源:NGramAnalyzer.cs
示例8: TokenStream
/// <summary>
/// Croatian chain: StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
/// CroatianStemFilter. Stop-word filtering is intentionally disabled (below).
/// </summary>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    Tokenizer source = new StandardTokenizer(matchVersion, reader);
    TokenStream sink = new LowerCaseFilter(new StandardFilter(source));
    //sink = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), sink, stopSet);
    return new CroatianStemFilter(sink, stemmer);
}
开发者ID:bneuhold,项目名称:pb-dev,代码行数:9,代码来源:CroatianAnalyzer.cs
示例9: TokenStream
/// <summary>
/// Thai chain: StandardTokenizer -> StandardFilter -> ThaiWordFilter ->
/// StopFilter with the English stop-word set.
/// </summary>
public override TokenStream TokenStream(String fieldName, TextReader reader)
{
    TokenStream stream = new StandardFilter(new StandardTokenizer(matchVersion, reader));
    stream = new ThaiWordFilter(stream);
    return new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
        stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
开发者ID:synhershko,项目名称:lucene.net,代码行数:9,代码来源:ThaiAnalyzer.cs
示例10: TokenStream
/** Constructs a {@link StandardTokenizer} filtered by a {@link
 * StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}
 * and a {@link SpanishStemFilter}. */
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    TokenStream chain = new StandardTokenizer(Version.LUCENE_24, reader);
    chain = new StandardFilter(chain);
    chain = new LowerCaseFilter(chain);
    chain = new StopFilter(true, chain, stopTable);
    return new SpanishStemFilter(chain);
}
开发者ID:supayhuasi,项目名称:LaGaceta,代码行数:12,代码来源:SpanishAnalyzer.cs
示例11: TokenStream
/// <summary>
/// Persian chain: StandardTokenizer -> LowerCaseFilter ->
/// PersianNormalizationFilter -> StopFilter -> PersianStemFilter.
/// </summary>
public override TokenStream TokenStream(string fieldname, TextReader reader)
{
    TokenStream chain = new StandardTokenizer(_version, reader);
    // Lower-case first, then apply Persian character normalization.
    chain = new PersianNormalizationFilter(new LowerCaseFilter(chain));
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(_version), chain, _stoptable);
    return new PersianStemFilter(chain);
}
开发者ID:shahr00z,项目名称:Lucene.Net.Analysis.Fa,代码行数:9,代码来源:PersianAnalyzer.cs
示例12: TokenStream
/// <summary>
/// Bulgarian chain: StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
/// StopFilter -> BulgarianStemFilter.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    TokenStream chain = new StandardTokenizer(matchVersion, reader);
    chain = new LowerCaseFilter(new StandardFilter(chain));
    chain = new StopFilter(this.enableStopPositionIncrements, chain, stoptable);
    return new BulgarianStemFilter(chain);
}
开发者ID:KristianKirov,项目名称:PoshBoutique,代码行数:10,代码来源:BulgarianAnalyzer.cs
示例13: TokenStream
/// <summary>
/// N-gram chain: StandardTokenizer (tokens capped at 255 chars) ->
/// StandardFilter -> LowerCaseFilter -> NGramTokenFilter emitting
/// 2..255-character grams.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    var source = new StandardTokenizer(Version.LUCENE_30, reader) { MaxTokenLength = 255 };
    TokenStream chain = new LowerCaseFilter(new StandardFilter(source));
    return new NGramTokenFilter(chain, 2, 255);
}
开发者ID:jncc,项目名称:topcat,代码行数:10,代码来源:NGramAnalyzer.cs
示例14: TokenStream
/// <summary>Constructs a <see cref="StandardTokenizer"/> filtered by a
/// <see cref="StandardFilter"/>, a <see cref="LowerCaseFilter"/>, an optional
/// <see cref="StopFilter"/> and a SnowballFilter for the configured language.
/// </summary>
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
    TokenStream chain = new StandardTokenizer(matchVersion, reader);
    chain = new LowerCaseFilter(new StandardFilter(chain));
    if (stopSet != null)
    {
        chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion),
            chain, stopSet);
    }
    return new SnowballFilter(chain, name);
}
开发者ID:synhershko,项目名称:lucene.net,代码行数:14,代码来源:SnowballAnalyzer.cs
示例15: TestElision_
/// <summary>
/// Verifies that ElisionFilter strips the elided articles "l'" and "M'"
/// while leaving the apostrophe inside "O'brian" untouched.
/// </summary>
public virtual void TestElision_()
{
    const string text = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(text));
    CharArraySet articles = new CharArraySet(TEST_VERSION_CURRENT, AsSet("l", "M"), false);
    IList<string> terms = Filter(new ElisionFilter(source, articles));
    assertEquals("embrouille", terms[4]);
    assertEquals("O'brian", terms[6]);
    assertEquals("enfin", terms[7]);
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:TestElision.cs
示例16: TestHugeDoc
/// <summary>
/// Tokenizes text preceded by ~4K of whitespace and asserts the tokenizer
/// still yields exactly the two trailing tokens.
/// </summary>
public virtual void TestHugeDoc()
{
    // 4094 spaces of leading padding followed by the payload text.
    string input = new string(' ', 4094) + "testing 1234";
    StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
    BaseTokenStreamTestCase.AssertTokenStreamContents(tokenizer, new string[] { "testing", "1234" });
}
开发者ID:ChristopherHaws,项目名称:lucenenet,代码行数:11,代码来源:TestStandardAnalyzer.cs
示例17: TokenizingReturnsExpectedTerms
/// <summary>
/// Parametrized test: tokenizing <paramref name="text"/> through
/// ExpandAcronymsFilter yields exactly the expected token attributes.
/// </summary>
public void TokenizingReturnsExpectedTerms(string text, TokenAttributes[] expected)
{
    // Arrange: standard tokenizer wrapped by the acronym-expansion filter.
    var source = new StandardTokenizer(Version.LUCENE_30, new StringReader(text));
    var filterUnderTest = new ExpandAcronymsFilter(source, NuGetAcronymExpansionProvider.Instance);

    // Act
    var actual = filterUnderTest.Tokenize().ToArray();

    // Assert
    Assert.Equal(expected, actual);
}
开发者ID:NuGet,项目名称:NuGet.Services.Metadata,代码行数:12,代码来源:ExpandAcronymsFilterTests.cs
示例18: TokenStream
/// <summary>
/// Autocomplete chain: StandardTokenizer -> StandardFilter -> LowerCaseFilter ->
/// ASCIIFoldingFilter -> StopFilter -> front-anchored EdgeNGramTokenFilter
/// producing 1..20-character prefixes.
/// </summary>
public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
{
    TokenStream chain = new StandardTokenizer(kLuceneVersion, reader);
    chain = new StandardFilter(chain);
    chain = new ASCIIFoldingFilter(new LowerCaseFilter(chain));
    chain = new StopFilter(false, chain, StopFilter.MakeStopSet(kEnglishStopWords));
    return new EdgeNGramTokenFilter(
        chain, Lucene.Net.Analysis.NGram.EdgeNGramTokenFilter.Side.FRONT, 1, 20);
}
开发者ID:vishalniit,项目名称:SitecoreAutoUpdateIndexBuilder,代码行数:13,代码来源:AutoSearch.cs
示例19: TokenStream
/// <summary>
/// French chain: StandardTokenizer -> StandardFilter -> StopFilter (French
/// stop words) -> FrenchStemFilter -> LowerCaseFilter -> ASCIIFoldingFilter.
/// Lower-casing deliberately happens AFTER stemming.
/// </summary>
public override TokenStream TokenStream(string fieldName, TextReader reader) {
    TokenStream chain = new StandardFilter(new StandardTokenizer(this._luceneVersion, reader));
    var stopWords = CharArraySet.UnmodifiableSet(
        new CharArraySet((IEnumerable<string>)FRENCH_STOP_WORDS, false));
    chain = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(this._luceneVersion),
        chain, stopWords);
    chain = new FrenchStemFilter(chain, CharArraySet.EMPTY_SET);
    // Convert to lowercase after stemming!
    chain = new LowerCaseFilter(chain);
    return new ASCIIFoldingFilter(chain);
}
开发者ID:Codinlab,项目名称:Lucene.FrenchAnalyser,代码行数:13,代码来源:FrenchAnalyser.cs
示例20: TestElision2
/// <summary>
/// Verifies that ElisionFilter strips the elided articles "l'" and "M'"
/// while the apostrophe inside "O'brian" is preserved.
/// </summary>
public void TestElision2()
{
    const string text = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
    Tokenizer source = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(text));
    var articles = new HashSet<String> { "l", "M" };
    TokenFilter filter = new ElisionFilter(source, articles);
    List<string> terms = Filtre(filter);
    Assert.AreEqual("embrouille", terms[4]);
    Assert.AreEqual("O'brian", terms[6]);
    Assert.AreEqual("enfin", terms[7]);
}
开发者ID:hanabi1224,项目名称:lucene.net,代码行数:13,代码来源:TestElision.cs
注:本文中的Lucene.Net.Analysis.Standard.StandardTokenizer类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论