本文整理汇总了Java中org.apache.lucene.analysis.util.CharacterUtils类的典型用法代码示例。如果您正苦于以下问题:Java CharacterUtils类的具体用法?Java CharacterUtils怎么用?Java CharacterUtils使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
CharacterUtils类属于org.apache.lucene.analysis.util包,在下文中一共展示了CharacterUtils类的16个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: Lucene43EdgeNGramTokenFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a Lucene43EdgeNGramTokenFilter that generates n-grams whose sizes lie
 * within the given range. The filter uses the {@link CharacterUtils} instance
 * returned by {@link CharacterUtils#getJava4Instance()}.
 *
 * @param input   {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
 * @param minGram the smallest n-gram to generate; must be at least 1
 * @param maxGram the largest n-gram to generate; must not be smaller than {@code minGram}
 * @throws IllegalArgumentException if {@code minGram} is below 1 or exceeds {@code maxGram}
 */
public Lucene43EdgeNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
    super(input);

    // Validate the requested gram range before storing anything.
    if (minGram <= 0) {
        throw new IllegalArgumentException("minGram must be greater than zero");
    }
    if (maxGram < minGram) {
        throw new IllegalArgumentException("minGram must not be greater than maxGram");
    }

    this.minGram = minGram;
    this.maxGram = maxGram;
    this.charUtils = CharacterUtils.getJava4Instance();
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:23,代码来源:Lucene43EdgeNGramTokenFilter.java
示例2: init
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Validates the arguments and initializes the tokenizer state: the
 * {@link CharacterUtils} instance, the gram bounds, the read buffers and the
 * term attribute capacity.
 *
 * @param version   Lucene match version; must be on or after {@link Version#LUCENE_4_4_0}
 * @param minGram   the smallest n-gram to generate; must be at least 1
 * @param maxGram   the largest n-gram to generate; must not be smaller than {@code minGram}
 * @param edgesOnly value stored in the {@code edgesOnly} field
 * @throws IllegalArgumentException if {@code version} predates 4.4, if {@code minGram} is
 *         below 1, or if {@code minGram} exceeds {@code maxGram}
 */
private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
    if (!version.onOrAfter(Version.LUCENE_4_4_0)) {
        throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
    }
    // The guard above already rejected every pre-4.4 version, so there is no
    // need to fall back to the Java 4 instance here.
    charUtils = CharacterUtils.getInstance(version);
    if (minGram < 1) {
        throw new IllegalArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram) {
        throw new IllegalArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.edgesOnly = edgesOnly;
    charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
    buffer = new int[charBuffer.getBuffer().length];
    // Make the term att large enough
    termAtt.resizeBuffer(2 * maxGram);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:22,代码来源:NGramTokenizer.java
示例3: MorfologikFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a filter whose stemmer is a {@code DictionaryLookup} over the dictionary
 * returned by {@code morfologik.stemming.Dictionary.getForLanguage(dict)}.
 *
 * @param in      input token stream
 * @param dict    value passed to {@code Dictionary.getForLanguage}
 * @param version version passed to {@link CharacterUtils#getInstance(Version)}
 * @deprecated Use {@link #MorfologikFilter(TokenStream,String)}
 */
@Deprecated
public MorfologikFilter(final TokenStream in, final String dict, final Version version) {
    super(in);
    this.input = in;

    // SOLR-4007: temporarily substitute context class loader to allow finding dictionary resources.
    final Thread currentThread = Thread.currentThread();
    final ClassLoader previousLoader = currentThread.getContextClassLoader();
    try {
        currentThread.setContextClassLoader(morfologik.stemming.Dictionary.class.getClassLoader());
        this.stemmer = new DictionaryLookup(morfologik.stemming.Dictionary.getForLanguage(dict));
        this.charUtils = CharacterUtils.getInstance(version);
        this.lemmaList = Collections.emptyList();
    } finally {
        // Put the caller's loader back whether or not initialization succeeded.
        currentThread.setContextClassLoader(previousLoader);
    }
}
开发者ID:europeana,项目名称:search,代码行数:21,代码来源:MorfologikFilter.java
示例4: PinyinNGramTokenFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of
* the given range
*
* @param input
* {@link TokenStream} holding the input to be tokenized
* @param side
* the {@link Side} from which to chop off an n-gram
* @param minGram
* the smallest n-gram to generate
* @param maxGram
* the largest n-gram to generate
*/
public PinyinNGramTokenFilter(TokenStream input, int minGram, int maxGram) {
super(input);
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
if (minGram > maxGram) {
throw new IllegalArgumentException("minGram must not be greater than maxGram");
}
this.charUtils = CharacterUtils.getInstance();
this.minGram = minGram;
this.maxGram = maxGram;
}
开发者ID:liangbaolin,项目名称:pinyinAnalyzer,代码行数:29,代码来源:PinyinNGramTokenFilter.java
示例5: MorfologikFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Builds a filter whose stemmer is a {@code PolishStemmer} created for the
 * given PolishStemmer.DICTIONARY enum value.
 *
 * @param in      input token stream
 * @param dict    PolishStemmer.DICTIONARY enum passed to the {@code PolishStemmer} constructor
 * @param version Lucene version compatibility for lowercasing.
 */
public MorfologikFilter(final TokenStream in, final DICTIONARY dict, final Version version) {
    super(in);
    this.input = in;

    // SOLR-4007: temporarily substitute context class loader to allow finding dictionary resources.
    final Thread thread = Thread.currentThread();
    final ClassLoader savedLoader = thread.getContextClassLoader();
    try {
        final ClassLoader stemmerLoader = PolishStemmer.class.getClassLoader();
        thread.setContextClassLoader(stemmerLoader);
        this.stemmer = new PolishStemmer(dict);
        this.charUtils = CharacterUtils.getInstance(version);
        this.lemmaList = Collections.emptyList();
    } finally {
        // Restore the loader that was active on entry.
        thread.setContextClassLoader(savedLoader);
    }
}
开发者ID:pkarmstr,项目名称:NYBC,代码行数:24,代码来源:MorfologikFilter.java
示例6: init
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Validates the arguments and initializes the tokenizer state: the
 * {@link CharacterUtils} instance, the gram bounds, the read buffers and the
 * term attribute capacity.
 *
 * @param version   Lucene match version; must be on or after {@link Version#LUCENE_44}
 * @param minGram   the smallest n-gram to generate; must be at least 1
 * @param maxGram   the largest n-gram to generate; must not be smaller than {@code minGram}
 * @param edgesOnly value stored in the {@code edgesOnly} field
 * @throws IllegalArgumentException if {@code version} predates 4.4, if {@code minGram} is
 *         below 1, or if {@code minGram} exceeds {@code maxGram}
 */
private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
    if (!version.onOrAfter(Version.LUCENE_44)) {
        throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer/Lucene43EdgeNGramTokenizer");
    }
    // Pre-4.4 versions were rejected by the guard above, so the Java 4
    // fallback could never be selected; use the version-specific instance directly.
    charUtils = CharacterUtils.getInstance(version);
    if (minGram < 1) {
        throw new IllegalArgumentException("minGram must be greater than zero");
    }
    if (minGram > maxGram) {
        throw new IllegalArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;
    this.edgesOnly = edgesOnly;
    charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars and + 1024 for buffering to not keep polling the Reader
    buffer = new int[charBuffer.getBuffer().length];
    // Make the term att large enough
    termAtt.resizeBuffer(2 * maxGram);
}
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:22,代码来源:NGramTokenizer.java
示例7: MorfologikFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a MorfologikFilter whose stemmer is a {@code PolishStemmer} built
 * with its no-argument constructor.
 *
 * @param in      input token stream
 * @param version Lucene version compatibility for lowercasing.
 */
public MorfologikFilter(final TokenStream in, final Version version) {
    super(in);
    this.input = in;

    // SOLR-4007: temporarily substitute context class loader to allow finding dictionary resources.
    final Thread self = Thread.currentThread();
    final ClassLoader originalLoader = self.getContextClassLoader();
    try {
        self.setContextClassLoader(PolishStemmer.class.getClassLoader());
        stemmer = new PolishStemmer();
        charUtils = CharacterUtils.getInstance(version);
        lemmaList = Collections.emptyList();
    } finally {
        // Undo the loader swap regardless of how the try block exits.
        self.setContextClassLoader(originalLoader);
    }
}
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:22,代码来源:MorfologikFilter.java
示例8: Lucene43NGramTokenFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a Lucene43NGramTokenFilter with the given min and max n-gram sizes.
 * The input is wrapped in a {@code CodepointCountFilter} bounded by
 * {@code minGram} and {@link Integer#MAX_VALUE} before being handed to the
 * superclass.
 *
 * @param input   {@link org.apache.lucene.analysis.TokenStream} holding the input to be tokenized
 * @param minGram the smallest n-gram to generate; must be at least 1
 * @param maxGram the largest n-gram to generate; must not be smaller than {@code minGram}
 * @throws IllegalArgumentException if {@code minGram} is below 1 or exceeds {@code maxGram}
 */
public Lucene43NGramTokenFilter(TokenStream input, int minGram, int maxGram) {
    super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE));
    this.charUtils = CharacterUtils.getJava4Instance();

    if (minGram <= 0) {
        throw new IllegalArgumentException("minGram must be greater than zero");
    }
    if (maxGram < minGram) {
        throw new IllegalArgumentException("minGram must not be greater than maxGram");
    }
    this.minGram = minGram;
    this.maxGram = maxGram;

    // Stand-in position increment attribute: the setter is a no-op and the getter always yields 0.
    this.posIncAtt = new PositionIncrementAttribute() {
        @Override
        public void setPositionIncrement(int positionIncrement) {
        }

        @Override
        public int getPositionIncrement() {
            return 0;
        }
    };

    // Stand-in position length attribute: the setter is a no-op and the getter always yields 0.
    this.posLenAtt = new PositionLengthAttribute() {
        @Override
        public void setPositionLength(int positionLength) {
        }

        @Override
        public int getPositionLength() {
            return 0;
        }
    };
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:36,代码来源:Lucene43NGramTokenFilter.java
示例9: EdgeNGramTokenFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
* @deprecated For {@link Version#LUCENE_4_3_0} or below, use {@link Lucene43EdgeNGramTokenFilter}, otherwise use {@link #EdgeNGramTokenFilter(TokenStream, int, int)}
*/
@Deprecated
public EdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) {
super(input);
if (version.onOrAfter(Version.LUCENE_4_4) && side == Side.BACK) {
throw new IllegalArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
}
if (side == null) {
throw new IllegalArgumentException("sideLabel must be either front or back");
}
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
if (minGram > maxGram) {
throw new IllegalArgumentException("minGram must not be greater than maxGram");
}
this.version = version;
this.charUtils = version.onOrAfter(Version.LUCENE_4_4)
? CharacterUtils.getInstance(version)
: CharacterUtils.getJava4Instance();
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:32,代码来源:EdgeNGramTokenFilter.java
示例10: CharArrayMap
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a map with backing arrays sized from {@code startSize}. The array
 * length starts at {@code INIT_SIZE} and is doubled until it is no smaller
 * than {@code startSize + (startSize >> 2)}.
 *
 * @param matchVersion version passed to {@link CharacterUtils#getInstance(Version)} and stored
 * @param startSize    requested initial size used to derive the array length
 * @param ignoreCase   value stored in the {@code ignoreCase} field
 * @deprecated Use {@link #CharArrayMap(int, boolean)}
 */
@Deprecated
@SuppressWarnings("unchecked")
public CharArrayMap(Version matchVersion, int startSize, boolean ignoreCase) {
    this.ignoreCase = ignoreCase;

    // startSize plus a quarter of headroom; constant across the loop.
    final int required = startSize + (startSize >> 2);
    int capacity = INIT_SIZE;
    while (capacity < required) {
        capacity <<= 1;
    }

    this.keys = new char[capacity][];
    this.values = (V[]) new Object[capacity];
    this.charUtils = CharacterUtils.getInstance(matchVersion);
    this.matchVersion = matchVersion;
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:16,代码来源:CharArrayMap.java
示例11: EdgeNGramTokenFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
*
* @param version the <a href="#version">Lucene match version</a>
* @param input {@link TokenStream} holding the input to be tokenized
* @param side the {@link Side} from which to chop off an n-gram
* @param minGram the smallest n-gram to generate
* @param maxGram the largest n-gram to generate
*/
@Deprecated
public EdgeNGramTokenFilter(Version version, TokenStream input, Side side, int minGram, int maxGram) {
super(input);
if (version == null) {
throw new IllegalArgumentException("version must not be null");
}
if (version.onOrAfter(Version.LUCENE_44) && side == Side.BACK) {
throw new IllegalArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
}
if (side == null) {
throw new IllegalArgumentException("sideLabel must be either front or back");
}
if (minGram < 1) {
throw new IllegalArgumentException("minGram must be greater than zero");
}
if (minGram > maxGram) {
throw new IllegalArgumentException("minGram must not be greater than maxGram");
}
this.version = version;
this.charUtils = version.onOrAfter(Version.LUCENE_44)
? CharacterUtils.getInstance(version)
: CharacterUtils.getJava4Instance();
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
}
开发者ID:yintaoxue,项目名称:read-open-source-code,代码行数:42,代码来源:EdgeNGramTokenFilter.java
示例12: GreekLowerCaseFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a GreekLowerCaseFilter over {@code in}, taking its
 * {@link CharacterUtils} from the given match version.
 *
 * @param matchVersion version passed to {@link CharacterUtils#getInstance(Version)}
 * @param in           token stream passed to the superclass constructor
 * @deprecated Use {@link #GreekLowerCaseFilter(TokenStream)}
 */
@Deprecated
public GreekLowerCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
    charUtils = CharacterUtils.getInstance(matchVersion);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:9,代码来源:GreekLowerCaseFilter.java
示例13: LowerCaseFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a LowerCaseFilter over {@code in}, taking its
 * {@link CharacterUtils} from the given match version.
 *
 * @param matchVersion version passed to {@link CharacterUtils#getInstance(Version)}
 * @param in           token stream passed to the superclass constructor
 * @deprecated Use {@link #LowerCaseFilter(TokenStream)}
 */
@Deprecated
public LowerCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
    this.charUtils = CharacterUtils.getInstance(matchVersion);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:9,代码来源:LowerCaseFilter.java
示例14: UpperCaseFilter
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates an UpperCaseFilter over {@code in}, taking its
 * {@link CharacterUtils} from the given match version.
 *
 * @param matchVersion version passed to {@link CharacterUtils#getInstance(Version)}
 * @param in           token stream passed to the superclass constructor
 * @deprecated Use {@link #UpperCaseFilter(TokenStream)}
 */
@Deprecated
public UpperCaseFilter(Version matchVersion, TokenStream in) {
    super(in);
    this.charUtils = CharacterUtils.getInstance(matchVersion);
}
开发者ID:lamsfoundation,项目名称:lams,代码行数:9,代码来源:UpperCaseFilter.java
示例15: TypeTokenizer
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a TypeTokenizer over the given reader, using the
 * {@link CharacterUtils} instance returned by {@link CharacterUtils#getInstance()}.
 *
 * @param input reader passed to the superclass constructor
 */
public TypeTokenizer(Reader input) {
    super(input);
    this.charUtils = CharacterUtils.getInstance();
}
开发者ID:gncloud,项目名称:fastcatsearch3,代码行数:5,代码来源:TypeTokenizer.java
示例16: WDSTokenizer
import org.apache.lucene.analysis.util.CharacterUtils; //导入依赖的package包/类
/**
 * Creates a WDSTokenizer, registering its offset and term attributes and
 * obtaining the {@link CharacterUtils} instance from
 * {@link CharacterUtils#getInstance()}.
 *
 * @param in       reader supplied by the caller
 * @param useSmart flag supplied by the caller
 */
public WDSTokenizer(Reader in, boolean useSmart) {
    // NOTE(review): neither `in` nor `useSmart` is read in this constructor and no
    // explicit super(...) call is made; confirm the reader is wired up elsewhere.
    this.charUtils = CharacterUtils.getInstance();
    // Attribute registration order is kept as in the original.
    this.offsetAtt = addAttribute(OffsetAttribute.class);
    this.termAtt = addAttribute(CharTermAttribute.class);
}
开发者ID:weidays,项目名称:WDSAnalyzer,代码行数:6,代码来源:WDSTokenizer.java
注:本文中的org.apache.lucene.analysis.util.CharacterUtils类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论