本文整理汇总了Java中weka.core.tokenizers.Tokenizer类的典型用法代码示例。如果您正苦于以下问题:Java Tokenizer类的具体用法?Java Tokenizer怎么用?Java Tokenizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Tokenizer类属于weka.core.tokenizers包,在下文中一共展示了Tokenizer类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer used to split tweets into tokens.
 *
 * @return the configured tokenizer (the CMU TweetNLP tokenizer by default,
 *         per the option metadata below)
 */
@OptionMetadata(displayName = "tokenizer",
description = "The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default",
commandLineParamName = "tokenizer",
commandLineParamSynopsis = "-tokenizer <string>", displayOrder = 3)
public Tokenizer getTokenizer() {
return m_tokenizer;
}
开发者ID:felipebravom,项目名称:AffectiveTweets,代码行数:8,代码来源:TweetToFeatureVector.java
示例2: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Returns the tokenizer used to split tweets into tokens.
 *
 * @return the configured tokenizer (the CMU TweetNLP tokenizer by default,
 *         per the option metadata below)
 */
@OptionMetadata(displayName = "tokenizer",
description = "The tokenizing algorithm to use on the tweets. Uses the CMU TweetNLP tokenizer as default",
commandLineParamName = "tokenizer",
commandLineParamSynopsis = "-tokenizer <string>", displayOrder = 1)
public Tokenizer getTokenizer() {
return m_tokenizer;
}
开发者ID:felipebravom,项目名称:AffectiveTweets,代码行数:8,代码来源:LexiconDistantSupervision.java
示例3: tokenize
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* Tokenizes a String
* @param content the content
* @param toLowerCase true for lowercasing the content
* @param standarizeUrlsUsers true for standarizing urls and users
* @param reduceRepeatedLetters true for reduing repeated letters
* @param tokenizer the tokenizer
* @param stemmer the stemmer
* @param stop the stopwords handler
* @return a list of tokens
*/
/**
 * Tokenizes a string into a list of normalized, stemmed, non-stopword tokens.
 *
 * @param content the text to tokenize
 * @param toLowerCase true to lowercase the content before tokenizing
 * @param standarizeUrlsUsers true to replace URLs and user mentions with
 *        generic placeholders
 * @param reduceRepeatedLetters true to collapse runs of a repeated lowercase
 *        letter down to exactly two occurrences
 * @param tokenizer the tokenizer that splits the content
 * @param stemmer the stemmer applied to each surviving token
 * @param stop the stopwords handler used to discard tokens
 * @return the list of processed tokens
 */
static public List<String> tokenize(String content, boolean toLowerCase, boolean standarizeUrlsUsers, boolean reduceRepeatedLetters, Tokenizer tokenizer, Stemmer stemmer, StopwordsHandler stop) {
	if (toLowerCase) {
		content = content.toLowerCase();
	}
	if (reduceRepeatedLetters) {
		// Any letter repeated two or more times in a row is reduced to
		// exactly two occurrences (e.g. "coool" -> "cool").
		content = content.replaceAll("([a-z])\\1+", "$1$1");
	}
	tokenizer.tokenize(content);
	List<String> result = new ArrayList<String>();
	while (tokenizer.hasMoreElements()) {
		String word = tokenizer.nextElement();
		// Stopwords are dropped before any standardization or stemming.
		if (stop.isStopword(word)) {
			continue;
		}
		if (standarizeUrlsUsers) {
			if (word.matches("http.*|ww\\..*|www\\..*")) {
				// Replace URLs with a generic URL placeholder.
				word = "http://www.url.com";
			} else if (word.matches("@.*")) {
				// Replace user mentions with a generic user placeholder.
				word = "@user";
			}
		}
		result.add(stemmer.stem(word));
	}
	return result;
}
开发者ID:felipebravom,项目名称:AffectiveTweets,代码行数:49,代码来源:Utils.java
示例4: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
	// Parameter renamed from "m_tokenizer" to "value": the old name shadowed
	// the field it assigns, and sibling setters in this codebase use "value".
	m_tokenizer = value;
}
开发者ID:felipebravom,项目名称:AffectiveTweets,代码行数:4,代码来源:TweetToFeatureVector.java
示例5: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* the tokenizer algorithm to use.
*
* @param value the configured tokenizing algorithm
*/
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
	this.m_tokenizer = value;
}
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:NaiveBayesMultinomialText.java
示例6: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* Returns the current tokenizer algorithm.
*
* @return the current tokenizer algorithm
*/
/**
 * Returns the currently configured tokenizer algorithm.
 *
 * @return the tokenizer in use
 */
public Tokenizer getTokenizer() {
return m_tokenizer;
}
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:NaiveBayesMultinomialText.java
示例7: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* the tokenizer algorithm to use.
*
* @param value the configured tokenizing algorithm
*/
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
	this.m_tokenizer = value;
}
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:SGDText.java
示例8: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* Returns the current tokenizer algorithm.
*
* @return the current tokenizer algorithm
*/
/**
 * Returns the currently configured tokenizer algorithm.
 *
 * @return the tokenizer in use
 */
public Tokenizer getTokenizer() {
return m_tokenizer;
}
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:SGDText.java
示例9: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* the tokenizer algorithm to use.
*
* @param value the configured tokenizing algorithm
*/
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
	this.m_Tokenizer = value;
}
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:StringToWordVector.java
示例10: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* Returns the current tokenizer algorithm.
*
* @return the current tokenizer algorithm
*/
/**
 * Returns the currently configured tokenizer algorithm.
 *
 * @return the tokenizer in use
 */
public Tokenizer getTokenizer() {
return m_Tokenizer;
}
开发者ID:mydzigear,项目名称:repo.kmeanspp.silhouette_score,代码行数:9,代码来源:StringToWordVector.java
示例11: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* the tokenizer algorithm to use.
*
* @param value the configured tokenizing algorithm
*/
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
	this.m_tokenizer = value;
}
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:9,代码来源:NaiveBayesMultinomialText.java
示例12: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* Returns the current tokenizer algorithm.
*
* @return the current tokenizer algorithm
*/
/**
 * Returns the currently configured tokenizer algorithm.
 *
 * @return the tokenizer in use
 */
public Tokenizer getTokenizer() {
return m_tokenizer;
}
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:9,代码来源:NaiveBayesMultinomialText.java
示例13: setTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* the tokenizer algorithm to use.
*
* @param value the configured tokenizing algorithm
*/
/**
 * Sets the tokenizing algorithm to use.
 *
 * @param value the tokenizer to use
 */
public void setTokenizer(Tokenizer value) {
	this.m_Tokenizer = value;
}
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:9,代码来源:StringToWordVector.java
示例14: getTokenizer
import weka.core.tokenizers.Tokenizer; //导入依赖的package包/类
/**
* Returns the current tokenizer algorithm.
*
* @return the current tokenizer algorithm
*/
/**
 * Returns the currently configured tokenizer algorithm.
 *
 * @return the tokenizer in use
 */
public Tokenizer getTokenizer() {
return m_Tokenizer;
}
开发者ID:dsibournemouth,项目名称:autoweka,代码行数:9,代码来源:StringToWordVector.java
注:本文中的weka.core.tokenizers.Tokenizer类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论