本文整理汇总了Java中cmu.arktweetnlp.Tagger.TaggedToken类的典型用法代码示例。如果您正苦于以下问题：Java TaggedToken类的具体用法？Java TaggedToken怎么用？Java TaggedToken使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
TaggedToken是cmu.arktweetnlp包中Tagger类的嵌套类，在下文中一共展示了TaggedToken类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: createFromTaggedTokens
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Builds a {@code TweetTfIdf} from POS-tagged tweets.
 *
 * <p>Computes the per-tweet term frequencies, derives the inverse document
 * frequencies from them, and assigns every term in the IDF map a sequential
 * integer id following the iteration order of the IDF key set.
 *
 * @param tweets tagged tweets, one token list per tweet
 * @param type passed to the constructor and to the term-frequency computation
 * @param normalization passed to the constructor
 * @param usePOSTags passed to the constructor and to the term-frequency computation
 * @return the populated instance
 */
public static TweetTfIdf createFromTaggedTokens(
    List<List<TaggedToken>> tweets, TfType type,
    TfIdfNormalization normalization, boolean usePOSTags) {
  TweetTfIdf result = new TweetTfIdf(type, normalization, usePOSTags);
  result.m_termFreqs = tfTaggedTokenTweets(tweets, type, usePOSTags);
  result.m_inverseDocFreq = idf(result.m_termFreqs);

  // Number the terms 0..n-1: the map's current size is always the next free id.
  result.m_termIds = new HashMap<String, Integer>();
  for (String term : result.m_inverseDocFreq.keySet()) {
    result.m_termIds.put(term, result.m_termIds.size());
  }

  LOG.info("Found " + result.m_inverseDocFreq.size() + " terms");
  return result;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:23,代码来源:TweetTfIdf.java
示例2: execute
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Reads the "text" and "taggedTokens" fields of the tuple, generates the
 * feature vector for the tagged tokens and emits (text, featureVector).
 *
 * @param tuple incoming tuple carrying the tweet text and its tagged tokens
 * @param collector collector the resulting tuple is emitted to
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
  final String tweetText = tuple.getStringByField("text");
  // Unchecked cast: the field value is read untyped from the tuple.
  final List<TaggedToken> tokens =
      (List<TaggedToken>) tuple.getValueByField("taggedTokens");

  final Map<Integer, Double> features = m_fvg.generateFeatureVector(tokens);
  if (m_logging) {
    LOG.info("Tweet: " + tweetText + " FeatureVector: " + features);
  }

  collector.emit(new Values(tweetText, features));
}
开发者ID:millecker,项目名称:senti-storm,代码行数:18,代码来源:FeatureGenerationBolt.java
示例3: execute
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Reads the "text" and "preprocessedTokens" fields of the tuple, POS-tags
 * the tokens and emits (text, taggedTokens).
 *
 * @param tuple incoming tuple carrying the tweet text and its preprocessed tokens
 * @param collector collector the resulting tuple is emitted to
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
  final String tweetText = tuple.getStringByField("text");
  // Unchecked cast: the field value is read untyped from the tuple.
  final List<String> tokens =
      (List<String>) tuple.getValueByField("preprocessedTokens");

  final List<TaggedToken> tagged = tag(tokens);
  if (m_logging) {
    LOG.info("Tweet: " + tagged);
  }

  collector.emit(new Values(tweetText, tagged));
}
开发者ID:millecker,项目名称:senti-storm,代码行数:17,代码来源:POSTaggerBolt.java
示例4: main
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Demo entry point: loads the tagger model "model.20120919", prints every
 * label id together with its name, then tokenizes and tags a sample tweet
 * and prints each token with its tag.
 *
 * <p>If the model cannot be loaded the error is reported and the method
 * returns, instead of continuing and dereferencing the tagger's model.
 *
 * @param args unused
 */
public static void main(String[] args) {
  Tagger t = new Tagger();
  try {
    t.loadModel("model.20120919");
  } catch (IOException e) {
    e.printStackTrace();
    System.out.println("Invalid model file.");
    // Nothing below can work without a loaded model; stop here rather than
    // failing later when t.model is accessed.
    return;
  }

  // Dump the label vocabulary as "<id>\t<name>" lines.
  int labelCount = t.model.labelVocab.size();
  for (int i = 0; i < labelCount; i++) {
    String l = t.model.labelVocab.name(i);
    System.out.printf("%s\t%s\n", i, l);
  }

  String tweet = "@Dolb this is very bad #way \"http://bitly.com/44\" :)";
  List<TaggedToken> tt = t.tokenizeAndTag(tweet);
  for (TaggedToken a : tt) {
    System.out.println(a.token + ":\t" + a.tag);
  }
}
开发者ID:uiuc-ischool-scanr,项目名称:SAIL,代码行数:26,代码来源:POSFeatures.java
示例5: printInfo
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Prints every tagged token with its tag on one line, followed by a
 * "Counts" section listing each key of {@code posCounts} with its value.
 *
 * <p>If {@code taggedTokens} has not been set yet, a hint is printed and the
 * method returns without printing anything else.
 */
public void printInfo() {
  if (taggedTokens == null) {
    System.out.println("Please call getPOSCounts first");
    // Return here: iterating a null list below would throw a NullPointerException.
    return;
  }
  for (TaggedToken tt : taggedTokens) {
    System.out.printf("%s\t%s,\t", tt.token, tt.tag);
  }
  System.out.println("\nCounts");
  for (String key : posCounts.keySet()) {
    System.out.printf("%s\t%s\n", key, posCounts.get(key));
  }
  System.out.println();
}
开发者ID:uiuc-ischool-scanr,项目名称:SAIL,代码行数:17,代码来源:POSFeatures.java
示例6: tag
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * POS-tags the given tokens and returns one label name per token.
 *
 * <p>Note that {@code sentence}, {@code tt} and {@code ms} are declared
 * outside this method and are reassigned on every call.
 *
 * @param tokens the tokens of one sentence
 * @return the label names, in token order
 */
public List<String> tag(List<String> tokens) {
  sentence = new Sentence();
  tt = new TaggedToken();
  sentence.tokens = tokens;
  ms = new ModelSentence(sentence.T());

  // Extract features, then decode greedily to fill ms.labels.
  featureExtractor.computeFeatures(sentence, ms);
  model.greedyDecode(ms, false);

  List<String> labelNames = new ArrayList<String>(sentence.T());
  for (int i = 0; i < sentence.T(); i++) {
    String labelName = model.labelVocab.name(ms.labels[i]);
    labelNames.add(labelName);
  }
  return labelNames;
}
开发者ID:weizh,项目名称:geolocator-3.0,代码行数:18,代码来源:ENTweetPOSTagger.java
示例7: generateFeatureVectorFromTaggedTokens
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Combines the sentiment, POS and TF-IDF feature vectors of a tagged tweet
 * into one map. The map returned by the sentiment generator is extended in
 * place with the entries of the other two.
 *
 * @param tweet the tagged tokens of one tweet
 * @return the combined feature vector
 * @throws RuntimeException if this generator was configured to use tagged words
 */
@Override
public Map<Integer, Double> generateFeatureVectorFromTaggedTokens(
    List<TaggedToken> tweet) {
  if (m_useTaggedWords) {
    throw new RuntimeException(
        "Use TaggedWords was set to true! generateFeatureVectorFromTaggedTokens is not applicable!");
  }

  Map<Integer, Double> combined = m_sentimentFeatureVectorGenerator
      .generateFeatureVectorFromTaggedTokens(tweet);

  Map<Integer, Double> posFeatures = m_POSFeatureVectorGenerator
      .generateFeatureVectorFromTaggedTokens(tweet);
  combined.putAll(posFeatures);

  Map<Integer, Double> tfidfFeatures = m_tfidfFeatureVectorGenerator
      .generateFeatureVectorFromTaggedTokens(tweet);
  combined.putAll(tfidfFeatures);

  return combined;
}
开发者ID:millecker,项目名称:storm-apps,代码行数:20,代码来源:CombinedFeatureVectorGenerator.java
示例8: execute
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Reads the "taggedTokens" field of the tuple, generates its feature vector
 * and emits it as a single-value tuple.
 *
 * @param tuple incoming tuple carrying the tagged tokens
 * @param collector collector the feature vector is emitted to
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
  // Unchecked cast: the field value is read untyped from the tuple.
  final List<TaggedToken> tokens =
      (List<TaggedToken>) tuple.getValueByField("taggedTokens");

  final Map<Integer, Double> features =
      m_fvg.generateFeatureVectorFromTaggedTokens(tokens);
  if (m_logging) {
    LOG.info("Tweet: " + features);
  }

  collector.emit(new Values(features));
}
开发者ID:millecker,项目名称:storm-apps,代码行数:17,代码来源:FeatureGenerationBolt.java
示例9: generateFeatureVector
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Turns the POS tag counts of a tweet into a sparse feature vector.
 *
 * <p>Slot {@code i} of the count array is stored under key
 * {@code m_vectorStartId + i}; slots whose value equals zero are left out.
 * Slot order: 0 nouns, 1 verb, 2 adjective, 3 adverb, 4 interjection,
 * 5 punctuation, 6 hashtag, 7 emoticon.
 *
 * @param taggedTokens the tagged tokens of one tweet
 * @return the sparse feature vector, possibly empty
 */
@Override
public Map<Integer, Double> generateFeatureVector(
    List<TaggedToken> taggedTokens) {
  Map<Integer, Double> features = new TreeMap<Integer, Double>();
  double[] counts = countPOSTagsFromTaggedTokens(taggedTokens, m_normalize);

  if (counts != null) {
    // Exactly eight slots are read, one per tag category listed above.
    for (int slot = 0; slot < 8; slot++) {
      if (counts[slot] != 0) {
        features.put(m_vectorStartId + slot, counts[slot]);
      }
    }
  }

  if (LOGGING) {
    LOG.info("POStags: " + Arrays.toString(counts));
  }
  return features;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:29,代码来源:POSFeatureVectorGenerator.java
示例10: countPOSTagsFromTaggedTokens
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Counts the tokens of a tweet per POS category.
 *
 * <p>The result has 8 slots: [NOUN, VERB, ADJECTIVE, ADVERB, INTERJECTION,
 * PUNCTUATION, HASHTAG, EMOTICON]. Tags are mapped as follows: "N", "O",
 * "^", "Z" to slot 0; "V", "T" to slot 1; "A" to 2; "R" to 3; "!" to 4;
 * "," to 5; "#" to 6; "E" to 7. Any other tag is counted as a word but not
 * assigned to a slot. Tagset reference:
 * http://www.ark.cs.cmu.edu/TweetNLP/annot_guidelines.pdf
 *
 * @param taggedTokens the tagged tokens of one tweet
 * @param normalize if true, every count is divided by the number of tokens
 * @return the 8-slot count array; all zeros for an empty token list
 */
private double[] countPOSTagsFromTaggedTokens(List<TaggedToken> taggedTokens,
    boolean normalize) {
  double[] posTags = new double[] { 0d, 0d, 0d, 0d, 0d, 0d, 0d, 0d };
  int wordCount = 0;
  for (TaggedToken word : taggedTokens) {
    wordCount++;
    String arkTag = word.tag;
    if (arkTag.equals("N") || arkTag.equals("O") || arkTag.equals("^")
        || arkTag.equals("Z")) {
      posTags[0]++;
    } else if (arkTag.equals("V") || arkTag.equals("T")) {
      posTags[1]++;
    } else if (arkTag.equals("A")) {
      posTags[2]++;
    } else if (arkTag.equals("R")) {
      posTags[3]++;
    } else if (arkTag.equals("!")) {
      posTags[4]++;
    } else if (arkTag.equals(",")) {
      posTags[5]++;
    } else if (arkTag.equals("#")) {
      posTags[6]++;
    } else if (arkTag.equals("E")) {
      posTags[7]++;
    }
  }
  // Only normalize when there is at least one token: 0.0 / 0 would turn
  // every slot into NaN, and NaN != 0 is true for callers filtering zeros.
  if (normalize && wordCount > 0) {
    for (int i = 0; i < posTags.length; i++) {
      posTags[i] /= wordCount;
    }
  }
  return posTags;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:37,代码来源:POSFeatureVectorGenerator.java
示例11: generateFeatureVector
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Combines the sentiment, POS and TF-IDF feature vectors of a tagged tweet
 * into one map. The map returned by the sentiment generator is extended in
 * place with the entries of the other two.
 *
 * @param tweet the tagged tokens of one tweet
 * @return the combined feature vector
 */
@Override
public Map<Integer, Double> generateFeatureVector(List<TaggedToken> tweet) {
  Map<Integer, Double> combined =
      m_sentimentFeatureVectorGenerator.generateFeatureVector(tweet);

  Map<Integer, Double> posFeatures =
      m_POSFeatureVectorGenerator.generateFeatureVector(tweet);
  combined.putAll(posFeatures);

  Map<Integer, Double> tfidfFeatures =
      m_tfidfFeatureVectorGenerator.generateFeatureVector(tweet);
  combined.putAll(tfidfFeatures);

  return combined;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:14,代码来源:CombinedFeatureVectorGenerator.java
示例12: generateFeatureVectors
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Generates one feature vector per tagged tweet, preserving input order.
 *
 * @param taggedTweets the tagged tweets, one token list per tweet
 * @param logging if true, each tweet and its feature vector are logged
 * @return the feature vectors, one per tweet
 */
public List<Map<Integer, Double>> generateFeatureVectors(
    List<List<TaggedToken>> taggedTweets, boolean logging) {
  final List<Map<Integer, Double>> vectors =
      new ArrayList<Map<Integer, Double>>();

  for (List<TaggedToken> taggedTweet : taggedTweets) {
    final Map<Integer, Double> vector = generateFeatureVector(taggedTweet);
    if (logging) {
      LOG.info("Tweet: " + taggedTweet);
      LOG.info("FeatureVector: " + vector);
    }
    vectors.add(vector);
  }

  return vectors;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:14,代码来源:FeatureVectorGenerator.java
示例13: generateFeatureVector
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Looks up the sentence sentiment of the tagged tokens in the sentiment
 * dictionary and delegates to the sentiment-map overload of this method.
 *
 * @param taggedTokens the tagged tokens of one tweet
 * @return the feature vector produced by the overload
 */
@Override
public Map<Integer, Double> generateFeatureVector(
    List<TaggedToken> taggedTokens) {
  final Map<Integer, SentimentResult> sentiments =
      m_sentimentDict.getSentenceSentiment(taggedTokens);
  return generateFeatureVector(sentiments);
}
开发者ID:millecker,项目名称:senti-storm,代码行数:10,代码来源:SentimentFeatureVectorGenerator.java
示例14: getSentenceSentiment
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Aggregates the word-level sentiment scores of a tagged sentence.
 *
 * <p>For each token, the word sentiments are looked up by token and tag;
 * every returned (key, score) pair adds its score to the
 * {@code SentimentResult} accumulated under that key.
 *
 * @param sentence the tagged tokens of one sentence
 * @return the accumulated results per key, or {@code null} if no token
 *         contributed any sentiment
 */
public Map<Integer, SentimentResult> getSentenceSentiment(
    List<TaggedToken> sentence) {
  Map<Integer, SentimentResult> totals =
      new HashMap<Integer, SentimentResult>();
  if (LOGGING) {
    LOG.info("TaggedSentence: " + sentence.toString());
  }

  for (TaggedToken token : sentence) {
    Map<Integer, Double> scores = getWordSentiment(token.token, token.tag,
        false);
    if (scores == null) {
      // No sentiment entry for this word.
      continue;
    }
    for (Map.Entry<Integer, Double> entry : scores.entrySet()) {
      int key = entry.getKey();
      double score = entry.getValue();

      // Fetch the running result for this key, creating it on first use.
      SentimentResult running = totals.get(key);
      if (running == null) {
        running = new SentimentResult();
      }
      running.addScore(score);
      totals.put(key, running);
    }
  }

  if (LOGGING) {
    LOG.info("Sentiment: " + totals);
  }
  return totals.isEmpty() ? null : totals;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:33,代码来源:SentimentDictionary.java
示例15: getSentiment
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Computes the sentence sentiment of every tweet, preserving input order.
 *
 * @param tweets the tagged tweets, one token list per tweet
 * @return one entry per tweet, each being whatever
 *         {@code getSentenceSentiment} returned for it
 */
public List<Map<Integer, SentimentResult>> getSentiment(
    List<List<TaggedToken>> tweets) {
  final List<Map<Integer, SentimentResult>> results =
      new ArrayList<Map<Integer, SentimentResult>>();
  for (List<TaggedToken> taggedTweet : tweets) {
    final Map<Integer, SentimentResult> sentiment =
        getSentenceSentiment(taggedTweet);
    results.add(sentiment);
  }
  return results;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:9,代码来源:SentimentDictionary.java
示例16: FeaturedTweet
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Creates a tweet together with the artifacts of each processing stage.
 * The given lists and map are stored by reference, not copied.
 *
 * @param id passed to the superclass constructor
 * @param text passed to the superclass constructor
 * @param score passed to the superclass constructor
 * @param tokens stored as the tweet's tokens
 * @param preprocessedTokens stored as the tweet's preprocessed tokens
 * @param taggedTokens stored as the tweet's tagged tokens
 * @param featureVector stored as the tweet's feature vector
 */
public FeaturedTweet(long id, String text, double score, List<String> tokens,
    List<String> preprocessedTokens, List<TaggedToken> taggedTokens,
    Map<Integer, Double> featureVector) {
  super(id, text, score);
  this.m_featureVector = featureVector;
  this.m_taggedTokens = taggedTokens;
  this.m_preprocessedTokens = preprocessedTokens;
  this.m_tokens = tokens;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:10,代码来源:FeaturedTweet.java
示例17: getTaggedTokensFromTweets
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Collects the tagged tokens of every featured tweet, preserving order.
 *
 * @param featuredTweets the tweets to read the tagged tokens from
 * @return one tagged-token list per tweet
 */
public static final List<List<TaggedToken>> getTaggedTokensFromTweets(
    List<FeaturedTweet> featuredTweets) {
  final List<List<TaggedToken>> result = new ArrayList<List<TaggedToken>>();
  for (FeaturedTweet featuredTweet : featuredTweets) {
    final List<TaggedToken> tagged = featuredTweet.getTaggedTokens();
    result.add(tagged);
  }
  return result;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:9,代码来源:FeaturedTweet.java
示例18: generateFeatureTweets
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Runs the full pipeline over the given tweets: tokenize, preprocess,
 * POS-tag, build a TF-IDF model from the tagged tweets, and generate one
 * feature vector per tweet with a combined feature vector generator.
 *
 * @param tweets the raw tweets
 * @return one {@code FeaturedTweet} per input tweet, in input order
 */
public static final List<FeaturedTweet> generateFeatureTweets(
    List<Tweet> tweets) {
  final Preprocessor preprocessor = Preprocessor.getInstance();
  final POSTagger tagger = POSTagger.getInstance();

  // Stage 1-3: tokenize, preprocess, POS-tag.
  List<List<String>> tokenized = Tokenizer.tokenizeTweets(tweets);
  List<List<String>> preprocessed = preprocessor.preprocessTweets(tokenized);
  List<List<TaggedToken>> tagged = tagger.tagTweets(preprocessed);

  // Stage 4: TF-IDF over the whole tagged corpus feeds the generator.
  TweetTfIdf tfIdf = TweetTfIdf.createFromTaggedTokens(tagged, TfType.LOG,
      TfIdfNormalization.COS, true);
  FeatureVectorGenerator generator = new CombinedFeatureVectorGenerator(true,
      tfIdf);

  // Stage 5: the lists are index-aligned with the input tweets.
  List<FeaturedTweet> result = new ArrayList<FeaturedTweet>();
  for (int idx = 0; idx < tweets.size(); idx++) {
    List<TaggedToken> taggedTweet = tagged.get(idx);
    Map<Integer, Double> features = generator
        .generateFeatureVector(taggedTweet);
    result.add(new FeaturedTweet(tweets.get(idx), tokenized.get(idx),
        preprocessed.get(idx), taggedTweet, features));
  }
  return result;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:36,代码来源:FeaturedTweet.java
示例19: tfTaggedTokenTweets
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Computes the term frequencies of every tagged tweet, preserving order.
 *
 * @param tweets the tagged tweets, one token list per tweet
 * @param type passed through to the per-tweet computation
 * @param usePOSTags passed through to the per-tweet computation
 * @return one term-frequency map per tweet
 */
public static List<Map<String, Double>> tfTaggedTokenTweets(
    List<List<TaggedToken>> tweets, TfType type, boolean usePOSTags) {
  final List<Map<String, Double>> perTweet =
      new ArrayList<Map<String, Double>>();
  for (List<TaggedToken> taggedTweet : tweets) {
    final Map<String, Double> frequencies =
        tfFromTaggedTokens(taggedTweet, type, usePOSTags);
    perTweet.add(frequencies);
  }
  return perTweet;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:9,代码来源:TweetTfIdf.java
示例20: tag
import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * POS-tags the given tokens.
 *
 * <p>Computes features for the sentence, decodes greedily and pairs every
 * token with the name of the label decoded at its position.
 *
 * @param tokens the tokens of one tweet
 * @return one {@code TaggedToken} per input token, in token order
 */
private List<TaggedToken> tag(List<String> tokens) {
  Sentence input = new Sentence();
  input.tokens = tokens;

  ModelSentence decoded = new ModelSentence(input.T());
  m_featureExtractor.computeFeatures(input, decoded);
  m_model.greedyDecode(decoded, false);

  List<TaggedToken> result = new ArrayList<TaggedToken>();
  for (int i = 0; i < input.T(); i++) {
    String token = tokens.get(i);
    String label = m_model.labelVocab.name(decoded.labels[i]);
    result.add(new TaggedToken(token, label));
  }
  return result;
}
开发者ID:millecker,项目名称:senti-storm,代码行数:16,代码来源:POSTaggerBolt.java
注：本文中的cmu.arktweetnlp.Tagger.TaggedToken类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论