• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Java TaggedToken类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中cmu.arktweetnlp.Tagger.TaggedToken的典型用法代码示例。如果您正苦于以下问题:Java TaggedToken类的具体用法?Java TaggedToken怎么用?Java TaggedToken使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



TaggedToken类属于cmu.arktweetnlp.Tagger包,在下文中一共展示了TaggedToken类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: createFromTaggedTokens

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Builds a {@link TweetTfIdf} from tweets that are already POS-tagged.
 *
 * <p>Computes the per-tweet term frequencies, derives the inverse document
 * frequencies from them, and gives every term of the IDF map a consecutive
 * integer id (0, 1, 2, ...) in the iteration order of the map's key set.
 *
 * @param tweets tagged tokens, one inner list per tweet
 * @param type term-frequency variant, forwarded to the TF computation
 * @param normalization normalization setting, forwarded to the constructor
 * @param usePOSTags forwarded to the constructor and to the TF computation
 * @return the populated instance
 */
public static TweetTfIdf createFromTaggedTokens(
    List<List<TaggedToken>> tweets, TfType type,
    TfIdfNormalization normalization, boolean usePOSTags) {

  TweetTfIdf result = new TweetTfIdf(type, normalization, usePOSTags);
  result.m_termFreqs = tfTaggedTokenTweets(tweets, type, usePOSTags);
  result.m_inverseDocFreq = idf(result.m_termFreqs);

  // Keys are distinct, so the current map size is always the next free id.
  result.m_termIds = new HashMap<String, Integer>();
  for (String term : result.m_inverseDocFreq.keySet()) {
    result.m_termIds.put(term, result.m_termIds.size());
  }

  LOG.info("Found " + result.m_inverseDocFreq.size() + " terms");
  return result;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:23,代码来源:TweetTfIdf.java


示例2: execute

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Reads the tweet text and its tagged tokens from the tuple, generates the
 * feature vector for the tokens and emits (text, featureVector).
 *
 * @param tuple input tuple providing the "text" and "taggedTokens" fields
 * @param collector collector the resulting values are emitted to
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
  final String tweetText = tuple.getStringByField("text");

  // The field value is untyped, hence the unchecked cast.
  final Object rawTokens = tuple.getValueByField("taggedTokens");
  final List<TaggedToken> tokens = (List<TaggedToken>) rawTokens;

  final Map<Integer, Double> vector = m_fvg.generateFeatureVector(tokens);
  if (m_logging) {
    LOG.info("Tweet: " + tweetText + " FeatureVector: " + vector);
  }

  collector.emit(new Values(tweetText, vector));
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:18,代码来源:FeatureGenerationBolt.java


示例3: execute

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Reads the tweet text and its preprocessed tokens from the tuple, runs the
 * POS tagger on the tokens and emits (text, taggedTokens).
 *
 * @param tuple input tuple providing the "text" and "preprocessedTokens" fields
 * @param collector collector the resulting values are emitted to
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
  final String tweetText = tuple.getStringByField("text");

  // The field value is untyped, hence the unchecked cast.
  final Object rawTokens = tuple.getValueByField("preprocessedTokens");
  final List<String> cleanedTokens = (List<String>) rawTokens;

  final List<TaggedToken> tagged = tag(cleanedTokens);
  if (m_logging) {
    LOG.info("Tweet: " + tagged);
  }

  collector.emit(new Values(tweetText, tagged));
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:17,代码来源:POSTaggerBolt.java


示例4: main

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Demo entry point: loads the tagger model, prints every label of the
 * model's label vocabulary with its index, then tokenizes and tags one
 * sample tweet and prints each token with its tag.
 *
 * @param args unused
 */
public static void main(String[] args) {
	Tagger t = new Tagger();
	try {
		t.loadModel("model.20120919");
	} catch (IOException e) {
		e.printStackTrace();
		System.out.println("Invalid model file.");
		// Everything below reads the tagger's model; without a successful
		// load there is nothing useful to do, so stop instead of failing
		// later on an unloaded model.
		return;
	}

	// Dump the label vocabulary as "index<TAB>label".
	int labelCount = t.model.labelVocab.size();
	for (int i = 0; i < labelCount; i++) {
		System.out.printf("%s\t%s\n", i, t.model.labelVocab.name(i));
	}

	String tweet = "@Dolb this is very bad #way \"http://bitly.com/44\" :)";
	List<TaggedToken> tt = t.tokenizeAndTag(tweet);
	for (TaggedToken a : tt) {
		System.out.println(a.token + ":\t" + a.tag);
	}
}
 
开发者ID:uiuc-ischool-scanr,项目名称:SAIL,代码行数:26,代码来源:POSFeatures.java


示例5: printInfo

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Prints every tagged token as "token<TAB>tag," on one line, followed by the
 * POS counts, one "key<TAB>count" pair per line.
 *
 * <p>If no tagged tokens are available yet, only a hint to call
 * {@code getPOSCounts} first is printed.
 */
public void printInfo(){
	if(taggedTokens == null){
		System.out.println("Please call getPOSCounts first");
		// Nothing to print; iterating over the null list below would throw.
		return;
	}
	for(TaggedToken tt: taggedTokens){
		System.out.printf("%s\t%s,\t",tt.token,tt.tag);
	}
	System.out.println("\nCounts");
	for(String key: posCounts.keySet()){
		System.out.printf("%s\t%s\n",key, posCounts.get(key));
	}
	System.out.println();
}
 
开发者ID:uiuc-ischool-scanr,项目名称:SAIL,代码行数:17,代码来源:POSFeatures.java


示例6: tag

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Tags the given tokens and returns the label name chosen for each token,
 * in token order.
 *
 * <p>Overwrites the {@code sentence}, {@code tt} and {@code ms} members as a
 * side effect.
 *
 * @param tokens tokens of one sentence
 * @return one label name per token
 */
public List<String> tag(List<String> tokens) {

		sentence = new Sentence();
		tt = new TaggedToken();
		sentence.tokens = tokens;
		ms = new ModelSentence(sentence.T());
		featureExtractor.computeFeatures(sentence, ms);
		model.greedyDecode(ms, false);

		// Translate each decoded label id into its name.
		List<String> labelNames = new ArrayList<String>(sentence.T());
		int position = 0;
		while (position < sentence.T()) {
			labelNames.add(model.labelVocab.name(ms.labels[position]));
			position++;
		}
		return labelNames;
	}
 
开发者ID:weizh,项目名称:geolocator-3.0,代码行数:18,代码来源:ENTweetPOSTagger.java


示例7: generateFeatureVectorFromTaggedTokens

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Combines the sentiment, POS and TF-IDF feature vectors of one tagged tweet
 * into a single map. The sentiment generator's map is used as the base and
 * the other two are merged into it in that order, so later entries replace
 * earlier ones with the same key.
 *
 * @param tweet tagged tokens of one tweet
 * @return the combined feature vector
 * @throws RuntimeException if this generator was configured to use tagged words
 */
@Override
public Map<Integer, Double> generateFeatureVectorFromTaggedTokens(
    List<TaggedToken> tweet) {
  if (m_useTaggedWords) {
    throw new RuntimeException(
        "Use TaggedWords was set to true! generateFeatureVectorFromTaggedTokens is not applicable!");
  }

  Map<Integer, Double> combined = m_sentimentFeatureVectorGenerator
      .generateFeatureVectorFromTaggedTokens(tweet);

  Map<Integer, Double> posFeatures = m_POSFeatureVectorGenerator
      .generateFeatureVectorFromTaggedTokens(tweet);
  combined.putAll(posFeatures);

  Map<Integer, Double> tfidfFeatures = m_tfidfFeatureVectorGenerator
      .generateFeatureVectorFromTaggedTokens(tweet);
  combined.putAll(tfidfFeatures);

  return combined;
}
 
开发者ID:millecker,项目名称:storm-apps,代码行数:20,代码来源:CombinedFeatureVectorGenerator.java


示例8: execute

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Reads the tagged tokens from the tuple, generates their feature vector and
 * emits it as the only value.
 *
 * @param tuple input tuple providing the "taggedTokens" field
 * @param collector collector the feature vector is emitted to
 */
@Override
public void execute(Tuple tuple, BasicOutputCollector collector) {
  // The field value is untyped, hence the unchecked cast.
  final Object rawTokens = tuple.getValueByField("taggedTokens");
  final List<TaggedToken> tokens = (List<TaggedToken>) rawTokens;

  final Map<Integer, Double> vector = m_fvg
      .generateFeatureVectorFromTaggedTokens(tokens);
  if (m_logging) {
    LOG.info("Tweet: " + vector);
  }

  collector.emit(new Values(vector));
}
 
开发者ID:millecker,项目名称:storm-apps,代码行数:17,代码来源:FeatureGenerationBolt.java


示例9: generateFeatureVector

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Builds a sparse feature vector from the POS tag counts of a tweet.
 *
 * <p>The eight counts (noun, verb, adjective, adverb, interjection,
 * punctuation, hashtag, emoticon) are stored under the ids
 * {@code m_vectorStartId} to {@code m_vectorStartId + 7}; counts equal to
 * zero are left out.
 *
 * @param taggedTokens tagged tokens of one tweet
 * @return the feature vector, sorted by feature id
 */
@Override
public Map<Integer, Double> generateFeatureVector(
    List<TaggedToken> taggedTokens) {
  Map<Integer, Double> sparseVector = new TreeMap<Integer, Double>();
  double[] counts = countPOSTagsFromTaggedTokens(taggedTokens, m_normalize);
  if (counts != null) {
    // Offsets: 0 noun, 1 verb, 2 adjective, 3 adverb, 4 interjection,
    // 5 punctuation, 6 hashtag, 7 emoticon.
    for (int offset = 0; offset < 8; offset++) {
      if (counts[offset] != 0) {
        sparseVector.put(m_vectorStartId + offset, counts[offset]);
      }
    }
  }
  if (LOGGING) {
    LOG.info("POStags: " + Arrays.toString(counts));
  }
  return sparseVector;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:29,代码来源:POSFeatureVectorGenerator.java


示例10: countPOSTagsFromTaggedTokens

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Counts the tokens of a tweet per POS category.
 *
 * <p>The result has eight slots: [NOUN, VERB, ADJECTIVE, ADVERB,
 * INTERJECTION, PUNCTUATION, HASHTAG, EMOTICON]. Tags that match none of
 * the categories are not counted in any slot but still count towards the
 * number of tokens used for normalization.
 *
 * @param taggedTokens tagged tokens of one tweet
 * @param normalize if {@code true}, each count is divided by the number of
 *        tokens; for an empty tweet the counts stay at zero
 * @return the eight counts (or relative frequencies when normalized)
 */
private double[] countPOSTagsFromTaggedTokens(List<TaggedToken> taggedTokens,
    boolean normalize) {
  // 8 = [NOUN, VERB, ADJECTIVE, ADVERB, INTERJECTION, PUNCTUATION, HASHTAG,
  // EMOTICON]
  double[] posTags = new double[] { 0d, 0d, 0d, 0d, 0d, 0d, 0d, 0d };
  int wordCount = 0;
  for (TaggedToken word : taggedTokens) {
    wordCount++;
    String arkTag = word.tag;
    // http://www.ark.cs.cmu.edu/TweetNLP/annot_guidelines.pdf
    if (arkTag.equals("N") || arkTag.equals("O") || arkTag.equals("^")
        || arkTag.equals("Z")) {
      posTags[0]++;
    } else if (arkTag.equals("V") || arkTag.equals("T")) {
      posTags[1]++;
    } else if (arkTag.equals("A")) {
      posTags[2]++;
    } else if (arkTag.equals("R")) {
      posTags[3]++;
    } else if (arkTag.equals("!")) {
      posTags[4]++;
    } else if (arkTag.equals(",")) {
      posTags[5]++;
    } else if (arkTag.equals("#")) {
      posTags[6]++;
    } else if (arkTag.equals("E")) {
      posTags[7]++;
    }
  }
  // Skip normalization for an empty tweet: 0.0 / 0 would turn every slot
  // into NaN.
  if (normalize && wordCount > 0) {
    for (int i = 0; i < posTags.length; i++) {
      posTags[i] /= wordCount;
    }
  }
  return posTags;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:37,代码来源:POSFeatureVectorGenerator.java


示例11: generateFeatureVector

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Combines the sentiment, POS and TF-IDF feature vectors of one tagged tweet
 * into a single map. The sentiment generator's map is used as the base and
 * the other two are merged into it in that order.
 *
 * @param tweet tagged tokens of one tweet
 * @return the combined feature vector
 */
@Override
public Map<Integer, Double> generateFeatureVector(List<TaggedToken> tweet) {
  Map<Integer, Double> combined = m_sentimentFeatureVectorGenerator
      .generateFeatureVector(tweet);

  Map<Integer, Double> posFeatures = m_POSFeatureVectorGenerator
      .generateFeatureVector(tweet);
  combined.putAll(posFeatures);

  Map<Integer, Double> tfidfFeatures = m_tfidfFeatureVectorGenerator
      .generateFeatureVector(tweet);
  combined.putAll(tfidfFeatures);

  return combined;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:14,代码来源:CombinedFeatureVectorGenerator.java


示例12: generateFeatureVectors

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Generates one feature vector per tagged tweet, preserving the input order.
 *
 * @param taggedTweets tagged tokens, one inner list per tweet
 * @param logging if {@code true}, each tweet and its vector are logged
 * @return the feature vectors, in the same order as {@code taggedTweets}
 */
public List<Map<Integer, Double>> generateFeatureVectors(
    List<List<TaggedToken>> taggedTweets, boolean logging) {
  List<Map<Integer, Double>> vectors = new ArrayList<Map<Integer, Double>>();
  for (List<TaggedToken> taggedTweet : taggedTweets) {
    Map<Integer, Double> vector = generateFeatureVector(taggedTweet);
    if (logging) {
      LOG.info("Tweet: " + taggedTweet);
      LOG.info("FeatureVector: " + vector);
    }
    vectors.add(vector);
  }
  return vectors;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:14,代码来源:FeatureVectorGenerator.java


示例13: generateFeatureVector

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Looks up the sentence sentiments of the tagged tokens in the sentiment
 * dictionary and converts them into a feature vector.
 *
 * @param taggedTokens tagged tokens of one tweet
 * @return the sentiment feature vector
 */
@Override
public Map<Integer, Double> generateFeatureVector(
    List<TaggedToken> taggedTokens) {
  return generateFeatureVector(
      m_sentimentDict.getSentenceSentiment(taggedTokens));
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:10,代码来源:SentimentFeatureVectorGenerator.java


示例14: getSentenceSentiment

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Accumulates the word sentiments of a tagged sentence.
 *
 * <p>For every token the word sentiments are looked up by token and tag;
 * each returned score is added to the {@link SentimentResult} stored under
 * the same key, creating that result on first use.
 *
 * @param sentence tagged tokens of one sentence
 * @return the accumulated results per key, or {@code null} if no token
 *         yielded any sentiment
 */
public Map<Integer, SentimentResult> getSentenceSentiment(
    List<TaggedToken> sentence) {
  Map<Integer, SentimentResult> totals = new HashMap<Integer, SentimentResult>();
  if (LOGGING) {
    LOG.info("TaggedSentence: " + sentence.toString());
  }

  for (TaggedToken token : sentence) {
    Map<Integer, Double> scores = getWordSentiment(token.token, token.tag,
        false);
    if (scores == null) {
      // No sentiment known for this token.
      continue;
    }
    for (Map.Entry<Integer, Double> entry : scores.entrySet()) {
      int key = entry.getKey();
      double score = entry.getValue();

      SentimentResult existing = totals.get(key);
      SentimentResult updated = (existing != null) ? existing
          : new SentimentResult();
      updated.addScore(score);
      totals.put(key, updated);
    }
  }

  if (LOGGING) {
    LOG.info("Sentiment: " + totals);
  }
  if (totals.isEmpty()) {
    return null;
  }
  return totals;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:33,代码来源:SentimentDictionary.java


示例15: getSentiment

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Computes the sentence sentiment of every tweet, preserving the input order.
 *
 * @param tweets tagged tokens, one inner list per tweet
 * @return one sentiment map per tweet, in the same order as {@code tweets}
 */
public List<Map<Integer, SentimentResult>> getSentiment(
    List<List<TaggedToken>> tweets) {
  List<Map<Integer, SentimentResult>> perTweet = new ArrayList<Map<Integer, SentimentResult>>();
  for (List<TaggedToken> taggedTweet : tweets) {
    Map<Integer, SentimentResult> sentiment = getSentenceSentiment(taggedTweet);
    perTweet.add(sentiment);
  }
  return perTweet;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:9,代码来源:SentimentDictionary.java


示例16: FeaturedTweet

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Creates a tweet together with its derived processing results.
 *
 * @param id passed to the superclass constructor
 * @param text passed to the superclass constructor
 * @param score passed to the superclass constructor
 * @param tokens tokens of the tweet
 * @param preprocessedTokens preprocessed tokens of the tweet
 * @param taggedTokens POS-tagged tokens of the tweet
 * @param featureVector feature vector of the tweet
 */
public FeaturedTweet(long id, String text, double score, List<String> tokens,
    List<String> preprocessedTokens, List<TaggedToken> taggedTokens,
    Map<Integer, Double> featureVector) {
  super(id, text, score);
  // The given references are stored as-is; no copies are made.
  this.m_tokens = tokens;
  this.m_preprocessedTokens = preprocessedTokens;
  this.m_taggedTokens = taggedTokens;
  this.m_featureVector = featureVector;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:10,代码来源:FeaturedTweet.java


示例17: getTaggedTokensFromTweets

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Collects the tagged tokens of every featured tweet, preserving the input
 * order.
 *
 * @param featuredTweets tweets to read the tagged tokens from
 * @return one tagged-token list per tweet
 */
public static final List<List<TaggedToken>> getTaggedTokensFromTweets(
    List<FeaturedTweet> featuredTweets) {
  List<List<TaggedToken>> collected = new ArrayList<List<TaggedToken>>();
  for (FeaturedTweet featuredTweet : featuredTweets) {
    List<TaggedToken> tokensOfTweet = featuredTweet.getTaggedTokens();
    collected.add(tokensOfTweet);
  }
  return collected;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:9,代码来源:FeaturedTweet.java


示例18: generateFeatureTweets

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Runs the full pipeline on the given tweets: tokenize, preprocess, POS-tag,
 * build a TF-IDF model from the tagged tweets, then generate one feature
 * vector per tweet.
 *
 * @param tweets tweets to process
 * @return one {@link FeaturedTweet} per input tweet, in input order, carrying
 *         the intermediate results and the feature vector
 */
public static final List<FeaturedTweet> generateFeatureTweets(
    List<Tweet> tweets) {
  final Preprocessor preprocessor = Preprocessor.getInstance();
  final POSTagger tagger = POSTagger.getInstance();

  // Stage 1-3: tokenize, preprocess, POS-tag.
  List<List<String>> tokenized = Tokenizer.tokenizeTweets(tweets);
  List<List<String>> preprocessed = preprocessor.preprocessTweets(tokenized);
  List<List<TaggedToken>> tagged = tagger.tagTweets(preprocessed);

  // Stage 4: the TF-IDF model is built from the tagged tweets themselves.
  TweetTfIdf tfIdf = TweetTfIdf.createFromTaggedTokens(tagged, TfType.LOG,
      TfIdfNormalization.COS, true);
  FeatureVectorGenerator generator = new CombinedFeatureVectorGenerator(true,
      tfIdf);

  // Stage 5: one feature vector per tweet; all lists are indexed in parallel.
  List<FeaturedTweet> result = new ArrayList<FeaturedTweet>();
  for (int index = 0; index < tweets.size(); index++) {
    List<TaggedToken> taggedTweet = tagged.get(index);
    Map<Integer, Double> vector = generator.generateFeatureVector(taggedTweet);

    FeaturedTweet featured = new FeaturedTweet(tweets.get(index),
        tokenized.get(index), preprocessed.get(index), taggedTweet, vector);
    result.add(featured);
  }

  return result;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:36,代码来源:FeaturedTweet.java


示例19: tfTaggedTokenTweets

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * Computes the term frequencies of every tagged tweet, preserving the input
 * order.
 *
 * @param tweets tagged tokens, one inner list per tweet
 * @param type term-frequency variant, forwarded to the per-tweet computation
 * @param usePOSTags forwarded to the per-tweet computation
 * @return one term-frequency map per tweet
 */
public static List<Map<String, Double>> tfTaggedTokenTweets(
    List<List<TaggedToken>> tweets, TfType type, boolean usePOSTags) {
  List<Map<String, Double>> perTweet = new ArrayList<Map<String, Double>>();
  for (List<TaggedToken> taggedTweet : tweets) {
    Map<String, Double> frequencies = tfFromTaggedTokens(taggedTweet, type,
        usePOSTags);
    perTweet.add(frequencies);
  }
  return perTweet;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:9,代码来源:TweetTfIdf.java


示例20: tag

import cmu.arktweetnlp.Tagger.TaggedToken; //导入依赖的package包/类
/**
 * POS-tags the given tokens: computes the features of the sentence, decodes
 * the labels greedily and pairs every token with the name of its label.
 *
 * @param tokens tokens of one tweet
 * @return one {@link TaggedToken} per token, in token order
 */
private List<TaggedToken> tag(List<String> tokens) {
  Sentence tweetSentence = new Sentence();
  tweetSentence.tokens = tokens;
  ModelSentence decoded = new ModelSentence(tweetSentence.T());
  m_featureExtractor.computeFeatures(tweetSentence, decoded);
  m_model.greedyDecode(decoded, false);

  List<TaggedToken> result = new ArrayList<TaggedToken>();
  int position = 0;
  while (position < tweetSentence.T()) {
    String word = tokens.get(position);
    String label = m_model.labelVocab.name(decoded.labels[position]);
    result.add(new TaggedToken(word, label));
    position++;
  }
  return result;
}
 
开发者ID:millecker,项目名称:senti-storm,代码行数:16,代码来源:POSTaggerBolt.java



注:本文中的cmu.arktweetnlp.Tagger.TaggedToken类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java TerminateToolException类代码示例发布时间:2022-05-22
下一篇:
Java Spacing类代码示例发布时间:2022-05-22
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap