This page collects typical usage examples of the Java class edu.stanford.nlp.stats.IntCounter, aimed at anyone wondering what IntCounter does, how to use it, and where to find working examples.
The IntCounter class belongs to the edu.stanford.nlp.stats package. Twenty code examples of the class are shown below, sorted by popularity by default.
Example 1: loadParsesAsVectors
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
 * Reads MUC data in from a file of parse trees, converting the tokens of
 * each MUC story into an IntCounter.
 */
private void loadParsesAsVectors(String path) {
ProcessedData process = new ProcessedData(path, null, null, null);
process.nextStory();
Collection<String> parseStrings = process.getParseStrings();
while( parseStrings != null ) {
// Count lemmas in parse trees.
List<Tree> trees = TreeOperator.stringsToTrees(parseStrings);
IntCounter<String> counter = IRProcessData.countWordsFromParseTrees(trees, _wordnet);
// Multiply by IDF scores.
Counter<String> tfidfCounter = multiplyByIDF(counter);
if( _mucVectors == null ) _mucVectors = new ArrayList<Counter<String>>();
_mucVectors.add(tfidfCounter);
// Next story.
process.nextStory();
parseStrings = process.getParseStrings();
}
System.out.println("Loaded " + (_mucVectors == null ? 0 : _mucVectors.size()) + " vectors from the input parses file.");
}
Developer: nchambers, Project: schemas, Lines: 25, Source: IRDocSim.java
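The multiplyByIDF helper called above is not shown on this page. A minimal sketch of what it plausibly does, assuming an IDF lookup table of the kind the _idfs map in Example 8 suggests (the signature and names here are hypothetical, not the project's actual code):
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.IntCounter;
import java.util.Map;
// Sketch only: weight raw term counts by IDF, producing real-valued TF-IDF scores.
static Counter<String> multiplyByIdf(IntCounter<String> counts, Map<String, Float> idfs) {
  Counter<String> tfidf = new ClassicCounter<String>();
  for( String key : counts.keySet() ) {
    Float idf = idfs.get(key);   // tokens without an IDF score are skipped
    if( idf != null )
      tfidf.setCount(key, counts.getIntCount(key) * idf);
  }
  return tfidf;
}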
Example 2: splitStateCounts
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
* Splits the state counts. Root states and the boundary tag do not
* get their counts increased, and all others are doubled. Betas
* and transition weights are handled later.
*/
private void splitStateCounts() {
// double the count of states...
IntCounter<String> newStateSplitCounts = new IntCounter<String>();
newStateSplitCounts.addAll(stateSplitCounts);
newStateSplitCounts.addAll(stateSplitCounts);
// root states should only have 1
for (String root : startSymbols) {
if (newStateSplitCounts.getCount(root) > 1) {
newStateSplitCounts.setCount(root, 1);
}
}
if (newStateSplitCounts.getCount(Lexicon.BOUNDARY_TAG) > 1) {
newStateSplitCounts.setCount(Lexicon.BOUNDARY_TAG, 1);
}
stateSplitCounts = newStateSplitCounts;
}
Developer: benblamey, Project: stanford-nlp, Lines: 25, Source: SplittingGrammarExtractor.java
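The two addAll calls are what double every count before the root and boundary states are pinned back to 1; a standalone illustration of the idiom:
import edu.stanford.nlp.stats.IntCounter;
IntCounter<String> counts = new IntCounter<String>();
counts.setCount("NP", 3);
counts.setCount("ROOT", 1);
IntCounter<String> doubled = new IntCounter<String>();
doubled.addAll(counts);      // NP=3, ROOT=1
doubled.addAll(counts);      // NP=6, ROOT=2
doubled.setCount("ROOT", 1); // root states keep a single split
// doubled.getIntCount("NP") == 6, doubled.getIntCount("ROOT") == 1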
Example 3: ReadDataTagged
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
protected ReadDataTagged(TaggerConfig config, MaxentTagger maxentTagger,
PairsHolder pairs)
throws IOException
{
this.maxentTagger = maxentTagger;
this.pairs = pairs;
fileRecords = TaggedFileRecord.createRecords(config, config.getFile());
Map<String, IntCounter<String>> wordTagCounts = Generics.newHashMap();
for (TaggedFileRecord record : fileRecords) {
loadFile(record.reader(), wordTagCounts);
}
// By counting the words and then filling the Dictionary, we can
// make it so there are no calls that mutate the Dictionary or its
// TagCount objects later
maxentTagger.dict.fillWordTagCounts(wordTagCounts);
}
Developer: benblamey, Project: stanford-nlp, Lines: 17, Source: ReadDataTagged.java
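loadFile itself is not shown here; a minimal sketch of how it presumably grows the nested word-to-tag-counts map (the helper name countWordTag is hypothetical):
import edu.stanford.nlp.stats.IntCounter;
import java.util.Map;
// Sketch: record one (word, tag) observation in the nested counts.
static void countWordTag(Map<String, IntCounter<String>> wordTagCounts,
                         String word, String tag) {
  IntCounter<String> tagCounts = wordTagCounts.get(word);
  if( tagCounts == null ) {
    tagCounts = new IntCounter<String>();
    wordTagCounts.put(word, tagCounts);
  }
  tagCounts.incrementCount(tag);
}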
Example 4: ReadDataTagged
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
protected ReadDataTagged(TaggerConfig config, MaxentTagger maxentTagger,
PairsHolder pairs)
throws IOException
{
this.maxentTagger = maxentTagger;
this.pairs = pairs;
List<TaggedFileRecord> fileRecords = TaggedFileRecord.createRecords(config, config.getFile());
Map<String, IntCounter<String>> wordTagCounts = Generics.newHashMap();
for (TaggedFileRecord record : fileRecords) {
loadFile(record.reader(), wordTagCounts);
}
// By counting the words and then filling the Dictionary, we can
// make it so there are no calls that mutate the Dictionary or its
// TagCount objects later
maxentTagger.dict.fillWordTagCounts(wordTagCounts);
}
Developer: jaimeguzman, Project: data_mining, Lines: 17, Source: ReadDataTagged.java
Example 5: sumArgs
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
 * Creates an overall count of arguments summed across all of the given predicates.
 * @param names The predicate slot names whose arguments should be summed.
 * @param argCounts The map from slot name to argument counts.
 */
private Counter<String> sumArgs(Collection<String> names, VerbArgCounts argCounts) {
Counter<String> sum = new IntCounter<String>();
if( names != null && argCounts != null ) {
for( String slotname : names ) {
Map<String,Integer> subcounts = argCounts.getArgsForSlot(slotname);
if( subcounts != null ) {
for( Map.Entry<String,Integer> entry : subcounts.entrySet() )
sum.incrementCount(entry.getKey(), entry.getValue());
}
}
}
return sum;
}
Developer: nchambers, Project: schemas, Lines: 21, Source: SlotInducer.java
Example 6: countWordsFromParseTrees
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
 * Given parse trees, count the POS-tagged tokens and lemmatize them, then
 * return the overall counts of the lemmas (nouns, verbs, and adjectives only).
 * @param trees The parse trees whose tokens should be counted.
 * @param wordnet The WordNet interface used for lemmatization.
 * @return A counter of lemma keys, or null if trees is null.
 */
public static IntCounter<String> countWordsFromParseTrees(Collection<Tree> trees, WordNet wordnet) {
if( trees != null ) {
IntCounter<String> tokenCounts = ParsesToCounts.countPOSTokens(trees);
IntCounter<String> lemmaCounts = new IntCounter<String>();
for( String token : tokenCounts.keySet() ) {
String baseToken = token.substring(2);
String lemma = null;
char tag = token.charAt(0);
if( tag == 'v' )
lemma = wordnet.verbToLemma(baseToken);
else if( tag == 'n' )
lemma = wordnet.nounToLemma(baseToken);
else if( tag == 'j' )
lemma = wordnet.adjectiveToLemma(baseToken);
if( lemma == null ) lemma = baseToken;
// Only count nouns, verbs, and adjectives.
if( tag == 'n' || tag == 'v' || tag == 'j' ) {
if( lemma != null && lemma.length() > 2 ) {
lemma = CalculateIDF.createKey(lemma, tag);
lemmaCounts.incrementCount(lemma, tokenCounts.getIntCount(token));
}
}
}
lemmaCounts.setCount("*TOTAL*", tokenCounts.totalCount());
// System.out.println("size " + lemmaCounts.size());
return lemmaCounts;
}
else return null;
}
Developer: nchambers, Project: schemas, Lines: 39, Source: IRProcessData.java
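The charAt(0) and substring(2) calls above imply that the keys produced by ParsesToCounts.countPOSTokens (Example 20) have the shape tag character, dash, lowercased token, e.g. "v-helping"; that shape is an inference from the code on this page rather than documented API. In miniature:
String key = "v-helping";            // normalized POS char + '-' + lowercased token
char tag = key.charAt(0);            // 'v' for verb ('n' noun, 'j' adjective)
String baseToken = key.substring(2); // "helping"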
Example 7: countTokensInFile
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
* Given a file of stories with parse trees, we count all of the tokens and output
* the counts, one story per line, to a new file.
* @param dir The directory with our parses file.
* @param filename The parses filename.
*/
private void countTokensInFile(String dir, String filename) {
String inpath = dir + File.separator + filename;
String outpath = _outDir + File.separator + filename;
if( outpath.endsWith(".gz") ) outpath = outpath.substring(0, outpath.length()-3);
// Open the file.
ProcessedData process = new ProcessedData(inpath, null, null, null);
try {
// Create the output file.
System.out.println("Writing counts to " + outpath);
PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(outpath, false)));
// Read the documents one by one.
process.nextStory();
Collection<String> parseStrings = process.getParseStrings();
while( parseStrings != null ) {
if( _duplicates.contains(process.currentStory()) ) {
// Nothing.
}
else {
System.out.print(process.currentStory());
IntCounter<String> tokenCounts = countWordsFromParseTrees(TreeOperator.stringsToTrees(parseStrings), _wordnet);
String stringCounter = ParsesToCounts.counterToString(tokenCounts);
writer.write(process.currentStory() + "\t");
writer.write(stringCounter);
writer.write("\n");
System.out.println("...wrote");
}
// Next story.
process.nextStory();
parseStrings = process.getParseStrings();
}
writer.close();
} catch( IOException ex ) {
ex.printStackTrace();
System.exit(-1);
}
}
Developer: nchambers, Project: schemas, Lines: 46, Source: IRProcessData.java
Example 8: countWordsFromParseTrees
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
 * Given parse trees, count the POS-tagged tokens and lemmatize them, then
 * return the overall counts of the lemmas, skipping lemmas whose IDF scores
 * are too low (i.e. super-frequent tokens).
 * @param trees The parse trees whose tokens should be counted.
 * @return A counter of lemma keys, or null if trees is null.
 */
private Counter<String> countWordsFromParseTrees(Collection<Tree> trees) {
if( trees != null ) {
IntCounter<String> tokenCounts = ParsesToCounts.countPOSTokens(trees);
IntCounter<String> lemmaCounts = new IntCounter<String>();
for( String token : tokenCounts.keySet() ) {
String baseToken = token.substring(2);
String lemma = null;
char tag = token.charAt(0);
if( tag == 'v' )
lemma = _wordnet.verbToLemma(baseToken);
else if( tag == 'n' )
lemma = _wordnet.nounToLemma(baseToken);
else if( tag == 'j' )
lemma = _wordnet.adjectiveToLemma(baseToken);
if( lemma == null ) lemma = baseToken;
// Only count nouns, verbs, and adjectives.
if( tag == 'n' || tag == 'v' || tag == 'j' ) {
if( lemma != null && lemma.length() > 2 ) {
lemma = CalculateIDF.createKey(lemma, tag);
// Only count tokens that aren't super frequent (low IDF scores).
if( _idfs.get(lemma) > 1.0f )
lemmaCounts.incrementCount(lemma, tokenCounts.getIntCount(token));
// else System.out.println("Skipping1 " + token);
}
// else System.out.println("Skipping2 " + token);
}
}
lemmaCounts.setCount("*TOTAL*", tokenCounts.totalCount());
// System.out.println("size " + lemmaCounts.size());
return lemmaCounts;
}
else return null;
}
Developer: nchambers, Project: schemas, Lines: 43, Source: IRDocuments.java
Example 9: counterFromString
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
* Builds a Counter from the saved string version.
*/
private Counter<String> counterFromString(String str) {
String[] items = str.split(", ");
Counter<String> counts = new IntCounter<String>();
for( String item : items ) {
int equalsign = item.indexOf('=');
int num = Integer.valueOf(item.substring(equalsign+1));
if( num > 0 )
counts.setCount(item.substring(0,equalsign), num);
}
return counts;
}
Developer: nchambers, Project: schemas, Lines: 15, Source: IRDocuments.java
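The split(", ") and indexOf('=') logic implies a saved format of key=count pairs joined by ", ", with no surrounding braces (an IntCounter.toString() would add { }, which this parser does not strip). A hypothetical round trip:
import edu.stanford.nlp.stats.Counter;
// Hypothetical saved string in the format counterFromString expects.
String saved = "kidnap=40, release=23, hostage=7";
Counter<String> counts = counterFromString(saved); // the helper above
// counts.getCount("kidnap") == 40.0; pairs with a zero count are skipped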
Example 10: fromFile
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
 * Reads a counter from the first line of a tab-separated file, where keys
 * and values alternate after each tab. Only the first line is read.
 * @param filename The file to read.
 * @return A counter, or null if the file is empty or cannot be read.
 */
public static IntCounter<String> fromFile(String filename) {
  try( BufferedReader in = new BufferedReader(new FileReader(filename)) ) {
    String line = in.readLine();
    if( line != null )
      return fromString(line);
  } catch( IOException ex ) { ex.printStackTrace(); }
  return null;
}
Developer: nchambers, Project: schemas, Lines: 18, Source: ParsesToCounts.java
Example 11: fromString
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
* Parses a single line of tab-separated keys and values into an IntCounter.
* @param line The line with the counts.
* @return A counter, or null if the given line is null.
*/
public static IntCounter<String> fromString(String line) {
if( line != null ) {
IntCounter<String> counter = new IntCounter<String>();
String[] parts = line.split("\t");
for( int i = 0; i + 1 < parts.length; i += 2 ) {
counter.incrementCount(parts[i], Integer.valueOf(parts[i+1]));
}
return counter;
} else return null;
}
Developer: nchambers, Project: schemas, Lines: 16, Source: ParsesToCounts.java
Example 12: counterToString
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
* Converts a counter to a single line of tab-separated keys and counts.
* @param counter The counter to stringify.
* @return A string of the counter, or null if the given counter is null.
*/
public static String counterToString(IntCounter<String> counter) {
if( counter != null ) {
StringBuffer sb = new StringBuffer();
int i = 0;
for( String key : counter.keySet() ) {
if( i++ > 0 ) sb.append('\t');
sb.append(key);
sb.append('\t');
sb.append(counter.getIntCount(key));
}
return sb.toString();
} else return null;
}
Developer: nchambers, Project: schemas, Lines: 19, Source: ParsesToCounts.java
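With the loop in Example 11 stepping two fields at a time, counterToString and fromString invert each other; a quick round-trip sketch:
import edu.stanford.nlp.stats.IntCounter;
IntCounter<String> original = new IntCounter<String>();
original.incrementCount("kidnap", 40);
original.incrementCount("release", 23);
String line = ParsesToCounts.counterToString(original); // e.g. "kidnap\t40\trelease\t23"
IntCounter<String> restored = ParsesToCounts.fromString(line);
// restored.getIntCount("kidnap") == 40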
Example 13: findBestDocumentsInFile
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
 * Traverses the lines in the given file; each line is an IntCounter representing
 * a single document from Gigaword. Compares each document vector against all of
 * the global MUC vectors to see whether it is close to a MUC document.
 * @param path Path to a file of IntCounters.
 * @return A list of document names that matched.
 */
private List<String> findBestDocumentsInFile(String path) {
List<String> matches = new ArrayList<String>();
BufferedReader in = null;
try {
if( path.endsWith(".gz") )
in = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(path))));
else in = new BufferedReader(new FileReader(path));
String line;
while( (line = in.readLine()) != null ) {
// e.g. nyt_eng_199411 kidnap 40 release 23 ...
int tab = line.indexOf('\t');
IntCounter<String> gigadoc = ParsesToCounts.fromString(line.substring(tab+1));
Counter<String> gigavec = multiplyByIDF(gigadoc);
for( Counter<String> mucdoc : _mucVectors ) {
double cosine = Counters.cosine(gigavec, mucdoc);
if( cosine > _minCosine ) {
String docname = line.substring(0,tab);
System.out.println("Matched " + docname);
matches.add(docname);
}
}
}
} catch( Exception ex ) {
System.err.println("Error opening " + path);
ex.printStackTrace();
}
return matches;
}
Developer: nchambers, Project: schemas, Lines: 40, Source: IRDocSim.java
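Counters.cosine treats the two counters as sparse vectors and returns their cosine similarity; a small self-contained example of the comparison used above:
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
Counter<String> doc1 = new ClassicCounter<String>();
doc1.setCount("kidnap", 2.0);
doc1.setCount("release", 1.0);
Counter<String> doc2 = new ClassicCounter<String>();
doc2.setCount("kidnap", 1.0);
doc2.setCount("hostage", 1.0);
double cosine = Counters.cosine(doc1, doc2); // 2 / (sqrt(5) * sqrt(2)) ≈ 0.63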
Example 14: findSyntacticSlots
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
/**
* Count all governor dependencies that see a gold entity as its dependent.
*/
public Map<String,Counter<String>> findSyntacticSlots(List<Tree> trees, List<List<TypedDependency>> alldeps, List<EntityMention> entities, List<MUCEntity> goldEntities) {
Map<String,Counter<String>> mucslotToCounts = new HashMap<String,Counter<String>>();
// Find any mentions in the sentence that match gold entities, save their indices.
if( goldEntities != null ) {
for( EntityMention mention : entities ) {
for( int sid = 0; sid < trees.size(); sid++ ) {
if( mention.sid()-1 == sid ) {
for( MUCEntity gold : goldEntities ) {
if( TemplateTester.stringMatchToMUCEntity(gold, mention.string(), false) ) {
System.out.println("Gold mention " + mention + " from muc gold " + gold);
List<String> slots = StatisticsDeps.spanToSlots(mention.start(), mention.end()+1, trees.get(sid), alldeps.get(sid), _wordnet);
for( String slot : slots ) {
String type = gold._templateType + ":" + gold._slotType;
if( !mucslotToCounts.containsKey(type) )
mucslotToCounts.put(type, new IntCounter<String>());
mucslotToCounts.get(type).incrementCount(slot);
}
System.out.println(" -- gold slots: " + slots);
}
}
}
}
}
}
return mucslotToCounts;
}
Developer: nchambers, Project: schemas, Lines: 30, Source: LabelDocument.java
Example 15: aggregate
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
public Object aggregate(Class key, List<? extends CoreMap> in) {
if (in == null) return null;
IntCounter<Object> counter = new IntCounter<Object>();
for (CoreMap cm:in) {
Object obj = cm.get(key);
if (obj != null && (ignoreSet == null || !ignoreSet.contains(obj))) {
counter.incrementCount(obj);
}
}
if (counter.size() > 0) {
return counter.argmax();
} else {
return null;
}
}
Developer: paulirwin, Project: Stanford.NER.Net, Lines: 16, Source: CoreMapAttributeAggregator.java
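The aggregator returns the most frequent value via IntCounter.argmax(); in miniature:
import edu.stanford.nlp.stats.IntCounter;
IntCounter<Object> votes = new IntCounter<Object>();
votes.incrementCount("ORGANIZATION");
votes.incrementCount("ORGANIZATION");
votes.incrementCount("PERSON");
Object winner = votes.argmax(); // "ORGANIZATION"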
Example 16: TagCount
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
TagCount(IntCounter<String> tagCounts) {
for (String tag : tagCounts.keySet()) {
map.put(tag, tagCounts.getIntCount(tag));
}
getTagsCache = map.keySet().toArray(new String[map.keySet().size()]);
sumCache = calculateSumCache();
}
Developer: benblamey, Project: stanford-nlp, Lines: 9, Source: TagCount.java
Example 17: contextIncompatible
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
public static boolean contextIncompatible(Mention men, Mention ant, Dictionaries dict) {
String antHead = ant.headWord.word();
if ( (ant.mentionType == MentionType.PROPER)
&& ant.sentNum != men.sentNum
&& !isContextOverlapping(ant,men)
&& dict.NE_signatures.containsKey(antHead)) {
IntCounter<String> ranks = Counters.toRankCounter(dict.NE_signatures.get(antHead));
List<String> context;
if (!men.getPremodifierContext().isEmpty()) {
context = men.getPremodifierContext();
} else {
context = men.getContext();
}
if (!context.isEmpty()) {
int highestRank = 100000;
for (String w: context) {
if (ranks.containsKey(w) && ranks.getIntCount(w) < highestRank) {
highestRank = ranks.getIntCount(w);
}
// check in the other direction
if (dict.NE_signatures.containsKey(w)) {
IntCounter<String> reverseRanks = Counters.toRankCounter(dict.NE_signatures.get(w));
if (reverseRanks.containsKey(antHead) && reverseRanks.getIntCount(antHead) < highestRank) {
highestRank = reverseRanks.getIntCount(antHead);
}
}
}
if (highestRank > 10) return true;
}
}
return false;
}
Developer: benblamey, Project: stanford-nlp, Lines: 33, Source: Rules.java
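Counters.toRankCounter replaces each count with its frequency rank (1 for the largest count), so a lower number means a stronger association; the rule above only fires when even the best context word ranks worse than 10. A small example:
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.IntCounter;
Counter<String> signature = new ClassicCounter<String>();
signature.setCount("company", 120.0);
signature.setCount("shares", 45.0);
signature.setCount("zebra", 1.0);
IntCounter<String> ranks = Counters.toRankCounter(signature);
// ranks.getIntCount("company") == 1, "shares" == 2, "zebra" == 3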
Example 18: sentenceContextIncompatible
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
public static boolean sentenceContextIncompatible(Mention men, Mention ant, Dictionaries dict) {
if ( (ant.mentionType != MentionType.PROPER)
&& (ant.sentNum != men.sentNum)
&& (men.mentionType != MentionType.PROPER)
&& !isContextOverlapping(ant,men)) {
List<String> context1 = !ant.getPremodifierContext().isEmpty() ? ant.getPremodifierContext() : ant.getContext();
List<String> context2 = !men.getPremodifierContext().isEmpty() ? men.getPremodifierContext() : men.getContext();
if (!context1.isEmpty() && !context2.isEmpty()) {
int highestRank = 100000;
for (String w1: context1) {
for (String w2: context2) {
// check the forward direction
if (dict.NE_signatures.containsKey(w1)) {
IntCounter<String> ranks = Counters.toRankCounter(dict.NE_signatures.get(w1));
if (ranks.containsKey(w2) && ranks.getIntCount(w2) < highestRank) {
highestRank = ranks.getIntCount(w2);
}
}
// check in the other direction
if (dict.NE_signatures.containsKey(w2)) {
IntCounter<String> reverseRanks = Counters.toRankCounter(dict.NE_signatures.get(w2));
if (reverseRanks.containsKey(w1) && reverseRanks.getIntCount(w1) < highestRank) {
highestRank = reverseRanks.getIntCount(w1);
}
}
}
}
if (highestRank > 10) return true;
}
}
return false;
}
Developer: benblamey, Project: stanford-nlp, Lines: 33, Source: Rules.java
Example 19: appendIntCountStats
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
private static <E> void appendIntCountStats(StringBuilder sb, String label, IntCounter<E> counts)
{
sb.append(label).append("\n");
List<E> sortedKeys = Counters.toSortedList(counts);
int total = counts.totalIntCount();
for (E key:sortedKeys) {
int count = counts.getIntCount(key);
appendFrac(sb, key.toString(), count, total);
sb.append("\n");
}
}
Developer: benblamey, Project: stanford-nlp, Lines: 12, Source: CoNLL2011DocumentReader.java
Example 20: countPOSTokens
import edu.stanford.nlp.stats.IntCounter; // import the required package/class
public static IntCounter<String> countPOSTokens(Collection<Tree> trees) {
IntCounter<String> particleParents = new IntCounter<String>();
IntCounter<String> tokens = new IntCounter<String>();
for( Tree tree : trees ) {
// System.out.println("tree: " + tree);
Collection<Tree> leaves = TreeOperator.leavesFromTree(tree);
for( Tree leaf : leaves ) {
// Lowercase the token.
String token = leaf.getChild(0).value().toLowerCase();
String tag = leaf.value();
// Particles.
// e.g. (PRT (RP up))
if( tag.equals("RP") ) {
Tree gparent = leaf.ancestor(2, tree);
Tree gparentTree = gparent.children()[0];
String gVerbHead = gparentTree.getChild(0).value().toLowerCase();
char gTag = CalculateIDF.normalizePOS(gparentTree.value());
String headKey = CalculateIDF.createKey(gVerbHead + "_" + token, gTag);
// System.out.println("Particle verb " + headKey);
// System.out.println(" Incrementing " + headKey);
tokens.incrementCount(headKey);
particleParents.incrementCount(CalculateIDF.createKey(gVerbHead, gTag));
}
// All other words.
else {
// e.g. v-helping
String key = CalculateIDF.createKey(token, CalculateIDF.normalizePOS(tag));
tokens.incrementCount(key);
}
}
}
// Remove counts from verbs that are counted with their particles.
for( Map.Entry<String, Double> entry : particleParents.entrySet() ) {
// System.out.println(" Decrementing " + entry.getKey());
tokens.decrementCount(entry.getKey(), entry.getValue());
}
// System.out.println("Returning " + tokens);
return tokens;
}
Developer: nchambers, Project: schemas, Lines: 45, Source: ParsesToCounts.java
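The final loop cancels the standalone count of any verb that was also credited together with its particle; the same bookkeeping in miniature, using the key shape inferred earlier:
import edu.stanford.nlp.stats.IntCounter;
IntCounter<String> tokens = new IntCounter<String>();
tokens.incrementCount("v-give");    // the verb leaf, counted on its own
tokens.incrementCount("v-give_up"); // the verb re-counted with its particle
IntCounter<String> particleParents = new IntCounter<String>();
particleParents.incrementCount("v-give");
// Remove the double count for verbs seen with particles.
for( String parent : particleParents.keySet() )
  tokens.decrementCount(parent, particleParents.getCount(parent));
// tokens: v-give = 0, v-give_up = 1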
Note: the edu.stanford.nlp.stats.IntCounter examples in this article were collected from GitHub, MSDocs, and similar source-code and documentation platforms, with snippets selected from open-source projects contributed by their authors. Copyright of the source code remains with the original authors; distribution and use should follow the corresponding project's license. Do not repost without permission.