This article collects typical usage examples of the Java class edu.stanford.nlp.dcoref.CorefCoreAnnotations. If you have been wondering what exactly the CorefCoreAnnotations class does and how to use it, the curated class code examples below may help.
The CorefCoreAnnotations class belongs to the edu.stanford.nlp.dcoref package. Nine code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
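Before the examples themselves, here is a minimal quick-start sketch of how CorefCoreAnnotations usually enters a program: run a StanfordCoreNLP pipeline that ends in the dcoref annotator, then read the CorefChainAnnotation key from the resulting Annotation (the same pattern used in Examples 4, 5, and 8 below). The class name CorefQuickStart and the sample sentence are illustrative only.

import java.util.Map;
import java.util.Properties;
import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefQuickStart {
  public static void main(String[] args) {
    // dcoref requires the full chain of upstream annotators
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation document = new Annotation("Alice dropped her keys. She picked them up.");
    pipeline.annotate(document);

    // one CorefChain per coreference cluster, keyed by cluster id
    Map<Integer, CorefChain> chains = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    for (CorefChain chain : chains.values()) {
      System.out.println(chain.getRepresentativeMention().mentionSpan
          + " <- " + chain.getMentionsInTextualOrder());
    }
  }
}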
Example 1: CorefMention
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
public CorefMention(Mention m, IntTuple pos){
  mentionType = m.mentionType;
  number = m.number;
  gender = m.gender;
  animacy = m.animacy;
  startIndex = m.startIndex + 1;
  endIndex = m.endIndex + 1;
  headIndex = m.headIndex + 1;
  corefClusterID = m.corefClusterID;
  sentNum = m.sentNum + 1;
  mentionID = m.mentionID;
  mentionSpan = m.spanToString();

  // index starts from 1
  position = new IntTuple(2);
  position.set(0, pos.get(0) + 1);
  position.set(1, pos.get(1) + 1);

  m.headWord.set(CorefCoreAnnotations.CorefClusterIdAnnotation.class, corefClusterID);
}
Developer ID: benblamey, Project: stanford-nlp, Lines of code: 21, Source: CorefChain.java
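A design detail worth noting: every sentence, token, and head index is shifted to 1-based as the CorefMention is built (hence the repeated + 1), and the mention's head word is tagged with its cluster id through CorefClusterIdAnnotation, so downstream consumers can recover the cluster directly from the token.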
Example 2: newStyleCoreferenceGraphOutput
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
@SuppressWarnings("unused")
private void newStyleCoreferenceGraphOutput(Annotation annotation)
{
  // display the new-style coreference graph
  //List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
  Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (corefChains != null)
  {
    for (CorefChain chain : corefChains.values())
    {
      CorefChain.CorefMention representative = chain.getRepresentativeMention();
      for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder())
      {
        System.out.println(mention);
        if (mention == representative)
          continue;
        // all offsets start at 1!
        System.out.println("\t"
            + mention.mentionID + ": (Mention from sentence " + mention.sentNum + ", "
            + "Head word = " + mention.headIndex
            + ", (" + mention.startIndex + "," + mention.endIndex + ")"
            + ")"
            + " -> "
            + "(Representative from sentence " + representative.sentNum + ", "
            + "Head word = " + representative.headIndex
            + ", (" + representative.startIndex + "," + representative.endIndex + ")"
            + "), that is: \"" +
            mention.mentionSpan + "\" -> \"" +
            representative.mentionSpan + "\"");
      }
    }
  }
}
Developer ID: dmnapolitano, Project: stanford-thrift, Lines of code: 34, Source: StanfordCorefThrift.java
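Note that the representative mention is skipped with an identity comparison (mention == representative); this is safe here because the representative is taken from the very chain being iterated. The printed offsets are 1-based, matching the constructor shown in Example 1.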
Example 3: addCoreference
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
public TokenizedCommunication addCoreference() throws AnalyticException {
  List<Tokenization> tkzList = this.tc.getTokenizations();
  Communication root = this.tc.getRoot();
  final String commId = root.getId();

  Optional<List<CoreMap>> coreSentences = Optional.ofNullable(this.annotation.get(CoreAnnotations.SentencesAnnotation.class));
  List<CoreMap> cmList = coreSentences.orElseThrow(() -> new AnalyticException("Communication " + commId + " did not have any CoreNLP sentences."));
  final int cmListSize = cmList.size();
  final int tkzListSize = tkzList.size();
  if (cmListSize != tkzListSize)
    throw new AnalyticException("Communication " + commId + " had a different number of coreMaps and Tokenizations."
        + "\nCoreMaps: " + cmListSize + " vs. Tokenizations: " + tkzListSize);

  EntityMentionSet ems = new EntityMentionSet()
      .setUuid(gen.next())
      .setMentionList(new ArrayList<>());
  TheoryDependencies td = new TheoryDependencies();
  tkzList.forEach(t -> td.addToTokenizationTheoryList(t.getUuid()));
  // AnnotationMetadata md = this.getMetadata(this.csProps.getCorefToolName())
  //     .setDependencies(td);
  AnnotationMetadata md = AnnotationMetadataFactory.fromCurrentLocalTime().setTool("Stanford Coref").setDependencies(td);
  ems.setMetadata(md);

  EntitySet es = new EntitySet().setUuid(gen.next())
      .setMetadata(md)
      .setEntityList(new ArrayList<Entity>())
      .setMentionSetId(ems.getUuid());

  Optional<Map<Integer, CorefChain>> coreNlpChainsOption = Optional.ofNullable(this.annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class));
  if (coreNlpChainsOption.isPresent()) {
    Map<Integer, CorefChain> chains = coreNlpChainsOption.get();
    for (CorefChain chain : chains.values()) {
      Entity entity = this.makeEntity(chain, ems, tkzList);
      es.addToEntityList(entity);
    }
  } else
    LOGGER.warn("No coref chains found for Communication: " + commId);

  if (!ems.isSetMentionList())
    ems.setMentionList(new ArrayList<EntityMention>());

  // if (ems == null || !ems.isSetMentionList())
  //   throw new AnalyticException(
  //       "Concrete-agiga produced a null EntityMentionSet, or a null mentionList.");
  //
  // if (ems.getMentionListSize() == 0
  //     && !this.allowEmptyEntitiesAndEntityMentions)
  //   throw new AnalyticException(
  //       "Empty entity mentions are disallowed and no entity mentions were produced for communication: "
  //       + comm.getId());
  // else
  //   comm.addToEntityMentionSetList(ems);
  //
  // if (es == null || !es.isSetEntityList())
  //   throw new AnalyticException(
  //       "Concrete-agiga produced a null EntitySet, or a null entityList.");
  //
  // if (es.getEntityListSize() == 0
  //     && !this.allowEmptyEntitiesAndEntityMentions)
  //   throw new AnalyticException(
  //       "Empty entities are disallowed and no entities were produced for communication: "
  //       + comm.getId());
  // else
  root.addToEntityMentionSetList(ems);
  root.addToEntitySetList(es);

  try {
    return new CachedTokenizationCommunication(root);
  } catch (MiscommunicationException e) {
    throw new AnalyticException(e);
  }
}
Developer ID: hltcoe, Project: concrete-stanford-deprecated2, Lines of code: 75, Source: CorefManager.java
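Two patterns stand out here: both the sentence list and the coref chains are wrapped in Optional.ofNullable because either annotation key may be absent from the Annotation, yet they are handled differently — missing sentences are a hard error, while missing chains only produce a warning. The large commented-out block preserves validation logic inherited from concrete-agiga and documents the earlier behavior.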
Example 4: getMentions
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
public static Map<String, Set<String>> getMentions(String text) {
  Set<String> entities = getOrganizationEntity(text);
  // create an empty Annotation just with the given text
  Annotation document = new Annotation(text);
  // run all Annotators on this text
  nlpStanford.pipeline.annotate(document);
  Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  Map<String, Set<String>> mp = new HashMap<>();
  for (Integer temp : graph.keySet()) {
    CorefChain c = graph.get(temp);
    List<CorefChain.CorefMention> cms = c.getMentionsInTextualOrder();
    List<String> cms_parsed = new ArrayList<>();
    // extract the quoted mention span from each CorefMention's toString() output
    for (CorefChain.CorefMention each : cms) {
      Pattern p = Pattern.compile("\"([^\"]*)\"");
      Matcher m = p.matcher(each.toString());
      while (m.find()) {
        cms_parsed.add(m.group(1));
      }
    }
    if (cms_parsed.isEmpty()) {
      continue; // no quoted span could be extracted for this chain
    }
    // put it into a map container
    Set<String> tmp = new HashSet<>();
    if (entities.contains(cms_parsed.get(0).trim().toLowerCase())) { // keep only chains whose first span matches an entity
      for (String element : cms_parsed.subList(1, cms_parsed.size())) {
        // if (entities.contains(element.trim().toLowerCase())) // filters out the values that match an entity
        tmp.add(element);
      }
      mp.put(cms_parsed.get(0), tmp); // take the first span as key and the rest (tmp) as values
    }
  }
  return mp;
}
Developer ID: serendio-labs-stage, Project: diskoveror-ta, Lines of code: 48, Source: OrganizationCoref.java
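Because CorefChain.CorefMention exposes its surface text in the public mentionSpan field, the regex over toString() above can be avoided entirely. A minimal alternative sketch for the span-collection loop, reusing the example's own variable names:

// collect the mention spans directly instead of regex-parsing toString()
List<String> cms_parsed = new ArrayList<>();
for (CorefChain.CorefMention each : c.getMentionsInTextualOrder()) {
  cms_parsed.add(each.mentionSpan);
}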
Example 5: getMentions
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
public static Map<String, Set<String>> getMentions(String text) {
  Set<String> entities = getPersonEntity(text);
  // create an empty Annotation just with the given text
  Annotation document = new Annotation(text);
  // run all Annotators on this text
  nlpStanford.pipeline.annotate(document);
  Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  Map<String, Set<String>> mp = new HashMap<>();
  for (Integer temp : graph.keySet()) {
    CorefChain c = graph.get(temp);
    List<CorefChain.CorefMention> cms = c.getMentionsInTextualOrder();
    List<String> cms_parsed = new ArrayList<>();
    // extract the quoted mention span from each CorefMention's toString() output
    for (CorefChain.CorefMention each : cms) {
      Pattern p = Pattern.compile("\"([^\"]*)\"");
      Matcher m = p.matcher(each.toString());
      while (m.find()) {
        cms_parsed.add(m.group(1));
      }
    }
    if (cms_parsed.isEmpty()) {
      continue; // no quoted span could be extracted for this chain
    }
    // put it into a map container
    Set<String> tmp = new HashSet<>();
    if (entities.contains(cms_parsed.get(0).trim().toLowerCase())) { // keep only chains whose first span matches an entity
      for (String element : cms_parsed.subList(1, cms_parsed.size())) {
        // if (entities.contains(element.trim().toLowerCase())) // filters out the values that match an entity
        tmp.add(element);
      }
      mp.put(cms_parsed.get(0), tmp); // take the first span as key and the rest (tmp) as values
    }
  }
  return mp;
}
Developer ID: serendio-labs-stage, Project: diskoveror-ta, Lines of code: 48, Source: PersonCoref.java
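This method is identical to Example 4 except that it filters against person entities rather than organizations; the mentionSpan simplification sketched after Example 4 applies here unchanged.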
Example 6: load
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
/** This method does its own buffering of the passed in InputStream. */
public Annotation load(InputStream is) throws IOException, ClassNotFoundException, ClassCastException {
  is = new BufferedInputStream(is);
  if (compress) is = new GZIPInputStream(is);
  BufferedReader reader = new BufferedReader(new InputStreamReader(is));
  Annotation doc = new Annotation("");
  String line;

  // read the coref graph (new format)
  Map<Integer, CorefChain> chains = loadCorefChains(reader);
  if (chains != null) doc.set(CorefCoreAnnotations.CorefChainAnnotation.class, chains);

  // read the coref graph (old format)
  line = reader.readLine().trim();
  if (line.length() > 0) {
    String[] bits = line.split(" ");
    if (bits.length % 4 != 0) {
      throw new RuntimeIOException("ERROR: Incorrect format for the serialized coref graph: " + line);
    }
    List<Pair<IntTuple, IntTuple>> corefGraph = new ArrayList<Pair<IntTuple, IntTuple>>();
    for (int i = 0; i < bits.length; i += 4) {
      IntTuple src = new IntTuple(2);
      IntTuple dst = new IntTuple(2);
      src.set(0, Integer.parseInt(bits[i]));
      src.set(1, Integer.parseInt(bits[i + 1]));
      dst.set(0, Integer.parseInt(bits[i + 2]));
      dst.set(1, Integer.parseInt(bits[i + 3]));
      corefGraph.add(new Pair<IntTuple, IntTuple>(src, dst));
    }
    doc.set(CorefCoreAnnotations.CorefGraphAnnotation.class, corefGraph);
  }

  // read individual sentences
  List<CoreMap> sentences = new ArrayList<CoreMap>();
  while ((line = reader.readLine()) != null) {
    CoreMap sentence = new Annotation("");
    // first line is the parse tree; construct it with CoreLabels in Tree nodes
    Tree tree = new PennTreeReader(new StringReader(line), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
    sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
    // read the dependency graphs
    IntermediateSemanticGraph intermCollapsedDeps = loadDependencyGraph(reader);
    IntermediateSemanticGraph intermUncollapsedDeps = loadDependencyGraph(reader);
    IntermediateSemanticGraph intermCcDeps = loadDependencyGraph(reader);
    // the remaining lines until the empty line are tokens
    List<CoreLabel> tokens = new ArrayList<CoreLabel>();
    while ((line = reader.readLine()) != null) {
      if (line.length() == 0) break;
      CoreLabel token = loadToken(line, haveExplicitAntecedent);
      tokens.add(token);
    }
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    // convert the intermediate graphs to actual SemanticGraphs
    SemanticGraph collapsedDeps = convertIntermediateGraph(intermCollapsedDeps, tokens);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsedDeps);
    SemanticGraph uncollapsedDeps = convertIntermediateGraph(intermUncollapsedDeps, tokens);
    sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
    SemanticGraph ccDeps = convertIntermediateGraph(intermCcDeps, tokens);
    sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
    sentences.add(sentence);
  }
  doc.set(CoreAnnotations.SentencesAnnotation.class, sentences);
  reader.close();
  return doc;
}
Developer ID: benblamey, Project: stanford-nlp, Lines of code: 71, Source: CustomAnnotationSerializer.java
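A minimal sketch of reading a serialized document back and extracting its coref chains. The two-argument (compress, haveExplicitAntecedent) constructor is an assumption inferred from the fields referenced in load() and save(), and the file name is illustrative; check the actual class before relying on either:

// hypothetical usage: constructor arguments and file name are assumptions
CustomAnnotationSerializer serializer = new CustomAnnotationSerializer(false, false);
try (InputStream in = new FileInputStream("annotated-doc.ser")) {
  Annotation doc = serializer.load(in);
  Map<Integer, CorefChain> chains = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  if (chains != null) {
    System.out.println("Loaded " + chains.size() + " coref chains");
  }
}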
Example 7: save
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
public void save(Annotation corpus, OutputStream os) throws IOException {
  os = new BufferedOutputStream(os);
  if (compress) os = new GZIPOutputStream(os);
  PrintWriter pw = new PrintWriter(os);

  // save the coref graph in the new format
  Map<Integer, CorefChain> chains = corpus.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  saveCorefChains(chains, pw);

  // save the coref graph on one line
  // Note: this is the old format!
  List<Pair<IntTuple, IntTuple>> corefGraph = corpus.get(CorefCoreAnnotations.CorefGraphAnnotation.class);
  if (corefGraph != null) {
    boolean first = true;
    for (Pair<IntTuple, IntTuple> arc : corefGraph) {
      if (!first) pw.print(" ");
      pw.printf("%d %d %d %d", arc.first.get(0), arc.first.get(1), arc.second.get(0), arc.second.get(1));
      first = false;
    }
  }
  pw.println();

  // save sentences separated by an empty line
  List<CoreMap> sentences = corpus.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sent : sentences) {
    // save the parse tree first, on a single line
    Tree tree = sent.get(TreeCoreAnnotations.TreeAnnotation.class);
    if (tree != null) {
      String treeString = tree.toString();
      // no \n allowed in the parse tree string (might happen due to tokenization of HTML/XML/RDF tags)
      treeString = treeString.replaceAll("\n", " ");
      pw.println(treeString);
    }
    else pw.println();
    SemanticGraph collapsedDeps = sent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
    saveDependencyGraph(collapsedDeps, pw);
    SemanticGraph uncollapsedDeps = sent.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    saveDependencyGraph(uncollapsedDeps, pw);
    SemanticGraph ccDeps = sent.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    saveDependencyGraph(ccDeps, pw);
    // save all sentence tokens
    List<CoreLabel> tokens = sent.get(CoreAnnotations.TokensAnnotation.class);
    if (tokens != null) {
      for (CoreLabel token : tokens) {
        saveToken(token, haveExplicitAntecedent, pw);
        pw.println();
      }
    }
    // add an empty line after every sentence
    pw.println();
  }
  pw.close();
}
Developer ID: benblamey, Project: stanford-nlp, Lines of code: 58, Source: CustomAnnotationSerializer.java
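Read together with load() in Example 6, the on-disk layout is: the coref chains in the new format first, then a single line encoding the old-format coref graph as groups of four integers (source sentence/token, destination sentence/token), then one block per sentence consisting of the parse tree on one line, three serialized dependency graphs, and one token per line, with an empty line terminating each sentence.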
Example 8: runTest
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
@Test
public void runTest() {
  // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution
  Properties props = new Properties();
  props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

  TextReader reader = new TextReader();
  String text = reader.readText(FILE_PATH);

  // create an empty Annotation just with the given text
  Annotation document = new Annotation(text);

  // run all Annotators on this text
  LOGGER.info("Annotating...");
  long start = System.currentTimeMillis();
  pipeline.annotate(document);
  long finish = System.currentTimeMillis();
  LOGGER.info("Finished in " + (finish - start) + " ms");

  // these are all the sentences in this document
  // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    // a fresh StringBuilder per sentence, so each sentence logs only its own tokens
    StringBuilder log = new StringBuilder();
    // traversing the words in the current sentence
    // a CoreLabel is a CoreMap with additional token-specific methods
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      // this is the text of the token
      String word = token.get(CoreAnnotations.TextAnnotation.class);
      // this is the POS tag of the token
      String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
      // this is the NER label of the token
      String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
      log.append(String.format("word \"%s\" POS %s NER %s\n", word, pos, ne));
    }
    LOGGER.info("\n Sentence: " + sentence.toString());
    LOGGER.info(log.toString());
    // this is the parse tree of the current sentence
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    LOGGER.info("\n Tree: \n" + tree.toString());
    // this is the Stanford dependency graph of the current sentence
    SemanticGraph dependencies = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    LOGGER.info("\n Dependencies: \n" + dependencies.toString());
  }

  // This is the coreference link graph
  // Each chain stores a set of mentions that link to each other,
  // along with a method for getting the most representative mention
  // Both sentence and token offsets start at 1!
  Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  LOGGER.info("\n Graph: \n" + graph.toString());
}
Developer ID: pesua, Project: Project-Poppins, Lines of code: 58, Source: StanfordNLPTest.java
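The annotator list here matters: dcoref depends on tokenize, ssplit, pos, lemma, ner, and parse, so all six must precede it in the annotators property, in the order shown, or the pipeline will fail to satisfy dcoref's requirements at construction time.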
Example 9: tag
import edu.stanford.nlp.dcoref.CorefCoreAnnotations; // import the required package/class
public ListMatrix<ListMatrix<MapMatrix<String, Object>>> tag(String text) throws Exception {
  ListMatrix<ListMatrix<MapMatrix<String, Object>>> sentenceList = new DefaultListMatrix<ListMatrix<MapMatrix<String, Object>>>();
  Annotation document = new Annotation(text);
  stanfordCoreNLP.annotate(document);
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  for (CoreMap sentence : sentences) {
    ListMatrix<MapMatrix<String, Object>> tokenList = new DefaultListMatrix<MapMatrix<String, Object>>();
    sentenceList.add(tokenList);
    for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
      MapMatrix<String, Object> tokenMap = new DefaultMapMatrix<String, Object>();
      tokenList.add(tokenMap);
      String word = token.get(CoreAnnotations.TextAnnotation.class);
      tokenMap.put("Token", word);
      String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
      tokenMap.put("POS", pos);
      String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
      tokenMap.put("NE", ne);
    }
    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    SemanticGraph dependencies = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
  }
  Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);
  return sentenceList;
}
Developer ID: jdmp, Project: java-data-mining-package, Lines of code: 37, Source: StanfordTagger.java
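Note that tree, dependencies, and the coref graph are retrieved but never added to the returned matrix; they appear to be left in as extension points and can be dropped if only token-level tags are needed.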
Note: the edu.stanford.nlp.dcoref.CorefCoreAnnotations examples in this article were collected from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, who retain copyright over the source code; consult each project's License before distributing or reusing it. Do not repost without permission.