本文整理汇总了Java中gate.util.OffsetComparator类的典型用法代码示例。如果您正苦于以下问题：Java OffsetComparator类的具体用法？Java OffsetComparator怎么用？Java OffsetComparator使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
OffsetComparator类属于gate.util包,在下文中一共展示了OffsetComparator类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: doit
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * JAPE right-hand-side action: concatenates the "string" feature of the
 * annotations in {@code tagAnnots} (taken in {@link OffsetComparator} order)
 * into a link, and stores that link and one of its path segments as document
 * features.
 *
 * <p>NOTE(review): {@code tagAnnots} is not declared in this method; it is
 * presumably supplied by the surrounding JAPE-generated code - confirm.
 *
 * @param doc the document that receives the "modelName" and "srcLink" features
 * @param bindings not used by this action
 * @param annotations not used by this action
 * @param inputAS not used by this action
 * @param outputAS not used by this action
 * @param ontology not used by this action
 * @throws JapeException declared by the action signature
 */
public void doit(
    gate.Document doc,
    Map<String, AnnotationSet> bindings,
    gate.AnnotationSet annotations,
    gate.AnnotationSet inputAS,
    gate.AnnotationSet outputAS,
    gate.creole.ontology.Ontology ontology) throws JapeException {
  // Copy the annotations into a list so that they can be sorted by offset.
  List<Annotation> orderedTokens = new ArrayList<Annotation>();
  for(Annotation token : tagAnnots) {
    orderedTokens.add(token);
  }
  Collections.sort(orderedTokens, new OffsetComparator());

  // Glue the "string" feature of every token together, in sorted order.
  StringBuilder linkText = new StringBuilder();
  for(Annotation token : orderedTokens) {
    linkText.append(token.getFeatures().get("string"));
  }
  String link = linkText.toString();

  // The fourth "/"-separated segment of the link is stored as the model name.
  String modelName = link.split("/")[3];
  doc.getFeatures().put("modelName", modelName);
  doc.getFeatures().put("srcLink", link);
}
开发者ID:sasinda,项目名称:OntologyBasedInormationExtractor,代码行数:24,代码来源:Preprocess_Meta.java
示例2: getSourceUnitAnnotations
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Returns the unit annotations for the source document.
 *
 * <p>When {@code sourceAnnotations} is set, only unit annotations contained
 * within the span of one of those annotations are collected; otherwise every
 * annotation of the unit type in the task's source annotation set is taken.
 *
 * @return the collected annotations, without duplicates, sorted with an
 *         {@link OffsetComparator}
 */
public List<Annotation> getSourceUnitAnnotations() {
  Set<Annotation> units = new HashSet<Annotation>();
  if(this.sourceAnnotations == null) {
    // no restricting annotations: use the whole source annotation set
    units.addAll(alignmentTask.getSrcAS().get(
        alignmentTask.getUaAnnotType()));
  }
  else {
    for(Annotation parent : sourceAnnotations) {
      AnnotationSet contained = alignmentTask.getSrcAS().getContained(
          parent.getStartNode().getOffset(),
          parent.getEndNode().getOffset());
      if(contained != null && !contained.isEmpty()) {
        units.addAll(contained.get(alignmentTask.getUaAnnotType()));
      }
    }
  }
  List<Annotation> sorted = new ArrayList<Annotation>(units);
  Collections.sort(sorted, new OffsetComparator());
  return sorted;
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:27,代码来源:PUAPair.java
示例3: getTargetUnitAnnotations
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Returns the unit annotations for the target document.
 *
 * <p>When {@code targetAnnotations} is set, only unit annotations contained
 * within the span of one of those annotations are collected; otherwise every
 * annotation of the unit type in the task's target annotation set is taken.
 *
 * @return the collected annotations, without duplicates, sorted with an
 *         {@link OffsetComparator}
 */
public List<Annotation> getTargetUnitAnnotations() {
  Set<Annotation> units = new HashSet<Annotation>();
  if(this.targetAnnotations == null) {
    // no restricting annotations: use the whole target annotation set
    units.addAll(alignmentTask.getTgtAS().get(
        alignmentTask.getUaAnnotType()));
  }
  else {
    for(Annotation parent : targetAnnotations) {
      AnnotationSet contained = alignmentTask.getTgtAS().getContained(
          parent.getStartNode().getOffset(),
          parent.getEndNode().getOffset());
      if(contained != null && !contained.isEmpty()) {
        units.addAll(contained.get(alignmentTask.getUaAnnotType()));
      }
    }
  }
  List<Annotation> sorted = new ArrayList<Annotation>(units);
  Collections.sort(sorted, new OffsetComparator());
  return sorted;
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:27,代码来源:PUAPair.java
示例4: obtainNgramFeatures
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Obtain the N-gram features from an annotation set.
 *
 * <p>The annotations are sorted with an {@link OffsetComparator}; for each
 * one the value of {@code gateFeature} is read (falling back to
 * {@code ConstantParameters.NAMENONFEATURE} when the feature is absent),
 * trimmed, and has every match of {@code ConstantParameters.ITEMSEPARATOR}
 * replaced by {@code ConstantParameters.ITEMSEPREPLACEMENT}.
 *
 * @param annsNgramType the annotations to read; may be {@code null} or empty
 * @param gateFeature name of the feature to extract from each annotation
 * @return one entry per annotation, in sorted order; an empty array when
 *         {@code annsNgramType} is {@code null} or empty
 */
private String[] obtainNgramFeatures(AnnotationSet annsNgramType,
    String gateFeature) {
  // The null check must come before any use of the set: the previous version
  // called size() first, so a null argument failed before the check was reached.
  if(annsNgramType == null || annsNgramType.isEmpty()) {
    return new String[0];
  }
  ArrayList<Annotation> ordered = new ArrayList<Annotation>(annsNgramType);
  Collections.sort(ordered, new OffsetComparator());
  String[] feats = new String[ordered.size()];
  for(int i = 0; i < feats.length; ++i) {
    String value = (String)ordered.get(i).getFeatures().get(gateFeature);
    if(value == null) {
      value = ConstantParameters.NAMENONFEATURE;
    }
    feats[i] = value.trim().replaceAll(ConstantParameters.ITEMSEPARATOR,
        ConstantParameters.ITEMSEPREPLACEMENT);
  }
  return feats;
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:19,代码来源:NLPFeaturesOfDoc.java
示例5: obtainNgramFeaturesFromDifferentType
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Obtain the N-gram features from an annotation set for the Annotation type
 * which is different from the instance's type.
 *
 * <p>The annotations of {@code annsNgramType} are sorted with an
 * {@link OffsetComparator}; for each one,
 * {@code obtainAnnotationForTypeAndFeature} is asked for the value of
 * {@code gateFeature} in {@code annsCurrent} over that annotation's start and
 * end offsets. Non-null values are trimmed and have every match of
 * {@code ConstantParameters.ITEMSEPARATOR} replaced by
 * {@code ConstantParameters.ITEMSEPREPLACEMENT}.
 *
 * @param annsNgramType annotations that define the spans; may be {@code null}
 *        or empty
 * @param annsCurrent annotation set handed to the lookup helper
 * @param gateFeature name of the feature to look up
 * @return one entry per annotation of {@code annsNgramType}, in sorted order
 *         (entries stay {@code null} when the helper returns {@code null});
 *         an empty array when {@code annsNgramType} is {@code null} or empty
 */
private String[] obtainNgramFeaturesFromDifferentType(
    AnnotationSet annsNgramType, AnnotationSet annsCurrent, String gateFeature) {
  // The null check must come before any use of the set: the previous version
  // called size() first, so a null argument failed before the check was reached.
  if(annsNgramType == null || annsNgramType.isEmpty()) {
    return new String[0];
  }
  ArrayList<Annotation> ordered = new ArrayList<Annotation>(annsNgramType);
  Collections.sort(ordered, new OffsetComparator());
  String[] feats = new String[ordered.size()];
  for(int i = 0; i < feats.length; ++i) {
    Annotation span = ordered.get(i);
    feats[i] = obtainAnnotationForTypeAndFeature(annsCurrent, gateFeature,
        span.getStartNode().getOffset(), span.getEndNode().getOffset());
    if(feats[i] != null) {
      feats[i] = feats[i].trim().replaceAll(ConstantParameters.ITEMSEPARATOR,
          ConstantParameters.ITEMSEPREPLACEMENT);
    }
  }
  return feats;
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:22,代码来源:NLPFeaturesOfDoc.java
示例6: testNP
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Runs {@code munpexApp} over a one-sentence document and checks that exactly
 * one "NP" annotation is produced with the expected HEAD, MOD, DET and
 * pronoun values.
 *
 * @throws Exception if document creation or application execution fails
 */
@Test
public void testNP() throws Exception{
  Document testDoc = Factory.newDocument("This is a good test.");
  try {
    testCorpus.add(testDoc);
    try {
      munpexApp.execute();
      // Check the results
      AnnotationSet annots = testDoc.getAnnotations();
      assertNotNull("test document has no annotations!", annots);
      AnnotationSet NPs = annots.get("NP");
      assertNotNull("test document has no NP annotations!", NPs);
      List<Annotation> npList = new ArrayList<Annotation>(NPs);
      // sort in document order
      Collections.sort(npList, new OffsetComparator());
      // JUnit's signature is (message, expected, actual): the expected count
      // goes first so that a failure message reports the values correctly.
      assertEquals("Document should have one NP", 1, npList.size());
      Annotation np = npList.get(0);
      assertEquals("First NP's HEAD should be 'test'",
          "test", getNPHeadString(np));
      assertEquals("First NP's MOD should be 'good'",
          "good", getNPModString(np));
      assertEquals("First NP's DET should be 'a'",
          "a", getNPDetString(np));
      assertEquals("First NP is not a pronoun",
          "false", getNPPronounString(np));
    } finally {
      // always take the document out of the corpus again
      testCorpus.remove(testDoc);
    }
  } finally {
    // always release the document resource
    Factory.deleteResource(testDoc);
  }
}
开发者ID:SemanticSoftwareLab,项目名称:ScholarLens,代码行数:38,代码来源:MuNPExTest_EN.java
示例7: testPronoun
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Runs {@code munpexApp} over a one-sentence document starting with a pronoun
 * and checks that exactly one "NP" annotation is produced, with HEAD "He" and
 * the pronoun flag set.
 *
 * @throws Exception if document creation or application execution fails
 */
@Test
public void testPronoun() throws Exception{
  Document testDoc = Factory.newDocument("He is astounding.");
  try {
    testCorpus.add(testDoc);
    try {
      munpexApp.execute();
      // Check the results
      AnnotationSet annots = testDoc.getAnnotations();
      assertNotNull("test document has no annotations!", annots);
      AnnotationSet NPs = annots.get("NP");
      assertNotNull("test document has no NP annotations!", NPs);
      List<Annotation> npList = new ArrayList<Annotation>(NPs);
      // sort in document order
      Collections.sort(npList, new OffsetComparator());
      // JUnit's signature is (message, expected, actual): the expected count
      // goes first so that a failure message reports the values correctly.
      assertEquals("Document should have one NP", 1, npList.size());
      Annotation np = npList.get(0);
      assertEquals("First NP's HEAD should be 'He'",
          "He", getNPHeadString(np));
      assertEquals("First NP is a pronoun",
          "true", getNPPronounString(np));
    } finally {
      // always take the document out of the corpus again
      testCorpus.remove(testDoc);
    }
  } finally {
    // always release the document resource
    Factory.deleteResource(testDoc);
  }
}
开发者ID:SemanticSoftwareLab,项目名称:ScholarLens,代码行数:32,代码来源:MuNPExTest_EN.java
示例8: testContribution
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Runs {@code rhetectorApp} over a one-sentence document and checks that
 * exactly one "RhetoricalEntity" annotation is produced and that its type
 * string is the SALT ontology's Contribution URI.
 *
 * @throws Exception if document creation or application execution fails
 */
@Test
public void testContribution() throws Exception{
  Document testDoc = Factory.newDocument("In this paper we present our awesome ideas.");
  try {
    testCorpus.add(testDoc);
    try {
      rhetectorApp.execute();
      // Check the results
      AnnotationSet annots = testDoc.getAnnotations();
      assertNotNull("test document has no annotations!", annots);
      AnnotationSet REs = annots.get("RhetoricalEntity");
      assertNotNull("test document has no RhetoricalEntity annotations!", REs);
      List<Annotation> reList = new ArrayList<Annotation>(REs);
      // sort in document order
      Collections.sort(reList, new OffsetComparator());
      // JUnit's signature is (message, expected, actual): the expected count
      // goes first so that a failure message reports the values correctly.
      assertEquals("Document should have one RhetoricalEntity", 1, reList.size());
      Annotation re = reList.get(0);
      assertEquals("The RhetoricalEntity type must be 'Contribution' from the SALT Ontology.",
          "http://salt.semanticauthoring.org/ontologies/sro#Contribution", getRETypeString(re));
    } finally {
      // always take the document out of the corpus again
      testCorpus.remove(testDoc);
    }
  } finally {
    // always release the document resource
    Factory.deleteResource(testDoc);
  }
}
开发者ID:SemanticSoftwareLab,项目名称:TextMining-Rhetector,代码行数:32,代码来源:REExtractionTest.java
示例9: ParserXMLoutputAnalyser
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Creates an analyser that reads "Sentence" and "WordForm" annotations from
 * {@code in} and keeps {@code out} as its output annotation set.
 *
 * @param in annotation set providing the "Sentence" and "WordForm" annotations
 * @param out annotation set stored as the output set
 */
ParserXMLoutputAnalyser(AnnotationSet in, AnnotationSet out) {
  this.outputAS = out;
  this.wordformAS = in.get("WordForm");
  // The sentences are iterated in OffsetComparator order.
  List sortedSentences = new ArrayList(in.get("Sentence"));
  java.util.Collections.sort(sortedSentences, new OffsetComparator());
  this.sentenceIterator = sortedSentences.iterator();
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:8,代码来源:ParserXMLoutputAnalyser.java
示例10: processBySentence
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Processes the input one "Sentence" annotation at a time: for each sentence
 * a temporary file is created and filled by {@code generateInputForParser},
 * then handed to {@code callExternalCommand} together with a
 * {@code ParserXMLoutputAnalyser}.
 *
 * <p>Unless {@code debug} is set, the temporary file is deleted once the
 * sentence has been handled, including when one of the steps fails.
 *
 * @param input annotation set providing the "Sentence" annotations
 * @param output annotation set given to the output analyser
 * @throws ExecutionException if the temporary file cannot be created or
 *         filled (the original exception is kept as the cause)
 */
public void processBySentence(AnnotationSet input, AnnotationSet output)
    throws ExecutionException {
  ParserXMLoutputAnalyser parser = new ParserXMLoutputAnalyser(input, output);
  List sents = new ArrayList(input.get("Sentence"));
  java.util.Collections.sort(sents, new OffsetComparator());
  Iterator sentenceIterator = sents.iterator();
  int sentNum = 0;
  while (sentenceIterator.hasNext()) {
    sentNum++;
    // only the running sentence number is needed here, not the annotation
    sentenceIterator.next();
    File tempForms = null;
    try {
      try {
        tempForms = java.io.File.createTempFile("rasp", ".data_"
            + sentNum);
        this.generateInputForParser(input, tempForms, sentNum);
      } catch (Exception e) {
        throw new ExecutionException(e);
      }
      // the next step consists in calling the morpho scripts
      // and modify the annotations in the CAS accordingly
      callExternalCommand(parser, tempForms, true);
    } finally {
      // Clean up in finally so that a failure in any step above does not
      // leave the temporary file behind.
      if (!debug && tempForms != null) {
        tempForms.delete();
      }
    }
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:28,代码来源:ParserAnnotator.java
示例11: matchArgInstanceWithInst
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Match the argument instance with the relation instance.
 *
 * <p>Both the annotations of {@code instanceType} and those of
 * {@code relInstanceType} are sorted with an {@link OffsetComparator}. Cell
 * {@code [i][k]} of the result is {@code true} when the string value of
 * feature {@code argF} on the i-th instance equals the string value of
 * feature {@code relArgF} on the k-th relation instance.
 *
 * @param annotations annotation set holding both annotation types
 * @param relInstanceType annotation type of the relation instances
 * @param instanceType annotation type of the argument instances
 * @param relArgF feature read from each relation instance
 * @param argF feature read from each argument instance
 * @return matrix indexed by [instance][relation instance]
 */
boolean[][] matchArgInstanceWithInst(AnnotationSet annotations,
    String relInstanceType, String instanceType, String relArgF, String argF) {
  // Argument instances, sorted by offset
  AnnotationSet instanceAnns = annotations.get(instanceType);
  ArrayList instances = new ArrayList();
  if(instanceAnns != null && !instanceAnns.isEmpty()) {
    instances.addAll(instanceAnns);
  }
  Collections.sort(instances, new OffsetComparator());

  // Relation instances, sorted by offset
  AnnotationSet relationAnns = annotations.get(relInstanceType);
  ArrayList relInstances = new ArrayList();
  if(relationAnns != null && !relationAnns.isEmpty()) {
    relInstances.addAll(relationAnns);
  }
  Collections.sort(relInstances, new OffsetComparator());

  // Fill the match matrix
  int rows = instances.size();
  int cols = relInstances.size();
  boolean[][] isArgInRel = new boolean[rows][cols];
  for(int row = 0; row < rows; ++row) {
    Annotation instance = (Annotation)instances.get(row);
    String argValue = instance.getFeatures().get(argF).toString();
    for(int col = 0; col < cols; ++col) {
      Annotation relInstance = (Annotation)relInstances.get(col);
      String relArgValue = relInstance.getFeatures().get(relArgF).toString();
      isArgInRel[row][col] = argValue.equals(relArgValue);
    }
  }
  return isArgInRel;
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:32,代码来源:NLPFeaturesOfDoc.java
示例12: addAnnsInDoc
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Add the annotation into documents for chunk learning.
 *
 * <p>The instance annotations of the input set are sorted with an
 * {@link OffsetComparator}; each chunk refers to them by index
 * ({@code start} / {@code end}). For every chunk a new annotation of type
 * {@code labelName} is added to the output set, spanning from the start of
 * the first token to the end of the last one and carrying the label (under
 * {@code featName}) and the chunk's "prob" value.
 *
 * @param doc document to read from and write to
 * @param chunks the {@code ChunkOrEntity} objects to turn into annotations
 * @param instanceType annotation type of the instances the chunks index into
 * @param featName feature name under which the label is stored
 * @param labelName annotation type of the annotations created
 * @param labelsAndId provides the id-to-label mapping
 * @throws InvalidOffsetException declared for the annotation-set add call
 */
private void addAnnsInDoc(Document doc, HashSet chunks, String instanceType,
    String featName, String labelName, Label2Id labelsAndId)
    throws InvalidOffsetException {
  // An unset or blank set name selects the document's default annotation set.
  boolean defaultInput =
      inputASName == null || inputASName.trim().length() == 0;
  AnnotationSet inputAnns = defaultInput
      ? doc.getAnnotations()
      : doc.getAnnotations(inputASName);
  boolean defaultOutput =
      outputASName == null || outputASName.trim().length() == 0;
  AnnotationSet resultAnns = defaultOutput
      ? doc.getAnnotations()
      : doc.getAnnotations(outputASName);

  AnnotationSet instanceAnns = inputAnns.get(instanceType);
  ArrayList tokens = new ArrayList();
  if(instanceAnns != null && !instanceAnns.isEmpty()) {
    tokens.addAll(instanceAnns);
  }
  Collections.sort(tokens, new OffsetComparator());

  for(Object item : chunks) {
    ChunkOrEntity chunk = (ChunkOrEntity)item;
    FeatureMap chunkFeatures = Factory.newFeatureMap();
    String labelKey = new Integer(chunk.name).toString();
    chunkFeatures.put(featName, labelsAndId.id2Label.get(labelKey).toString());
    chunkFeatures.put("prob", chunk.prob);
    Annotation firstToken = (Annotation)tokens.get(chunk.start);
    Annotation lastToken = (Annotation)tokens.get(chunk.end);
    Node startNode = firstToken.getStartNode();
    Node endNode = lastToken.getEndNode();
    if(startNode == null || endNode == null) {
      continue;
    }
    resultAnns.add(startNode.getOffset(), endNode.getOffset(), labelName,
        chunkFeatures);
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:41,代码来源:LightWeightLearningApi.java
示例13: DefaultIteratingMethod
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Creates the iterating method and sets up its comparator as a new
 * {@link OffsetComparator}.
 */
public DefaultIteratingMethod() {
  this.comparator = new OffsetComparator();
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:7,代码来源:DefaultIteratingMethod.java
示例14: initialize
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Initializes this task: stores the given settings, resolves the source and
 * target documents and annotation sets from the compound document, builds
 * the list of {@code PUAPair}s and creates the alignment actions manager.
 *
 * <p>When {@code puaAnnotType} is non-blank, one pair is created for each
 * group of aligned parent annotations found in the source set (visited in
 * {@link OffsetComparator} order); otherwise a single pair with
 * {@code null} annotation sets is created.
 *
 * @param name name of the task
 * @param srcDocId id of the source document inside the compound document
 * @param tgtDocId id of the target document inside the compound document
 * @param srcASName source annotation set name;
 *        {@code AlignmentEditor.DEFAULT_AS_NAME} selects the default set
 * @param tgtASName target annotation set name;
 *        {@code AlignmentEditor.DEFAULT_AS_NAME} selects the default set
 * @param uaAnnotType annotation type of the unit of alignment
 * @param puaAnnotType annotation type of the parent of the unit of
 *        alignment; may be {@code null} or blank
 * @param puaFeatureName name used to look up the parent-level alignment
 * @param uaFeatureName name used to look up the unit-level alignment
 * @param alignmentView stored as given
 * @param actionsFilePath stored and passed to the actions manager
 */
public void initialize(String name, String srcDocId, String tgtDocId,
    String srcASName, String tgtASName, String uaAnnotType,
    String puaAnnotType, String puaFeatureName, String uaFeatureName,
    String alignmentView, String actionsFilePath) {
  this.srcDocId = srcDocId;
  this.tgtDocId = tgtDocId;
  this.srcASName = srcASName;
  this.tgtASName = tgtASName;
  this.name = name;
  this.alignmentView = alignmentView;
  this.actionsFilePath = actionsFilePath;

  // resolve the documents and the annotation sets to work on
  srcDoc = compoundDocument.getDocument(srcDocId);
  tgtDoc = compoundDocument.getDocument(tgtDocId);
  if(srcASName.equals(AlignmentEditor.DEFAULT_AS_NAME)) {
    srcAS = srcDoc.getAnnotations();
  }
  else {
    srcAS = srcDoc.getAnnotations(srcASName);
  }
  if(tgtASName.equals(AlignmentEditor.DEFAULT_AS_NAME)) {
    tgtAS = tgtDoc.getAnnotations();
  }
  else {
    tgtAS = tgtDoc.getAnnotations(tgtASName);
  }

  this.puaFeatureName = puaFeatureName;
  this.uaFeatureName = uaFeatureName;
  this.uaAnnotType = uaAnnotType;
  this.puaAnnotType = puaAnnotType;
  Alignment parentAlignment = this.compoundDocument
      .getAlignmentInformation(puaFeatureName);
  alignment = this.compoundDocument.getAlignmentInformation(uaFeatureName);

  puaList = new ArrayList<PUAPair>();
  if(puaAnnotType == null || puaAnnotType.trim().length() == 0) {
    // no parent type given: the entire document is a single parent of unit
    puaList.add(new PUAPair(this, null, null));
  }
  else {
    Set<Annotation> visited = new HashSet<Annotation>();
    // sorting the parents makes the pairs come out in the sorted order
    List<Annotation> sortedParents = new ArrayList<Annotation>(srcAS
        .get(puaAnnotType));
    Collections.sort(sortedParents, new OffsetComparator());
    for(Annotation parent : sortedParents) {
      // skip parents already covered by an earlier pair, and unaligned ones
      if(visited.contains(parent)
          || !parentAlignment.isAnnotationAligned(parent)) {
        continue;
      }
      Set<Annotation> tgtGroup = parentAlignment
          .getAlignedAnnotations(parent);
      // gather every source annotation linked to any of the aligned targets
      Set<Annotation> srcGroup = new HashSet<Annotation>();
      for(Annotation linkedTarget : tgtGroup) {
        srcGroup.addAll(parentAlignment.getAlignedAnnotations(linkedTarget));
      }
      // keep only the annotations that belong to the selected sets
      srcGroup.retainAll(srcAS);
      tgtGroup.retainAll(tgtAS);
      visited.addAll(srcGroup);
      puaList.add(new PUAPair(this, srcGroup, tgtGroup));
    }
  }
  alignmentActionsManager = new AlignmentActionsManager(this, actionsFilePath);
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:65,代码来源:AlignmentTask.java
示例15: generateInputForMorpher
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Writes the input file for the morphological analyser and returns the
 * "WordForm" annotations in the order in which they were written.
 *
 * <p>For every "Sentence" annotation a start line is written, followed by
 * one line per distinct span covered by the sentence's word forms (sorted
 * with an {@link OffsetComparator}): the document text of the span, then one
 * {@code form_pos &rasp_colon;probability} entry for each word form located
 * at that span.
 *
 * @param inputAS annotation set providing the "Sentence" and "WordForm"
 *        annotations
 * @param outputFile file to write, using the encoding in {@code charset}
 * @return the word forms that were written, in output order
 * @throws Exception if writing the file or reading the document content fails
 */
private List<Annotation> generateInputForMorpher(AnnotationSet inputAS,
    File outputFile) throws Exception {
  OutputStream fout = new FileOutputStream(outputFile);
  BufferedWriter writer = null;
  try {
    writer = new BufferedWriter(new OutputStreamWriter(fout, charset));
    Iterator sentences = inputAS.get("Sentence").iterator();
    AnnotationSet wfs = inputAS.get("WordForm");
    List<Annotation> wordforms = new ArrayList<Annotation>(wfs.size());
    // * We generate things like a a_AT1 &rasp_colon;0.999748 a_ZZ1
    // * &rasp_colon;2.77533e-05 a_II &rasp_colon;0.000223815
    while (sentences.hasNext()) {
      writer.append("^ ^_^:1\n");
      Annotation sentence = (Annotation) sentences.next();
      AnnotationSet wfinsentence = wfs.getContained(sentence
          .getStartNode().getOffset(), sentence.getEndNode()
          .getOffset());
      // sort them
      List<Annotation> sortedWordForms = new ArrayList<Annotation>(
          wfinsentence);
      java.util.Collections.sort(sortedWordForms, new OffsetComparator());
      // create a single entry for word forms located at the same position
      Long previousstart = null;
      Long previousend = null;
      boolean isFirst = true;
      for (Annotation a : sortedWordForms) {
        FeatureMap fm = a.getFeatures();
        String form = (String) fm.get("string");
        String pos = (String) fm.get("pos");
        Double prob = (Double) fm.get("probability");
        Long laststartoffset = a.getStartNode().getOffset();
        Long lastendoffset = a.getEndNode().getOffset();
        // do we have a new entity?
        // Offsets are boxed Longs, so they must be compared by value with
        // equals(); != would compare object identity. equals(null) is false,
        // so the first word form always starts a new entry.
        if (!laststartoffset.equals(previousstart)
            || !lastendoffset.equals(previousend)) {
          // finish the line
          if (!isFirst) {
            writer.newLine();
          }
          isFirst = false;
          // dump the form as found in the text
          String formToken = getDocument().getContent().getContent(
              laststartoffset, lastendoffset).toString();
          writer.append(formToken);
        }
        // add the rest anyway
        writer.append(" ").append(form).append("_");
        writer.append(pos).append(" &rasp_colon;");
        writer.append(Double.toString(prob));
        wordforms.add(a);
        previousstart = laststartoffset;
        previousend = lastendoffset;
      }
      writer.newLine();
    }
    writer.flush();
    return wordforms;
  } finally {
    // Release the file handle even when something above throws; closing the
    // writer also closes the underlying stream.
    if (writer != null) {
      writer.close();
    } else {
      fout.close();
    }
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:71,代码来源:MorphoAnnotator.java
示例16: gatedoc2LabelsComplete
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Get the labels of each instance in the document.
 *
 * <p>The instance annotations are sorted with an {@link OffsetComparator}.
 * For every annotation of {@code classType} that has a {@code classFeature}
 * value, each instance overlapping it gets that value (possibly several
 * labels, split on {@code ConstantParameters.MULTILABELSEPARATOR}) appended
 * to its entry in {@code classNames}. The first overlapping instance gets
 * each label with {@code ConstantParameters.SUFFIXSTARTTOKEN} appended.
 *
 * <p>Nothing is done, apart from printing a warning, when the number of
 * instance annotations differs from {@code numInstances}.
 *
 * @param annotations annotation set holding instances and class annotations
 * @param instanceType annotation type of the instances
 * @param classType annotation type carrying the labels
 * @param classFeature feature of {@code classType} holding the label(s)
 */
public void gatedoc2LabelsComplete(AnnotationSet annotations,
    String instanceType, String classType, String classFeature) {
  AnnotationSet anns = annotations.get(instanceType);
  ArrayList<Annotation> annotationArray = (anns == null || anns.isEmpty())
      ? new ArrayList<Annotation>()
      : new ArrayList<Annotation>(anns);
  Collections.sort(annotationArray, new OffsetComparator());
  if(numInstances != annotationArray.size()) {
    System.out.println("!!Warning: the number of instances "
        + numInstances + " in the document " + docId
        + " is not right!!!");
    return;
  }
  // For each of entity
  AnnotationSet annsEntity = annotations.get(classType);
  // The lookup above is treated as nullable for the instance type, so guard
  // this one the same way instead of failing in the loop below.
  if(annsEntity == null) return;
  for(Object obj : annsEntity) {
    Annotation annEntity = (Annotation)obj;
    Object classValue = annEntity.getFeatures().get(classFeature);
    if(classValue == null) continue;
    String featName = classValue.toString().trim();
    featName = featName.replaceAll(ConstantParameters.SUFFIXSTARTTOKEN,
        ConstantParameters.SUFFIXSTARTTOKEN + "_");
    featName = featName.replaceAll(ConstantParameters.ITEMSEPARATOR, "_");
    // Get the multilabel from one instance
    String[] featNameArray = featName.split(ConstantParameters.MULTILABELSEPARATOR);
    // Both label variants depend only on the entity, so build them once here
    // rather than again for every overlapping token.
    StringBuilder startLabel = new StringBuilder();
    StringBuilder innerLabel = new StringBuilder();
    for(int j = 0; j < featNameArray.length; ++j) {
      if(j > 0) {
        startLabel.append(ConstantParameters.ITEMSEPARATOR);
        innerLabel.append(ConstantParameters.ITEMSEPARATOR);
      }
      startLabel.append(featNameArray[j]).append(
          ConstantParameters.SUFFIXSTARTTOKEN);
      innerLabel.append(featNameArray[j]);
    }
    boolean isStart = true;
    for(int i = 0; i < numInstances; ++i) {
      Annotation annToken = annotationArray.get(i);
      if(!annToken.overlaps(annEntity)) continue;
      // only the first overlapping token carries the start-token suffix
      String featName0 = isStart ? startLabel.toString() : innerLabel.toString();
      isStart = false;
      if(featName0.length() > 0) {
        if(this.classNames[i] != null)
          this.classNames[i] += ConstantParameters.ITEMSEPARATOR
              + featName0;
        else this.classNames[i] = featName0;
      }
    }
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:53,代码来源:NLPFeaturesOfDoc.java
示例17: gatedoc2NLPFeatures
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Get the Attribute feature for each instance of the document.
 *
 * <p>The instance annotations are sorted with an {@link OffsetComparator}.
 * For instance {@code i} and attribute {@code j}, the feature is read from
 * the instance at index {@code i + featurePosition[j]}: directly from that
 * annotation when {@code typesGate[j]} is the instance type, otherwise via
 * {@code obtainAnnotationForTypeAndFeature} over the annotation's span. The
 * resulting names are appended to {@code featuresInLine[i]}; missing values
 * are written as {@code ConstantParameters.NAMENONFEATURE}.
 *
 * @param annotations annotation set holding all annotation types used
 * @param instanceType annotation type of the instances
 * @param typesGate annotation type of each attribute
 * @param featuresGate feature name of each attribute
 * @param namesGate attribute names, appended to {@code featuresName} and
 *        used to build the feature names
 * @param featurePosition relative instance position of each attribute
 *        (0 means the instance itself)
 */
public void gatedoc2NLPFeatures(AnnotationSet annotations,
    String instanceType, String[] typesGate, String[] featuresGate,
    String[] namesGate, int[] featurePosition) {
  int numTypes = typesGate.length;
  this.totalnumTypes += numTypes;
  for(int i = 0; i < numTypes; ++i) {
    this.featuresName.append(namesGate[i] + ConstantParameters.ITEMSEPARATOR);
  }
  // position suffixes are only needed for non-zero positions
  String[] positionArrStr = new String[numTypes];
  for(int i = 0; i < numTypes; ++i) {
    if(featurePosition[i] != 0)
      positionArrStr[i] = obtainPositionStr(featurePosition[i]);
  }
  AnnotationSet anns = annotations.get(instanceType);
  ArrayList<Annotation> annotationArray = (anns == null || anns.isEmpty())
      ? new ArrayList<Annotation>()
      : new ArrayList<Annotation>(anns);
  Collections.sort(annotationArray, new OffsetComparator());
  String[] features = new String[numTypes];
  int numInstances0 = annotationArray.size();
  // look up the annotation set of every attribute type once
  AnnotationSet[] annsArray = new AnnotationSet[numTypes];
  for(int j = 0; j < numTypes; ++j) {
    annsArray[j] = (AnnotationSet)annotations.get(typesGate[j]);
  }
  for(int i = 0; i < numInstances0; ++i) {
    // for class
    Annotation annToken;
    for(int j = 0; j < numTypes; j++) {
      // for each attribute in different positions, get the token in
      // the corresponding position
      if(featurePosition[j] == 0)
        annToken = annotationArray.get(i);
      else if((featurePosition[j] < 0 && i + featurePosition[j] >= 0)
          || (featurePosition[j] > 0 && i + featurePosition[j] < numInstances0))
        annToken = annotationArray.get(i + featurePosition[j]);
      else {
        // The positional token lies outside the document. The features array
        // is shared by all instances, so clear the slot; otherwise the value
        // computed for the previous instance would be written again here.
        features[j] = null;
        continue;
      }
      if(typesGate[j].equals(instanceType)) {
        features[j] = (String)annToken.getFeatures().get(featuresGate[j]);
      } else { // if not belongs to token
        Long tokenStartOffset = annToken.getStartNode().getOffset();
        Long tokenEndOffset = annToken.getEndNode().getOffset();
        features[j] = obtainAnnotationForTypeAndFeature(annsArray[j],
            featuresGate[j], tokenStartOffset, tokenEndOffset);
      }
      // put the name into the feature name
      if(features[j] != null) {
        features[j] = features[j].trim().replaceAll(
            ConstantParameters.ITEMSEPARATOR,
            ConstantParameters.ITEMSEPREPLACEMENT);
        features[j] = obtainFeatureName(namesGate[j], features[j]);
      }
    }// end of the loop on the types
    int numCounted = 0;
    if(featuresInLine[i] == null) featuresInLine[i] = new StringBuffer();
    for(int j = 0; j < numTypes; ++j) {
      if(features[j] != null) {
        ++numCounted;
        if(featurePosition[j] != 0)
          this.featuresInLine[i].append(features[j]
              + positionArrStr[j] + ConstantParameters.ITEMSEPARATOR);
        else
          this.featuresInLine[i].append(features[j]
              + ConstantParameters.ITEMSEPARATOR);
      } else {
        if(featurePosition[j] != 0)
          this.featuresInLine[i].append(ConstantParameters.NAMENONFEATURE
              + positionArrStr[j] + ConstantParameters.ITEMSEPARATOR);
        else
          this.featuresInLine[i].append(ConstantParameters.NAMENONFEATURE
              + ConstantParameters.ITEMSEPARATOR);
      }
      // NOTE(review): this adds the running count on every attribute, so the
      // total is cumulative rather than the number of non-null features.
      // Left unchanged because readers of featuresCounted are not visible
      // here - confirm whether it should move after the loop.
      featuresCounted[i] += numCounted;
    }
  }// end of the loop on instances
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:78,代码来源:NLPFeaturesOfDoc.java
示例18: gatedoc2LabelsCompleteRel
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Get the label for the relation learning.
 *
 * <p>The instance annotations are sorted with an {@link OffsetComparator}.
 * For every annotation of {@code classType} that has a {@code classFeature}
 * value, each instance whose {@code arg1Inst} / {@code arg2Inst} features
 * equal the entity's {@code arg1C} / {@code arg2C} values gets the label
 * appended to its entry in {@code classNames}. The first matching instance
 * gets the label with {@code ConstantParameters.SUFFIXSTARTTOKEN} appended.
 *
 * <p>Nothing is done, apart from printing a warning, when the number of
 * instance annotations differs from {@code numInstances}.
 *
 * @param annotations annotation set holding instances and class annotations
 * @param instanceType annotation type of the relation instances
 * @param arg1Inst instance feature holding the first argument
 * @param arg2Inst instance feature holding the second argument
 * @param classType annotation type carrying the labels
 * @param classFeature feature of {@code classType} holding the label
 * @param arg1C class-annotation feature holding the first argument
 * @param arg2C class-annotation feature holding the second argument
 */
public void gatedoc2LabelsCompleteRel(AnnotationSet annotations,
    String instanceType, String arg1Inst, String arg2Inst, String classType,
    String classFeature, String arg1C, String arg2C) {
  AnnotationSet anns = annotations.get(instanceType);
  ArrayList annotationArray = (anns == null || anns.isEmpty())
      ? new ArrayList()
      : new ArrayList(anns);
  Collections.sort(annotationArray, new OffsetComparator());
  if(numInstances != annotationArray.size()) {
    System.out.println("!!Warning: the number of instances "
        + numInstances + " in the document " + docId
        + " is not right!!!");
    return;
  }
  // For each of entity
  AnnotationSet annsEntity = annotations.get(classType);
  // The lookup above is treated as nullable for the instance type, so guard
  // this one the same way instead of failing in the loop below.
  if(annsEntity == null) return;
  for(Object obj : annsEntity) {
    Annotation annEntity = (Annotation)obj;
    if(annEntity.getFeatures().get(classFeature) == null) continue;
    String featName = annEntity.getFeatures().get(classFeature).toString();
    featName = featName.trim();
    featName = featName.replaceAll(ConstantParameters.SUFFIXSTARTTOKEN,
        ConstantParameters.SUFFIXSTARTTOKEN + "_");
    // Get the values of the entity args
    String arg1CV = annEntity.getFeatures().get(arg1C).toString();
    String arg2CV = annEntity.getFeatures().get(arg2C).toString();
    boolean isStart = true;
    for(int i = 0; i < numInstances; ++i) {
      Annotation annToken = (Annotation)annotationArray.get(i);
      FeatureMap feats = annToken.getFeatures();
      if(arg1CV.equals(feats.get(arg1Inst))
          && arg2CV.equals(feats.get(arg2Inst))) {
        String featName0 = featName;
        // only the first matching instance carries the start-token suffix
        if(isStart) {
          featName0 += ConstantParameters.SUFFIXSTARTTOKEN;
          isStart = false;
        }
        if(featName0.length() > 0) {
          if(this.classNames[i] instanceof String)
            this.classNames[i] += ConstantParameters.ITEMSEPARATOR
                + featName0;
          else this.classNames[i] = featName0;
        }
      }
    }
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:49,代码来源:NLPFeaturesOfDoc.java
示例19: addAnnsInDocClassification
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Add the annotation into documents for classification.
 *
 * <p>The instance annotations of the input set are sorted with an
 * {@link OffsetComparator}. For every instance {@code i} with
 * {@code selectedLabels[i] >= 0}, a new annotation of type {@code labelName}
 * is added to the output set over the instance's span, carrying the label
 * (under {@code featName}) and {@code valuesLabels[i]} as "prob". For
 * relation data the two argument values are copied from the instance as well.
 *
 * @param doc document to read from and write to
 * @param selectedLabels per-instance label index; negative means no label
 * @param valuesLabels per-instance value stored as the "prob" feature
 * @param instanceType annotation type of the instances
 * @param featName feature name under which the label is stored
 * @param labelName annotation type of the annotations created
 * @param labelsAndId provides the id-to-label mapping
 * @param engineSettings provides the dataset definition (data type and
 *        argument feature names)
 * @throws InvalidOffsetException declared for the annotation-set add call
 */
private void addAnnsInDocClassification(Document doc, int[] selectedLabels,
    float[] valuesLabels, String instanceType, String featName,
    String labelName, Label2Id labelsAndId,
    LearningEngineSettings engineSettings) throws InvalidOffsetException {
  AnnotationSet annsDoc = null;
  if(inputASName == null || inputASName.trim().length() == 0) {
    annsDoc = doc.getAnnotations();
  } else {
    annsDoc = doc.getAnnotations(inputASName);
  }
  AnnotationSet annsDocResults = null;
  if(outputASName == null || outputASName.trim().length() == 0) {
    annsDocResults = doc.getAnnotations();
  } else {
    annsDocResults = doc.getAnnotations(outputASName);
  }
  AnnotationSet anns = annsDoc.get(instanceType);
  ArrayList annotationArray = (anns == null || anns.isEmpty())
      ? new ArrayList()
      : new ArrayList(anns);
  Collections.sort(annotationArray, new OffsetComparator());
  // For the relation extraction
  boolean isRelationData =
      engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData;
  String arg1F = null;
  String arg2F = null;
  if(isRelationData) {
    AttributeRelation relAtt = (AttributeRelation)engineSettings.datasetDefinition.classAttribute;
    arg1F = relAtt.getArg1();
    arg2F = relAtt.getArg2();
  }
  for(int i = 0; i < annotationArray.size(); ++i) {
    if(selectedLabels[i] < 0) continue;
    FeatureMap features = Factory.newFeatureMap();
    // the label map is keyed by the decimal string of (label index + 1)
    features.put(featName, labelsAndId.id2Label.get(
        Integer.toString(selectedLabels[i] + 1)).toString());
    features.put("prob", valuesLabels[i]);
    Annotation ann = (Annotation)annotationArray.get(i);
    // For relation data, need the argument features
    if(isRelationData) {
      String arg1V = ann.getFeatures().get(
          engineSettings.datasetDefinition.arg1Feat).toString();
      String arg2V = ann.getFeatures().get(
          engineSettings.datasetDefinition.arg2Feat).toString();
      features.put(arg1F, arg1V);
      features.put(arg2F, arg2V);
    }
    annsDocResults.add(ann.getStartNode().getOffset(), ann.getEndNode()
        .getOffset(), labelName, features);
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:62,代码来源:LightWeightLearningApi.java
示例20: addLabelListInDocClassification
import gate.util.OffsetComparator; //导入依赖的package包/类
/**
 * Add a ranked list of label for each example in documents for
 * classification, not just a single label.
 *
 * <p>The instance annotations of the input set are sorted with an
 * {@link OffsetComparator}. For each instance the labels are ordered via
 * {@code sortFloatAscIndex} and written as space-separated
 * {@code label:score} entries. That string (plus, for relation data, the two
 * argument values) is stored on the instance's own feature map under keys
 * suffixed with "_resultsList", and a new annotation of type
 * {@code labelName} is added to the output set over the instance's span.
 *
 * <p>NOTE(review): the new annotation is given the instance's own feature
 * map object, so both annotations share one map - confirm this is intended.
 *
 * @param doc document to read from and write to
 * @param multiLabels per-instance labels and scores
 * @param instanceType annotation type of the instances
 * @param featName feature name under which the label list is stored
 * @param labelName annotation type of the annotations created
 * @param labelsAndId provides the id-to-label mapping
 * @param engineSettings provides the dataset definition (data type and
 *        argument feature names)
 * @throws InvalidOffsetException declared for the annotation-set add call
 */
private void addLabelListInDocClassification(Document doc,
    LabelsOfFV[] multiLabels, String instanceType, String featName,
    String labelName, Label2Id labelsAndId,
    LearningEngineSettings engineSettings) throws InvalidOffsetException {
  // An unset or blank set name selects the document's default annotation set.
  boolean defaultInput =
      inputASName == null || inputASName.trim().length() == 0;
  AnnotationSet inputAnns = defaultInput
      ? doc.getAnnotations()
      : doc.getAnnotations(inputASName);
  boolean defaultOutput =
      outputASName == null || outputASName.trim().length() == 0;
  AnnotationSet resultAnns = defaultOutput
      ? doc.getAnnotations()
      : doc.getAnnotations(outputASName);

  AnnotationSet instanceAnns = inputAnns.get(instanceType);
  ArrayList instances = new ArrayList();
  if(instanceAnns != null && !instanceAnns.isEmpty()) {
    instances.addAll(instanceAnns);
  }
  Collections.sort(instances, new OffsetComparator());

  // For the relation extraction
  String arg1F = null;
  String arg2F = null;
  if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
    AttributeRelation relAtt =
        (AttributeRelation)engineSettings.datasetDefinition.classAttribute;
    arg1F = relAtt.getArg1();
    arg2F = relAtt.getArg2();
  }

  int total = instances.size();
  for(int idx = 0; idx < total; ++idx) {
    LabelsOfFV candidates = multiLabels[idx];
    int len = candidates.num;
    int[] order = new int[len];
    sortFloatAscIndex(candidates.probs, order, len, len);

    // label:score pairs, in the order produced by the sort helper
    StringBuilder ranked = new StringBuilder();
    for(int j = 0; j < len; ++j) {
      String label = labelsAndId.id2Label.get(
          Integer.toString(order[j] + 1)).toString();
      ranked.append(label).append(":").append(candidates.probs[order[j]])
          .append(" ");
    }
    FeatureMap resultFeatures = Factory.newFeatureMap();
    resultFeatures.put(featName, ranked.toString().trim());

    Annotation instance = (Annotation)instances.get(idx);
    // For relation data, need the argument features
    if(engineSettings.datasetDefinition.dataType == DataSetDefinition.RelationData) {
      String arg1V = instance.getFeatures().get(
          engineSettings.datasetDefinition.arg1Feat).toString();
      String arg2V = instance.getFeatures().get(
          engineSettings.datasetDefinition.arg2Feat).toString();
      resultFeatures.put(arg1F, arg1V);
      resultFeatures.put(arg2F, arg2V);
    }

    // copy every result onto the instance's own features, under a suffixed key
    FeatureMap instanceFeatures = instance.getFeatures();
    for(Object key : resultFeatures.keySet()) {
      instanceFeatures.put(key.toString() + "_resultsList",
          resultFeatures.get(key));
    }
    resultAnns.add(instance.getStartNode().getOffset(),
        instance.getEndNode().getOffset(), labelName, instanceFeatures);
  }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:75,代码来源:LightWeightLearningApi.java
注：本文中的gate.util.OffsetComparator类示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论