本文整理汇总了Java中ixa.kaflib.KAFDocument类的典型用法代码示例。如果您正苦于以下问题:Java KAFDocument类的具体用法?Java KAFDocument怎么用?Java KAFDocument使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
KAFDocument类属于ixa.kaflib包,在下文中一共展示了KAFDocument类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: slot2opinionsFromAnnotations
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Reads a NAF file, turns every annotation of its entity layer into an opinion and
 * finally calls print2Semeval2015format (Semeval-absa 2015 output).
 *
 * Each opinion gets the id "o" followed by a running counter starting at 1, the entity
 * string as target, the character span from the first word form of the first term to the
 * end of the last word form of the last term, and empty polarity and category.
 *
 * @param naf path of the NAF file to read
 * @throws ParserConfigurationException
 * @throws Exception
 */
public void slot2opinionsFromAnnotations(String naf) throws ParserConfigurationException, Exception
{
    KAFDocument nafDoc = KAFDocument.createFromFile(new File(naf));
    int opinionCount = 0;
    for (Entity entity : nafDoc.getEntities())
    {
        opinionCount++;
        // entity annotations carry neither polarity nor category, so both stay empty
        String polarity = "";
        String category = "";
        String target = entity.getStr();

        // boundary word forms: first WF of the first term, last WF of the last term
        WF firstWF = entity.getTerms().get(0).getWFs().get(0);
        List<WF> lastTermWFs = entity.getTerms().get(entity.getTerms().size()-1).getWFs();
        WF lastWF = lastTermWFs.get(lastTermWFs.size()-1);

        int from = firstWF.getOffset();
        int to = lastWF.getOffset()+lastWF.getLength();
        // the xpath of the first word form is passed as the last constructor argument
        String sentenceId = firstWF.getXpath();

        this.addOpinion(new Opinion("o"+opinionCount, target, from, to, polarity, category, sentenceId));
    }
    print2Semeval2015format("EliXa_Arun.xml");
}
开发者ID:Elhuyar,项目名称:Elixa,代码行数:29,代码来源:CorpusReader.java
示例2: tagSentence
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Process linguistically input sentence with ixa-pipes (tokenization and PoS tagging).
 * A tagged file is generated for the sentence and stored in the directory
 * given as argument. The sentence id (with ':' replaced by '_') is used as file name.
 * If a tagged file already exists the sentence is not tagged again.
 *
 * @param sId : id of the sentence, used to look the text up through getSentences()
 * @param nafdir : path to the directory were tagged files should be stored
 * @param posModel : model to be used by the PoS tagger (or path to eustagger, see below)
 * @param lemmatizerModel : model to be used by the lemmatizer
 * @return path of the ".kaf" file for this sentence, whether or not it was written by this call
 * @throws IOException
 * @throws JDOMException
 */
public String tagSentence(String sId, String nafdir, String posModel, String lemmatizerModel) throws IOException, JDOMException
{
    String kafname = sId.replace(':', '_');
    String kafPath = nafdir+File.separator+kafname+".kaf";
    if (FileUtilsElh.checkFile(kafPath))
    {
        System.err.println("CorpusReader::tagSentence : file already there:"+kafPath);
    }
    /* if language is basque 'posModel' argument can be used to pass the path to the
     * basque morphological analyzer eustagger. If the path does not contain "eustagger" or "euslem"
     * (usual executable names for the tagger) it defaults to ixa-pipes.
     * */
    else if (eustagger.matcher(posModel).find())
    {
        // NOTE(review): the status returned by eustaggerCall is ignored, as before;
        // the output name is passed without the ".kaf" extension - confirm eustaggerCall adds it.
        NLPpipelineWrapper.eustaggerCall(posModel, getSentences().get(sId), nafdir+File.separator+kafname);
    }
    else
    {
        // the document is only needed on this branch, so no placeholder is created up front
        KAFDocument kafinst = NLPpipelineWrapper.ixaPipesTokPos(getSentences().get(sId), lang, posModel, lemmatizerModel);
        kafinst.save(kafPath);
    }
    return kafPath;
}
开发者ID:Elhuyar,项目名称:Elixa,代码行数:37,代码来源:CorpusReader.java
示例3: tagSentenceTab
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Process linguistically input sentence with ixa-pipes (tokenization and PoS tagging)
 * and store the result in CoNLL (tabulated) form.
 * A tagged file is generated for the sentence and stored in the directory
 * given as argument. The sentence id (with ':' replaced by '_') is used as file name.
 * If a tagged file already exists the sentence is not tagged again.
 *
 * @param sId : id of the sentence, used to look the text up through getSentences()
 * @param nafdir : path to the directory were tagged files should be stored
 * @param posModel : model to be used by the PoS tagger (or path to eustagger, see below)
 * @param lemmatizerModel : model to be used by the lemmatizer
 * @return path of the ".kaf" file for this sentence, whether or not it was written by this call
 * @throws IOException
 * @throws JDOMException
 */
public String tagSentenceTab(String sId, String nafdir, String posModel, String lemmatizerModel) throws IOException, JDOMException
{
    String savename = sId.replace(':', '_');
    String savePath = nafdir+File.separator+savename+".kaf";
    if (FileUtilsElh.checkFile(savePath))
    {
        System.err.println("CorpusReader::tagSentenceTab : file already there:"+savePath);
    }
    /* if language is basque 'posModel' argument can be used to pass the path to the
     * basque morphological analyzer eustagger. If the path does not contain "eustagger" or "euslem"
     * (usual executable names for the tagger) it defaults to ixa-pipes.
     * */
    else if (eustagger.matcher(posModel).find())
    {
        // NOTE(review): the status returned by eustaggerCall is ignored, as before;
        // the output name is passed without the ".kaf" extension - confirm eustaggerCall adds it.
        NLPpipelineWrapper.eustaggerCall(posModel, getSentences().get(sId), nafdir+File.separator+savename);
    }
    else
    {
        String conll = NLPpipelineWrapper.ixaPipesTokPosConll(getSentences().get(sId), lang, posModel, lemmatizerModel);
        // NOTE(review): no charset is given here, so the encoding is whatever this
        // FileUtils overload defaults to - confirm that is what readers of the file expect.
        FileUtils.writeStringToFile(new File(savePath), conll);
    }
    return savePath;
}
开发者ID:Elhuyar,项目名称:Elixa,代码行数:37,代码来源:CorpusReader.java
示例4: extractPOStags
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * PoS tag feature extraction from a kaf document.
 *
 * Every term's PoS tag is prefixed with "POS_". Names that are not yet keys of
 * getAttIndexes() are passed to addNumericFeature and collected in the result.
 *
 * @param kafDoc : postagged kaf document to extract the tags from.
 * @param save : not read by this method.
 * @return sorted set with the feature names added during this call
 */
public TreeSet<String> extractPOStags(KAFDocument kafDoc, boolean save)
{
    TreeSet<String> newFeatures = new TreeSet<String>();
    for (Term t : kafDoc.getTerms())
    {
        String feature = "POS_"+t.getPos();
        // skip tags that are already present as a feature
        if (getAttIndexes().containsKey(feature))
        {
            continue;
        }
        addNumericFeature(feature);
        newFeatures.add(feature);
    }
    return newFeatures;
}
开发者ID:Elhuyar,项目名称:Elixa,代码行数:25,代码来源:Features.java
示例5: getMainTerm
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Returns the main term of a span: starts from the span head (computing and storing it
 * through document.getTermsHead when the span has none) and then keeps following
 * outgoing dependencies labelled "VC" or "IM" until the current term has no such
 * dependency.
 */
private static Term getMainTerm(final KAFDocument document, final Span<Term> span) {
    @SuppressWarnings("deprecation")
    Term current = span.getHead();
    if (current == null) {
        current = document.getTermsHead(span.getTargets());
        span.setHead(current);
    }
    boolean descended;
    do {
        descended = false;
        for (final Dep dep : document.getDepsFromTerm(current)) {
            final String rfunc = dep.getRfunc();
            if (rfunc.equals("VC") || rfunc.equals("IM")) {
                // move to the dependent and restart the scan from there
                current = dep.getTo();
                descended = true;
                break;
            }
        }
    } while (descended);
    return current;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:19,代码来源:SpanLabeller.java
示例6: ixaPipesPosConll
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Runs the Ixa-pipe PoS tagger on a tokenized document and returns the result in CoNLL form.
 *
 * A warning is written to stderr for each model path that FileUtilsElh.checkFile rejects;
 * tagging is attempted regardless.
 *
 * @param tokenizedKaf : tokenized input text in KAF format
 * @param posModelPath : path to the pos tagger model
 * @param lemmaModelPath : path to the lemmatizer model
 *
 * @return String : output of annotatePOSToCoNLL for the input document
 *
 * @throws IOException
 * @throws JDOMException
 */
public static String ixaPipesPosConll(KAFDocument tokenizedKaf, String posModelPath, String lemmaModelPath) throws IOException, JDOMException
{
    // sanity checks on the model paths (report only)
    boolean posModelFound = FileUtilsElh.checkFile(posModelPath);
    if (! posModelFound)
    {
        System.err.println("NLPpipelineWrapper::ixaPipesPos() - provided pos model path "
                + "is problematic, probably pos tagging will end up badly...");
    }
    boolean lemmaModelFound = FileUtilsElh.checkFile(lemmaModelPath);
    if (! lemmaModelFound)
    {
        System.err.println("NLPpipelineWrapper::ixaPipesPos() - provided lemma model path "
                + "is problematic, probably pos tagging will end up badly...");
    }

    // tagger configuration, using the language declared by the document
    String docLang = tokenizedKaf.getLang();
    Properties taggerProps = setPostaggerProperties(posModelPath, lemmaModelPath, docLang, "false", "false");

    // tagger call
    eus.ixa.ixa.pipe.pos.Annotate annotator = new eus.ixa.ixa.pipe.pos.Annotate(taggerProps);
    String conll = annotator.annotatePOSToCoNLL(tokenizedKaf);
    return conll;
}
开发者ID:Elhuyar,项目名称:Elixa,代码行数:35,代码来源:NLPpipelineWrapper.java
示例7: ixaPipesNERC
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Processes a tokenized document with the Ixa-pipe NERC tagger.
 *
 * A linguistic processor entry is registered on the "entities" layer and its begin/end
 * timestamps are set around the annotation call. The input document is annotated in
 * place and returned.
 *
 * @param tokenizedKaf : tokenized input text in KAF format
 * @param nercModelPath : path to the NERC tagger model
 * @param lexer : passed on to setIxaPipesNERCProperties
 * @param dictTag : passed on to setIxaPipesNERCProperties
 * @param dictPath : passed on to setIxaPipesNERCProperties
 *
 * @return KAFDocument : the same document instance, NERC tagged
 *
 * @throws IOException
 * @throws JDOMException
 */
public static KAFDocument ixaPipesNERC(KAFDocument tokenizedKaf, String nercModelPath, String lexer, String dictTag, String dictPath) throws IOException, JDOMException
{
    // NOTE(review): the processor is registered under the "ixa-pipe-pos-" prefix although it
    // annotates the entities layer; looks like a copy-paste from the PoS wrapper. Left unchanged
    // because the name ends up in the document header - confirm before renaming.
    KAFDocument.LinguisticProcessor nercLp = tokenizedKaf.addLinguisticProcessor(
            "entities", "ixa-pipe-pos-"+FileUtilsElh.fileName(nercModelPath), "v1.naf" + "-" + "elixa");
    //NERC tagger parameters
    if (! FileUtilsElh.checkFile(nercModelPath))
    {
        // report only; tagging is attempted anyway
        System.err.println("NLPpipelineWrapper : ixaPipesNERC() - provided NERC model path is problematic, "
                + "probably NERC tagging will end up badly...");
    }
    Properties nercProp = setIxaPipesNERCProperties(nercModelPath,
            tokenizedKaf.getLang(), lexer, dictTag, dictPath);
    //NERC tagger call
    eus.ixa.ixa.pipe.nerc.Annotate nerctagger = new eus.ixa.ixa.pipe.nerc.Annotate(nercProp);
    nercLp.setBeginTimestamp();
    nerctagger.annotateNEsToKAF(tokenizedKaf);
    nercLp.setEndTimestamp();
    return tokenizedKaf;
}
开发者ID:Elhuyar,项目名称:Elixa,代码行数:35,代码来源:NLPpipelineWrapper.java
示例8: service
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Handles a request carrying a NAF document in its "naf" parameter: the document is
 * parsed, filtered, converted to a model by the default RDF generator, rendered by the
 * default renderer and sent back as "text/html".
 */
@Override
public void service(Request request, Response response) throws Exception {
    super.service(request, response);

    // parse the NAF text sent by the client
    final String nafText = request.getParameter("naf");
    final KAFDocument nafDocument = KAFDocument.createFromStream(new StringReader(nafText));

    // filter built with both flags of SRL role linking and opinion linking set to false
    final NAFFilter nafFilter = NAFFilter.builder()
            .withSRLRoleLinking(false, false)
            .withOpinionLinking(false, false)
            .build();
    final RDFGenerator rdfGenerator = RDFGenerator.DEFAULT;
    final Renderer htmlRenderer = Renderer.DEFAULT;
    nafFilter.filter(nafDocument);

    // generate the model and render everything into an in-memory buffer
    final Model rdfModel = rdfGenerator.generate(nafDocument, null);
    final StringWriter buffer = new StringWriter();
    htmlRenderer.renderAll(buffer, nafDocument, rdfModel, null, null);

    super.writeOutput(response, "text/html", buffer.toString());
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:21,代码来源:NafVisualizeHandler.java
示例9: iterator
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Returns a read-only iterator over the documents of this corpus. Documents are
 * obtained one at a time through get(index), for indexes 0 .. files.length - 1.
 */
@Override
public Iterator<KAFDocument> iterator() {
    return new UnmodifiableIterator<KAFDocument>() {

        // index of the next document to hand out
        private int index = 0;

        @Override
        public boolean hasNext() {
            return this.index < Corpus.this.files.length;
        }

        @Override
        public KAFDocument next() {
            // Iterator contract: signal exhaustion with NoSuchElementException instead of
            // whatever get() happens to do with an out-of-range index
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            return get(this.index++);
        }

    };
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:19,代码来源:Corpus.java
示例10: add
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Adds the opinions of a document to this evaluator, sentence by sentence.
 *
 * Opinions carrying the gold label and the test label are indexed by sentence; for every
 * sentence index from 0 up to (but excluding) document.getNumSentences() that has at
 * least one opinion on either side, the two collections are passed to add(gold, test).
 *
 * @return this evaluator, for call chaining
 */
public Evaluator add(final KAFDocument document, final String goldLabel,
        final String testLabel) {
    final Multimap<Integer, Opinion> goldBySentence =
            indexOpinionsBySentence(document.getOpinions(goldLabel));
    final Multimap<Integer, Opinion> testBySentence =
            indexOpinionsBySentence(document.getOpinions(testLabel));

    for (int sentence = 0; sentence < document.getNumSentences(); ++sentence) {
        final Collection<Opinion> gold = goldBySentence.get(sentence);
        final Collection<Opinion> test = testBySentence.get(sentence);
        // nothing to compare when neither side has an opinion in this sentence
        if (gold.isEmpty() && test.isEmpty()) {
            continue;
        }
        add(gold, test);
    }
    return this;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:19,代码来源:OpinionPrecisionRecall.java
示例11: hasHead
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Tells whether the given term is the head of one of the spans of an annotation.
 *
 * Supported annotations are Coref, Entity, Timex3, Predicate and Role. The head of each
 * span is obtained through extractHead and compared to {@code head} by reference.
 *
 * @throws IllegalArgumentException if the annotation is of any other type
 */
public static boolean hasHead(final KAFDocument document, final Object annotation,
        final Term head) {

    // collect the spans to inspect, depending on the annotation type
    final List<Span<Term>> candidateSpans;
    if (annotation instanceof Coref) {
        candidateSpans = ((Coref) annotation).getSpans();
    } else if (annotation instanceof Entity) {
        candidateSpans = ((Entity) annotation).getSpans();
    } else if (annotation instanceof Timex3) {
        // Timex3 spans are made of word forms: map them to terms first
        final Timex3 timex = (Timex3) annotation;
        candidateSpans = ImmutableList.of(KAFDocument.newTermSpan(document
                .getTermsByWFs(timex.getSpan().getTargets())));
    } else if (annotation instanceof Predicate) {
        candidateSpans = ImmutableList.of(((Predicate) annotation).getSpan());
    } else if (annotation instanceof Role) {
        candidateSpans = ImmutableList.of(((Role) annotation).getSpan());
    } else {
        throw new IllegalArgumentException("Unsupported annotation: " + annotation);
    }

    boolean found = false;
    for (final Span<Term> candidate : candidateSpans) {
        if (head == extractHead(document, candidate)) {
            found = true;
            break;
        }
    }
    return found;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:25,代码来源:NAFUtils.java
示例12: generate
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Runs an Extractor over the document, restricted to the requested sentences.
 *
 * A flag array of size getNumSentences() + 1 is built; when {@code sentenceIDs} is null
 * every flag is set, otherwise only the flags at the listed indexes are set.
 *
 * @param document the document to process
 * @param sentenceIDs the sentence indexes to enable, or null for all
 * @param handler the handler passed on to the Extractor
 * @throws RDFHandlerException
 */
public void generate(final KAFDocument document,
        @Nullable final Iterable<Integer> sentenceIDs, final RDFHandler handler)
        throws RDFHandlerException {

    final boolean[] enabled = new boolean[document.getNumSentences() + 1];
    if (sentenceIDs != null) {
        for (final Integer id : sentenceIDs) {
            enabled[id] = true;
        }
    } else {
        Arrays.fill(enabled, true);
    }

    // the URI declared in the document's public element is passed as base URI
    new Extractor(document.getPublic().uri, handler, document, enabled).run();
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:17,代码来源:RDFGenerator.java
示例13: termRangesFor
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Converts a set of terms into ranges of consecutive term indexes.
 *
 * The terms are sorted with Term.OFFSET_COMPARATOR and each one is looked up in the
 * document's term list. Terms whose indexes differ by one are merged into the same
 * range; each range is created as Range.create(firstIndex, lastIndex + 1).
 *
 * @param document the document owning the terms
 * @param terms the terms to convert
 * @return the ranges, in increasing order of the sorted terms
 */
public static List<Range> termRangesFor(final KAFDocument document, final Iterable<Term> terms) {
    final List<Range> ranges = Lists.newArrayList();
    // loop-invariant: fetch the document term list once instead of once per term
    final List<Term> documentTerms = document.getTerms();
    int startIndex = -1;
    int lastIndex = -2;
    for (final Term term : Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms)) {
        final int termIndex = documentTerms.indexOf(term);
        if (termIndex - lastIndex > 1) {
            // gap found: close the range in progress (if any) and open a new one
            if (startIndex >= 0) {
                ranges.add(Range.create(startIndex, lastIndex + 1));
            }
            startIndex = termIndex;
        }
        lastIndex = termIndex;
    }
    // close the last open range
    if (startIndex != -1 && lastIndex >= startIndex) {
        ranges.add(Range.create(startIndex, lastIndex + 1));
    }
    return ranges;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:20,代码来源:NAFUtils.java
示例14: splitSpan
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Splits a span into one sub-span per head.
 *
 * For each head, the set returned by getTermsByDepAncestors for that head is taken, the
 * terms returned by getTermsByDepDescendants for all heads are removed from it, the head
 * itself is added and the result is restricted to the targets of {@code span}. Non-empty
 * results become spans whose terms are sorted with Term.OFFSET_COMPARATOR and whose head
 * is the head being processed.
 */
public static final List<Span<Term>> splitSpan(final KAFDocument document,
        final Span<Term> span, final Iterable<Term> heads) {

    final Set<Term> toExclude = document.getTermsByDepDescendants(heads);
    final List<Span<Term>> result = Lists.newArrayList();

    for (final Term head : heads) {
        final Set<Term> members = document.getTermsByDepAncestors(ImmutableSet.of(head));
        members.removeAll(toExclude);
        members.add(head);
        members.retainAll(span.getTargets());
        if (members.isEmpty()) {
            continue;
        }
        final List<Term> ordered = Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(members);
        result.add(KAFDocument.newTermSpan(ordered, head));
    }
    return result;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:18,代码来源:NAFUtils.java
示例15: isActiveFormHelper
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Walks up the dependency tree from a term to decide whether it is in active form.
 *
 * At each step the governor of the current term is inspected:
 * no governor, a governor whose morphofeat starts with "NN", or a governor that is a form
 * of "be" yields FALSE; a governor that is a form of "have" yields TRUE; a governor
 * tagged VBZ, VBD, VBP or MD yields FALSE; otherwise the check continues from the governor.
 */
private static Boolean isActiveFormHelper(final KAFDocument document, final Term term) {
    final Dep dep = document.getDepToTerm(term);
    if (dep == null) {
        return Boolean.FALSE;
    }
    final Term parent = dep.getFrom();
    // Locale.ROOT: case folding must not depend on the default locale (e.g. under a
    // Turkish locale "IS" lower-cases to a dotless i and would not match "is")
    final String word = parent.getStr().toLowerCase(java.util.Locale.ROOT);
    final String pos = parent.getMorphofeat();
    if (pos.startsWith("NN")) {
        return Boolean.FALSE;
    }
    if (word.matches("am|are|is|was|were|be|been|being")) {
        return Boolean.FALSE;
    }
    if (word.matches("ha(ve|s|d|ving)")) {
        return Boolean.TRUE;
    }
    if (pos.matches("VBZ|VBD|VBP|MD")) {
        return Boolean.FALSE;
    }
    return isActiveFormHelper(document, parent);
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:24,代码来源:NAFUtils.java
示例16: matchExtendedPos
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Builds a predicate accepting the terms whose extended PoS (as computed by extendedPos
 * for the given document) starts with at least one of the supplied prefixes.
 */
public static java.util.function.Predicate<Term> matchExtendedPos(final KAFDocument document,
        final String... posPrefixes) {
    return term -> {
        final String extended = extendedPos(document, term);
        for (final String candidate : posPrefixes) {
            if (extended.startsWith(candidate)) {
                return true;
            }
        }
        return false;
    };
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:18,代码来源:NAFUtils.java
示例17: label
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Scores the candidate terms of a root term.
 *
 * For every candidate, features are computed and the classifier is asked for a
 * prediction. The value stored for the candidate is the probability of the SELECTED
 * label, forced to be at least 0.5 when the predicted label is SELECTED and at most 0.5
 * otherwise.
 *
 * @return a map from each candidate term to its (adjusted) probability estimate
 */
public Map<Term, Float> label(final KAFDocument document, final Term root) {
    final Map<Term, Float> estimates = Maps.newHashMap();
    for (final Term candidate : candidates(document, root, this.posPrefixes)) {
        final Vector featureVector = features(document, root, candidate);
        final LabelledVector prediction = this.classifier.predict(true, featureVector);
        final float probability = prediction.getProbability(SELECTED);

        // keep the estimate on the same side of 0.5 as the predicted label
        final float estimate;
        if (prediction.getLabel() == SELECTED) {
            estimate = probability >= 0.5f ? probability : 0.5f;
        } else {
            estimate = probability < 0.5f ? probability : 0.5f;
        }
        estimates.put(candidate, estimate);
    }
    return estimates;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:19,代码来源:LinkLabeller.java
示例18: text2naf
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Wraps a raw text into a new "en" / "v3" document with placeholder metadata: a public
 * element (fixed URI and id) and a file description (current time, fixed author,
 * file name and title).
 */
static public KAFDocument text2naf(String text) {
    final KAFDocument naf = new KAFDocument("en", "v3");
    naf.setRawText(text);

    // creation time stays empty if formatting fails
    String creationTime;
    try {
        final SimpleDateFormat timestampFormat =
                new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", new Locale("en"));
        creationTime = timestampFormat.format(new Date());
    } catch (Exception e) {
        LOGGER.error(e.getMessage());
        creationTime = "";
    }

    final KAFDocument.Public publicInfo = naf.createPublic();
    publicInfo.uri = "http://www.example.com";
    publicInfo.publicId = "0";

    final KAFDocument.FileDesc fileInfo = naf.createFileDesc();
    fileInfo.creationtime = creationTime;
    fileInfo.author = "Unknown author";
    fileInfo.filename = "test.xml";
    fileInfo.title = "Unknown title";

    return naf;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:24,代码来源:CorpusPreprocessor.java
示例19: addArguments
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Feeds the two trainers with the arguments of an expression.
 *
 * Argument spans for which NAFUtils.extractHead returns null are ignored. The link
 * trainer receives one sample with all the remaining heads; the span trainer receives
 * one sample per head, together with the other heads as excluded terms and the span
 * the head was extracted from.
 */
private void addArguments(final KAFDocument document, final int sentence,
        final Term expressionHead, final Iterable<Span<Term>> argSpans,
        final LinkLabeller.Trainer linkTrainer, final SpanLabeller.Trainer spanTrainer) {

    // parallel lists: argHeads.get(k) is the head of headedSpans.get(k)
    final List<Term> argHeads = Lists.newArrayList();
    final List<Span<Term>> headedSpans = Lists.newArrayList();
    for (final Span<Term> argSpan : argSpans) {
        final Term argHead = NAFUtils.extractHead(document, argSpan);
        if (argHead == null) {
            continue;
        }
        argHeads.add(argHead);
        headedSpans.add(argSpan);
    }

    // one sample for node labelling
    linkTrainer.add(document, expressionHead, argHeads);

    // one sample per argument for span labelling
    for (int k = 0; k < argHeads.size(); ++k) {
        final Term currentHead = argHeads.get(k);
        final List<Term> otherHeads = Lists.newArrayList(argHeads);
        otherHeads.remove(currentHead);
        spanTrainer.add(document, currentHead, otherHeads, headedSpans.get(k));
    }
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:26,代码来源:PipelineTrainer.java
示例20: match
import ixa.kaflib.KAFDocument; //导入依赖的package包/类
/**
 * Tells whether matchRecursive produced at least one solution in which the first
 * tokens.size() positions are all non-null.
 *
 * @return true if such a solution exists, false otherwise (including when
 *         matchRecursive returns null)
 */
public final boolean match(final KAFDocument document, final Iterable<Term> terms,
        final Term head) {
    final Term[][] candidates = matchRecursive(document, terms, head);
    if (candidates == null) {
        return false;
    }
    for (final Term[] candidate : candidates) {
        boolean complete = true;
        for (int pos = 0; pos < this.tokens.size(); ++pos) {
            if (candidate[pos] == null) {
                complete = false;
                break;
            }
        }
        if (complete) {
            return true;
        }
    }
    return false;
}
开发者ID:dkmfbk,项目名称:pikes,代码行数:17,代码来源:Lexicon.java
注:本文中的ixa.kaflib.KAFDocument类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论