• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Java Sentence类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中org.cleartk.token.type.Sentence的典型用法代码示例。如果您正苦于以下问题：Java Sentence类的具体用法？Java Sentence怎么用？Java Sentence使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



Sentence类属于org.cleartk.token.type包,在下文中一共展示了Sentence类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: mpAnalyzerTest

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Runs {@code mpAnalyzer} over a seven-token sentence and verifies the lemma
 * reported for each token.
 */
@Test
public void mpAnalyzerTest() throws Exception {
  this.jCas.reset();
  tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem");

  // The same string is passed for the second and third arguments.
  String words = "jump jumping jumped jumper happy happier happiest";
  String posTags = "VBP VBG VBP NN JJ JJ JJ";
  this.tokenBuilder.buildTokens(jCas, words, words, posTags);

  mpAnalyzer.process(jCas);

  List<String> lemmas = new ArrayList<String>();
  for (Token token : JCasUtil.select(this.jCas, Token.class)) {
    lemmas.add(token.getLemma());
  }

  List<String> expectedLemmas = Arrays.asList(
      "jump jump jumped jumper happy happier happiest".split(" "));
  Assert.assertEquals(expectedLemmas, lemmas);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:MpAnalyzerTest.java


示例2: testPeriod

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Tokenizes two sentences that each end in a period and checks that every
 * word and each period comes back as its own token, in order.
 */
@Test
public void testPeriod() throws Exception {
	String text = "The sides was so steep and the bushes so thick. We tramped and clumb. ";
	jCas.setDocumentText(text);
	new Sentence(jCas, 0, 47).addToIndexes();
	new Sentence(jCas, 48, 70).addToIndexes();
	SimplePipeline.runPipeline(jCas, tokenizer);

	// Expected covered text of token 0, 1, 2, ... after tokenization.
	String[] expectedTokens = {
		"The", "sides", "was", "so", "steep", "and", "the", "bushes", "so", "thick", ".",
		"We", "tramped", "and", "clumb", "."
	};
	for (int index = 0; index < expectedTokens.length; index++) {
		assertEquals(expectedTokens[index], getToken(index).getCoveredText());
	}
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:TokenizerTest.java


示例3: createSentences

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Sets the CAS document text to the given lines joined by newlines and adds one
 * {@link Sentence} per line that contains at least one non-whitespace character.
 * Each sentence spans from the first to the last non-whitespace character of its line.
 */
private void createSentences(String... lines) {
  this.jCas.setDocumentText(Joiner.on("\n").join(lines));
  int lineOffset = 0;
  for (String line : lines) {
    final int lineLength = line.length();

    // Index of the first non-whitespace character (lineLength if there is none).
    int first = 0;
    for (; first < lineLength; first++) {
      if (!Character.isWhitespace(line.charAt(first))) {
        break;
      }
    }

    // Index just past the last non-whitespace character (0 if there is none).
    int last = lineLength;
    for (; last > 0; last--) {
      if (!Character.isWhitespace(line.charAt(last - 1))) {
        break;
      }
    }

    boolean blank = first == lineLength || last == 0;
    if (!blank) {
      new Sentence(this.jCas, lineOffset + first, lineOffset + last).addToIndexes();
    }

    // +1 accounts for the "\n" separator inserted between lines.
    lineOffset += lineLength + 1;
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TokenizerTest.java


示例4: posTaggerTest

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Tags a ten-token sentence with {@code posTagger} and compares the tag stored on
 * each token with the expected tag sequence. Runs only when big-memory tests are enabled.
 */
@Test
public void posTaggerTest() throws Exception {
  this.assumeBigMemoryTestsEnabled();
  this.logger.info(BIG_MEMORY_TEST_MESSAGE);

  initDefaultModel();
  this.jCas.reset();
  tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem");

  String sentenceText = "The brown fox jumped quickly over the lazy dog .";
  this.tokenBuilder.buildTokens(this.jCas, sentenceText, sentenceText);
  SimplePipeline.runPipeline(jCas, posTagger);

  // Gather the tag of every token in iteration order.
  List<String> taggedPos = new ArrayList<String>();
  for (Token token : JCasUtil.select(this.jCas, Token.class)) {
    taggedPos.add(token.getPos());
  }

  // NOTE: lemmas ("the brown fox jump quickly over the lazy dog .") are not verified here.
  Assert.assertEquals(Arrays.asList("DT JJ NN VBD RB IN DT JJ NN .".split(" ")), taggedPos);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:PosTaggerTest.java


示例5: createSentences

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Joins the lines with newlines as the CAS document text, then indexes a
 * {@link Sentence} over the whitespace-trimmed extent of every line that is
 * not empty or all whitespace.
 */
private void createSentences(String ... lines) {
  this.jCas.setDocumentText(Joiner.on("\n").join(lines));
  int nextLineStart = 0;
  for (String line : lines) {
    int lineStart = nextLineStart;
    int lineLength = line.length();
    // Advance past this line and the "\n" separator before any early exit below.
    nextLineStart += lineLength + 1;

    int begin = 0;
    while (begin < lineLength && Character.isWhitespace(line.charAt(begin))) {
      begin++;
    }
    if (begin == lineLength) {
      // Empty or whitespace-only line: no sentence.
      continue;
    }

    int end = lineLength;
    while (end > 0 && Character.isWhitespace(line.charAt(end - 1))) {
      end--;
    }
    if (end == 0) {
      continue;
    }

    Sentence trimmed = new Sentence(this.jCas, lineStart + begin, lineStart + end);
    trimmed.addToIndexes();
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TokenizerAndTokenAnnotatorTest.java


示例6: getDescription

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates an engine description for {@code opennlp.uima.postag.POSTagger} that is
 * configured with ClearTK's {@link Sentence} and {@link Token} types, the {@code "pos"}
 * feature, and a model loaded from the classpath resource
 * {@code /models/<languageCode>-pos-maxent.bin}.
 *
 * @param languageCode
 *          the code substituted into the model resource path
 * @return the configured engine description
 * @throws ResourceInitializationException
 *           if no model resource exists for {@code languageCode} or the description
 *           cannot be created
 */
public static AnalysisEngineDescription getDescription(String languageCode)
    throws ResourceInitializationException {
  String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);

  // getResource returns null for a missing resource; fail with a descriptive
  // initialization error instead of an unexplained NullPointerException.
  java.net.URL modelUrl = PosTagger.class.getResource(modelPath);
  if (modelUrl == null) {
    throw new ResourceInitializationException(new IllegalArgumentException(
        "No POS model resource found at " + modelPath + " for language code: " + languageCode));
  }

  return AnalysisEngineFactory.createEngineDescription(
      opennlp.uima.postag.POSTagger.class,
      opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
      ExternalResourceFactory.createExternalResourceDescription(
          POSModelResourceImpl.class,
          modelUrl.toString()),
      opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER,
      Sentence.class.getName(),
      opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER,
      Token.class.getName(),
      opennlp.uima.util.UimaUtil.POS_FEATURE_PARAMETER,
      "pos");
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:PosTagger.java


示例7: test1

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Segments {@code test1.txt} and checks the covered text of the first four
 * sentences, looked up by index.
 */
@Test
public void test1() throws Exception {
  this.sentenceSegmentFile("src/test/resources/data/sentence/test1.txt");

  String[] expectedSentences = {
      "aaaa aaaa aaaa aaaa",
      "bbbb",
      "ccc cccc ccc cccc",
      "dddddd ddd."
  };
  for (int position = 0; position < expectedSentences.length; position++) {
    Sentence sentence = JCasUtil.selectByIndex(jCas, Sentence.class, position);
    assertEquals(expectedSentences[position], sentence.getCoveredText());
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:SentenceAnnotatorTest.java


示例8: testWindowClassNames

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Configures the sentence annotator with {@link Sentence} as its window class,
 * pre-indexes a window over offsets 0-30, and expects three sentences afterwards:
 * the window itself followed by the two sentences found inside it.
 */
@Test
public void testWindowClassNames() throws Exception {
  this.jCas.setDocumentText("I bought a lamp. I love lamp. Lamps are great!");
  Sentence window = new Sentence(this.jCas, 0, 30);
  window.addToIndexes();

  AnalysisEngineDescription description = SentenceAnnotator.getDescription();
  String[] windowClassNames = { "org.cleartk.token.type.Sentence" };
  ConfigurationParameterFactory.addConfigurationParameter(
      description,
      SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
      windowClassNames);

  AnalysisEngine annotator = AnalysisEngineFactory.createEngine(description);
  annotator.process(this.jCas);
  annotator.collectionProcessComplete();

  Collection<Sentence> found = JCasUtil.select(this.jCas, Sentence.class);
  assertEquals(3, found.size());

  Iterator<Sentence> cursor = found.iterator();
  assertEquals(window, cursor.next());
  assertEquals("I bought a lamp.", cursor.next().getCoveredText());
  assertEquals("I love lamp.", cursor.next().getCoveredText());
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:SentenceAnnotatorTest.java


示例9: convert

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates a ClearTK <em>Predicate</em> annotation (with one argument per prop label)
 * from this Propbank entry and adds it to the indexes of <b>view</b>.
 * 
 * @param view
 *          the view that receives the new annotations
 * @param topNode
 *          the top node annotation of the corresponding Treebank parse
 * @param sentence
 *          the sentence annotation stored on the predicate
 * @return the generated <em>Predicate</em> annotation
 */
public Predicate convert(JCas view, TopTreebankNode topNode, Sentence sentence) {
  Predicate predicate = new Predicate(view);
  predicate.setPropTxt(this.propTxt);

  // The predicate takes its span from the annotation converted from the terminal.
  predicate.setAnnotation(this.terminal.convert(view, topNode));
  predicate.setBegin(predicate.getAnnotation().getBegin());
  predicate.setEnd(predicate.getAnnotation().getEnd());

  predicate.setSentence(sentence);
  predicate.setFrameSet(this.frameSet);
  predicate.setBaseForm(this.baseForm);

  List<Argument> convertedArguments = new ArrayList<Argument>();
  for (Proplabel label : this.proplabels) {
    convertedArguments.add(label.convert(view, topNode));
  }

  FSArray argumentArray = new FSArray(view, convertedArguments.size());
  predicate.setArguments(argumentArray);
  FSCollectionFactory.fillArrayFS(predicate.getArguments(), convertedArguments);

  predicate.addToIndexes();
  return predicate;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:33,代码来源:Propbank.java


示例10: process

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Writes the tokens of the CAS to {@code <outputDir>/<file name>.pos}, one sentence
 * per line, each token rendered as {@code text/pos} followed by a space.
 *
 * @param jCas
 *          the CAS whose sentences and tokens are written; its URI supplies the
 *          output file name
 * @throws AnalysisEngineProcessException
 *           if the output file cannot be opened for writing
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  String id = new File(ViewUriUtil.getURI(jCas)).getName();
  PrintWriter outputWriter;
  try {
    outputWriter = new PrintWriter(new File(this.outputDir, id + ".pos"));
  } catch (FileNotFoundException e) {
    throw new AnalysisEngineProcessException(e);
  }
  // Close the writer even if iterating over the annotations throws, so the file
  // handle is never leaked.
  try {
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
      for (Token token : JCasUtil.selectCovered(jCas, Token.class, sentence)) {
        outputWriter.print(token.getCoveredText());
        outputWriter.print('/');
        outputWriter.print(token.getPos());
        outputWriter.print(' ');
      }
      outputWriter.println();
    }
  } finally {
    outputWriter.close();
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:ExamplePosPlainTextWriter.java


示例11: main

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Reads every document in the input directory, runs sentence annotation, and
 * writes each {@link Sentence} through a {@code LineWriter} to the output file.
 *
 * @param args
 *          command-line arguments parsed into {@code Options}
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  CollectionReader uriReader = UriCollectionReader.getCollectionReaderFromDirectory(
      options.getInputDirectory());

  // Pipeline: load document text -> annotate sentences -> write one sentence per line.
  AggregateBuilder pipeline = new AggregateBuilder();
  pipeline.add(UriToDocumentTextAnnotator.getDescription());
  pipeline.add(SentenceAnnotator.getDescription());
  pipeline.add(
      AnalysisEngineFactory.createEngineDescription(
          LineWriter.class,
          LineWriter.PARAM_OUTPUT_FILE_NAME,
          options.getOutputFile(),
          LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
          Sentence.class.getName()));

  SimplePipeline.runPipeline(uriReader, pipeline.createAggregateDescription());
  System.out.println("results written to " + options.getOutputFile());
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:20,代码来源:Docs2Sentences.java


示例12: initZmusExtractor

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds the zero-mean/unit-stddev extractor over the sentence-count and
 * token-count features, loading previously saved statistics when
 * {@code zmusUri} is set.
 *
 * @return the extractor, with statistics loaded if a URI was configured
 * @throws IOException
 *           declared for loading the statistics from {@code zmusUri}
 */
private ZeroMeanUnitStddevExtractor<String, DocumentAnnotation> initZmusExtractor()
    throws IOException {
  CountAnnotationExtractor<DocumentAnnotation> sentenceCounter =
      new CountAnnotationExtractor<DocumentAnnotation>(Sentence.class);
  CountAnnotationExtractor<DocumentAnnotation> tokenCounter =
      new CountAnnotationExtractor<DocumentAnnotation>(Token.class);
  CombinedExtractor1<DocumentAnnotation> countFeatures =
      new CombinedExtractor1<DocumentAnnotation>(sentenceCounter, tokenCounter);

  ZeroMeanUnitStddevExtractor<String, DocumentAnnotation> normalizer =
      new ZeroMeanUnitStddevExtractor<String, DocumentAnnotation>(
          ZMUS_EXTRACTOR_KEY,
          countFeatures);

  if (this.zmusUri == null) {
    return normalizer;
  }
  normalizer.load(this.zmusUri);
  return normalizer;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:DocumentClassificationAnnotator.java


示例13: initMinMaxExtractor

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds the min-max normalization extractor over the sentence-count and
 * token-count features, loading previously saved statistics when
 * {@code minmaxUri} is set.
 *
 * @return the extractor, with statistics loaded if a URI was configured
 * @throws IOException
 *           declared for loading the statistics from {@code minmaxUri}
 */
private MinMaxNormalizationExtractor<String, DocumentAnnotation> initMinMaxExtractor()
    throws IOException {
  CountAnnotationExtractor<DocumentAnnotation> sentenceCounter =
      new CountAnnotationExtractor<DocumentAnnotation>(Sentence.class);
  CountAnnotationExtractor<DocumentAnnotation> tokenCounter =
      new CountAnnotationExtractor<DocumentAnnotation>(Token.class);
  CombinedExtractor1<DocumentAnnotation> countFeatures =
      new CombinedExtractor1<DocumentAnnotation>(sentenceCounter, tokenCounter);

  MinMaxNormalizationExtractor<String, DocumentAnnotation> normalizer =
      new MinMaxNormalizationExtractor<String, DocumentAnnotation>(
          MINMAX_EXTRACTOR_KEY,
          countFeatures);

  if (this.minmaxUri == null) {
    return normalizer;
  }
  normalizer.load(this.minmaxUri);
  return normalizer;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:DocumentClassificationAnnotator.java


示例14: getSubordinateEvents

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Collects the events in {@code sentence}, other than {@code source}, whose
 * leaf-stripped tree path from the source's node matches
 * {@code SUBORDINATE_PATH_PATTERN}.
 *
 * @return the matching events; empty when none match or when the source or
 *         every candidate has no matching treebank leaf
 */
private List<Event> getSubordinateEvents(JCas jCas, Event source, Sentence sentence) {
  List<Event> subordinates = new ArrayList<Event>();
  TreebankNode sourceLeaf = TreebankNodeUtil.selectMatchingLeaf(jCas, source);
  for (Event candidate : JCasUtil.selectCovered(jCas, Event.class, sentence)) {
    if (candidate.equals(source)) {
      continue;
    }
    TreebankNode candidateLeaf = TreebankNodeUtil.selectMatchingLeaf(jCas, candidate);
    if (sourceLeaf == null || candidateLeaf == null) {
      // Without both leaves there is no tree path to test.
      continue;
    }
    String treePath = noLeavesPath(TreebankNodeUtil.getPath(sourceLeaf, candidateLeaf));
    if (SUBORDINATE_PATH_PATTERN.matcher(treePath).matches()) {
      subordinates.add(candidate);
    }
  }
  return subordinates;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:TemporalLinkEventToSubordinatedEventAnnotator.java


示例15: getSourceTargetPairs

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Pairs the main event of each sentence with the main event of the sentence
 * that follows it; adjacent sentences where either main event is missing
 * contribute no pair.
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
  List<SourceTargetPair> pairs = Lists.newArrayList();
  Iterator<Sentence> sentenceIter = JCasUtil.select(jCas, Sentence.class).iterator();
  if (!sentenceIter.hasNext()) {
    return pairs;
  }

  Sentence previous = sentenceIter.next();
  while (sentenceIter.hasNext()) {
    Sentence current = sentenceIter.next();
    Event source = getMainEvent(jCas, previous);
    Event target = getMainEvent(jCas, current);
    boolean bothPresent = source != null && target != null;
    if (bothPresent) {
      pairs.add(new SourceTargetPair(source, target));
    }
    previous = current;
  }
  return pairs;
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:TemporalLinkMainEventToNextSentenceMainEventAnnotator.java


示例16: getMainEvent

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Picks the "main" event of a sentence: the last event among those whose matching
 * treebank leaf has the smallest depth. Events without a matching leaf are grouped
 * under a {@code null} depth and are only chosen when no event has a depth.
 *
 * @return the selected event, or {@code null} if the sentence covers no events
 */
private Event getMainEvent(JCas jCas, Sentence sentence) {
  Map<Integer, List<Event>> eventsByDepth = new HashMap<Integer, List<Event>>();
  Integer shallowest = null;

  for (Event event : JCasUtil.selectCovered(jCas, Event.class, sentence)) {
    TreebankNode leaf = TreebankNodeUtil.selectMatchingLeaf(jCas, event);
    Integer depth = null;
    if (leaf != null) {
      depth = TreebankNodeUtil.getDepth(leaf);
    }

    // Bucket the event under its depth (the null key collects unmatched events).
    List<Event> bucket = eventsByDepth.get(depth);
    if (bucket == null) {
      bucket = new ArrayList<Event>();
      eventsByDepth.put(depth, bucket);
    }
    bucket.add(event);

    if (depth != null && (shallowest == null || depth < shallowest)) {
      shallowest = depth;
    }
  }

  if (eventsByDepth.isEmpty()) {
    return null;
  }
  // Last event at the minimum depth.
  List<Event> candidates = eventsByDepth.get(shallowest);
  return candidates.get(candidates.size() - 1);
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TemporalLinkMainEventToNextSentenceMainEventAnnotator.java


示例17: process

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Extracts features for every sentence. In training mode the instance is written
 * to the data writer; otherwise the sentence is scored and, when the score for the
 * {@code true} outcome is positive, a {@code SummarySentence} with that score is
 * added over the same span.
 *
 * @param jcas
 *          the CAS whose sentences are processed
 * @throws AnalysisEngineProcessException
 *           if feature extraction, writing, or scoring fails
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
    Instance<Boolean> instance = new Instance<Boolean>(false, this.extractor.extract(
        jcas,
        sentence));

    if (this.isTraining()) {
      this.dataWriter.write(instance);
    } else {
      Map<Boolean, Double> scoredOutcomes = this.classifier.score(instance.getFeatures());
      Double trueScore = scoredOutcomes.get(true);
      // A missing score for the true outcome would otherwise fail with a
      // NullPointerException when unboxed; treat it as "not selected".
      if (trueScore != null && trueScore > 0.0) {
        SummarySentence extractedSentence = new SummarySentence(
            jcas,
            sentence.getBegin(),
            sentence.getEnd());
        extractedSentence.setScore(trueScore);
        extractedSentence.addToIndexes();
      }
    }
  }
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:SumBasicAnnotator.java


示例18: createTokenCountsExtractor

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates an extractor that counts, per sentence, the covered tokens by the
 * configured token field (covered text, stem, or lemma) after stopword removal.
 *
 * @return the sentence-level token counts extractor
 */
private FeatureExtractor1<Sentence> createTokenCountsExtractor() {
  final FeatureExtractor1<Token> fieldExtractor;
  switch (this.tokenField) {
    case STEM:
      fieldExtractor = new TypePathExtractor<Token>(Token.class, "stem");
      break;
    case LEMMA:
      fieldExtractor = new TypePathExtractor<Token>(Token.class, "lemma");
      break;
    case COVERED_TEXT:
    default:
      // Covered text is also the fallback for any other field value.
      fieldExtractor = new CoveredTextExtractor<Token>();
      break;
  }

  return new CleartkExtractor<Sentence, Token>(
      Token.class,
      new StopwordRemovingExtractor<Token>(this.stopwords, fieldExtractor),
      new CleartkExtractor.Count(new CleartkExtractor.Covered()));
}
 
开发者ID:ClearTK,项目名称:cleartk,代码行数:22,代码来源:SumBasicAnnotator.java


示例19: process

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Records the covered text of every design decision in this CAS, then removes
 * from the indexes each sentence whose covered text has been recorded.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
	// Remember the text of each design decision not already recorded.
	for (DesignDecision decision : JCasUtil.select(jCas, DesignDecision.class)) {
		String decisionText = decision.getCoveredText();
		if (designDecisionSet.contains(decisionText)) {
			continue;
		}
		designDecisionSet.add(decisionText);
	}

	// Iterate over a copy so sentences can be removed from the indexes while looping.
	for (Sentence sentence : new ArrayList<Sentence>(JCasUtil.select(jCas, Sentence.class))) {
		boolean isDesignDecision = designDecisionSet.contains(sentence.getCoveredText());
		if (isDesignDecision) {
			sentence.removeFromIndexes();
		}
	}
}
 
开发者ID:germanattanasio,项目名称:traceability-assistant,代码行数:17,代码来源:DesignDecisionSentenceRemover.java


示例20: writeSentences

import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Reads the XMI documents found in {@code inputDirectory} and writes the covered
 * text of each {@link Sentence} annotation, one per line, to {@code outputFilename}.
 *
 * @param inputDirectory the directory containing the input files
 * @param outputFilename the file that receives the sentences
 * @throws ResourceInitializationException if a pipeline component cannot be initialized
 * @throws UIMAException if the pipeline fails while running
 * @throws IOException if an I/O error occurs
 */
public void writeSentences(String inputDirectory,String outputFilename) throws ResourceInitializationException, UIMAException, IOException {
  SimplePipeline.runPipeline(
      // reader: one CAS per file under the input directory
      UriCollectionReader.getDescriptionFromDirectory(new File(inputDirectory)),
      // load each CAS from its XMI file
      UriToXmiCasAnnotator.getDescription(),
      // write the covered text of each Sentence on its own line
      AnalysisEngineFactory.createEngineDescription(
          LineWriter.class,
          LineWriter.PARAM_OUTPUT_FILE_NAME,
          outputFilename,
          LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
          Sentence.class.getName(),
          LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
          CoveredTextAnnotationWriter.class.getName()));
}
 
开发者ID:germanattanasio,项目名称:traceability-assistant,代码行数:26,代码来源:CarchaPipeline.java



注:本文中的org.cleartk.token.type.Sentence类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java RexInputRef类代码示例发布时间:2022-05-22
下一篇:
Java Paintable类代码示例发布时间:2022-05-22
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap