本文整理汇总了Java中org.cleartk.token.type.Sentence类的典型用法代码示例。如果您正苦于以下问题：Java Sentence类的具体用法？Java Sentence怎么用？Java Sentence使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Sentence类属于org.cleartk.token.type包,在下文中一共展示了Sentence类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: mpAnalyzerTest
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Runs {@code mpAnalyzer} over a handful of inflected words and checks the lemma recorded on
 * every token.
 */
@Test
public void mpAnalyzerTest() throws Exception {
  this.jCas.reset();
  tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem");

  // The same string is passed twice (presumably as document text and as the token list);
  // the third string carries one tag per word.
  String words = "jump jumping jumped jumper happy happier happiest";
  this.tokenBuilder.buildTokens(jCas, words, words, "VBP VBG VBP NN JJ JJ JJ");

  mpAnalyzer.process(jCas);

  List<String> lemmas = new ArrayList<String>();
  for (Token token : JCasUtil.select(this.jCas, Token.class)) {
    lemmas.add(token.getLemma());
  }
  Assert.assertEquals(
      Arrays.asList("jump", "jump", "jumped", "jumper", "happy", "happier", "happiest"),
      lemmas);
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:MpAnalyzerTest.java
示例2: testPeriod
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Runs the tokenizer over two manually annotated sentences and verifies the covered text of the
 * first sixteen tokens, including the sentence-final periods.
 */
@Test
public void testPeriod() throws Exception {
  jCas.setDocumentText("The sides was so steep and the bushes so thick. We tramped and clumb. ");
  // Sentence boundaries are given as hard-coded character offsets into the text above.
  new Sentence(jCas, 0, 47).addToIndexes();
  new Sentence(jCas, 48, 70).addToIndexes();
  SimplePipeline.runPipeline(jCas, tokenizer);

  String[] expectedTokens = {
      "The", "sides", "was", "so", "steep", "and", "the", "bushes", "so", "thick", ".",
      "We", "tramped", "and", "clumb", "." };
  for (int index = 0; index < expectedTokens.length; index++) {
    assertEquals(expectedTokens[index], getToken(index).getCoveredText());
  }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:TokenizerTest.java
示例3: createSentences
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Sets the CAS text to the given lines joined with newlines and indexes one {@link Sentence} for
 * every line that contains at least one non-whitespace character. Each sentence spans the line
 * with its leading and trailing whitespace excluded.
 */
private void createSentences(String... lines) {
  this.jCas.setDocumentText(Joiner.on("\n").join(lines));
  int lineOffset = 0;
  for (String line : lines) {
    int lineLength = line.length();

    // Skip leading whitespace.
    int first = 0;
    while (first < lineLength && Character.isWhitespace(line.charAt(first))) {
      first++;
    }

    if (first < lineLength) {
      // A non-whitespace character exists at 'first', so this backward scan stops above it.
      int limit = lineLength;
      while (Character.isWhitespace(line.charAt(limit - 1))) {
        limit--;
      }
      new Sentence(this.jCas, lineOffset + first, lineOffset + limit).addToIndexes();
    }

    // +1 accounts for the newline inserted between lines by the join above.
    lineOffset += lineLength + 1;
  }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TokenizerTest.java
示例4: posTaggerTest
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Runs {@code posTagger} over one pre-tokenized sentence and compares the tag stored on each
 * token with the expected sequence. Only part-of-speech tags are verified; the lemma checks that
 * once accompanied this test were disabled.
 */
@Test
public void posTaggerTest() throws Exception {
  this.assumeBigMemoryTestsEnabled();
  this.logger.info(BIG_MEMORY_TEST_MESSAGE);
  initDefaultModel();

  this.jCas.reset();
  tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class, "pos", "stem");
  String sentenceText = "The brown fox jumped quickly over the lazy dog .";
  this.tokenBuilder.buildTokens(this.jCas, sentenceText, sentenceText);

  SimplePipeline.runPipeline(jCas, posTagger);

  List<String> taggedPos = new ArrayList<String>();
  for (Token token : JCasUtil.select(this.jCas, Token.class)) {
    taggedPos.add(token.getPos());
  }
  Assert.assertEquals(
      Arrays.asList("DT", "JJ", "NN", "VBD", "RB", "IN", "DT", "JJ", "NN", "."),
      taggedPos);
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:27,代码来源:PosTaggerTest.java
示例5: createSentences
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Joins the given lines with newlines as the CAS document text, then adds a {@link Sentence}
 * annotation for each line that is not entirely whitespace. The annotation covers the line from
 * its first to its last non-whitespace character.
 */
private void createSentences(String ... lines) {
  this.jCas.setDocumentText(Joiner.on("\n").join(lines));
  int documentOffset = 0;
  for (String line : lines) {
    // Trim from the right first; 'stop' ends at 0 when the line is blank.
    int stop = line.length();
    while (stop > 0 && Character.isWhitespace(line.charAt(stop - 1))) {
      stop--;
    }

    // Then trim from the left, never running past the trimmed right edge.
    int begin = 0;
    while (begin < stop && Character.isWhitespace(line.charAt(begin))) {
      begin++;
    }

    if (begin < stop) {
      Sentence sentence = new Sentence(this.jCas, documentOffset + begin, documentOffset + stop);
      sentence.addToIndexes();
    }

    // Advance past this line and the joining newline.
    documentOffset += line.length() + 1;
  }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TokenizerAndTokenAnnotatorTest.java
示例6: getDescription
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates the description of an OpenNLP {@code POSTagger} engine configured to read
 * {@link Sentence} and {@link Token} annotations and to store its tag in the token feature
 * {@code "pos"}.
 *
 * @param languageCode
 *          inserted into the classpath resource name {@code /models/<languageCode>-pos-maxent.bin}
 * @return the engine description bound to that model resource
 * @throws ResourceInitializationException
 *           if no model resource exists for {@code languageCode}, or if the description cannot be
 *           created
 */
public static AnalysisEngineDescription getDescription(String languageCode)
    throws ResourceInitializationException {
  String modelPath = String.format("/models/%s-pos-maxent.bin", languageCode);

  // getResource returns null when the resource is absent; fail with a descriptive error instead
  // of a NullPointerException on the toString() call.
  java.net.URL modelUrl = PosTagger.class.getResource(modelPath);
  if (modelUrl == null) {
    throw new ResourceInitializationException(
        new IllegalArgumentException("No POS model found on the classpath at " + modelPath));
  }

  return AnalysisEngineFactory.createEngineDescription(
      opennlp.uima.postag.POSTagger.class,
      opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
      ExternalResourceFactory.createExternalResourceDescription(
          POSModelResourceImpl.class,
          modelUrl.toString()),
      opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER,
      Sentence.class.getName(),
      opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER,
      Token.class.getName(),
      opennlp.uima.util.UimaUtil.POS_FEATURE_PARAMETER,
      "pos");
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:PosTagger.java
示例7: test1
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Segments {@code test1.txt} and checks the covered text of the first four sentences, in index
 * order.
 */
@Test
public void test1() throws Exception {
  this.sentenceSegmentFile("src/test/resources/data/sentence/test1.txt");

  String[] expectedSentences = {
      "aaaa aaaa aaaa aaaa",
      "bbbb",
      "ccc cccc ccc cccc",
      "dddddd ddd." };
  for (int index = 0; index < expectedSentences.length; index++) {
    Sentence sentence = JCasUtil.selectByIndex(jCas, Sentence.class, index);
    assertEquals(expectedSentences[index], sentence.getCoveredText());
  }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:SentenceAnnotatorTest.java
示例8: testWindowClassNames
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Configures the sentence annotator with {@code PARAM_WINDOW_CLASS_NAMES} set to the Sentence
 * type and checks that, after processing, the CAS holds three Sentence annotations: the
 * pre-existing window followed by the two sentences found inside it.
 */
@Test
public void testWindowClassNames() throws Exception {
  this.jCas.setDocumentText("I bought a lamp. I love lamp. Lamps are great!");

  // The window spans offsets 0-30, i.e. the first two sentences and the space after them.
  Sentence window = new Sentence(this.jCas, 0, 30);
  window.addToIndexes();

  AnalysisEngineDescription description = SentenceAnnotator.getDescription();
  ConfigurationParameterFactory.addConfigurationParameter(
      description,
      SentenceAnnotator.PARAM_WINDOW_CLASS_NAMES,
      new String[] { "org.cleartk.token.type.Sentence" });
  AnalysisEngine annotator = AnalysisEngineFactory.createEngine(description);
  annotator.process(this.jCas);
  annotator.collectionProcessComplete();

  Collection<Sentence> found = JCasUtil.select(this.jCas, Sentence.class);
  assertEquals(3, found.size());

  Iterator<Sentence> cursor = found.iterator();
  assertEquals(window, cursor.next());
  assertEquals("I bought a lamp.", cursor.next().getCoveredText());
  assertEquals("I love lamp.", cursor.next().getCoveredText());
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:SentenceAnnotatorTest.java
示例9: convert
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds a ClearTK <em>Predicate</em> annotation, together with its converted arguments, from
 * this object's fields and adds the predicate to the indexes of <b>view</b>.
 *
 * @param view
 *          the view where the annotations should be added
 * @param topNode
 *          the top node annotation of the corresponding Treebank parse
 * @param sentence
 *          the sentence annotation of the corresponding sentence
 * @return the generated <em>Predicate</em> annotation
 */
public Predicate convert(JCas view, TopTreebankNode topNode, Sentence sentence) {
  Predicate predicate = new Predicate(view);
  predicate.setPropTxt(this.propTxt);

  // The predicate takes its span from the annotation converted from the terminal.
  predicate.setAnnotation(this.terminal.convert(view, topNode));
  predicate.setBegin(predicate.getAnnotation().getBegin());
  predicate.setEnd(predicate.getAnnotation().getEnd());

  predicate.setSentence(sentence);
  predicate.setFrameSet(this.frameSet);
  predicate.setBaseForm(this.baseForm);

  // Convert every proplabel, then copy the results into a CAS array of matching size.
  List<Argument> convertedArguments = new ArrayList<Argument>();
  for (Proplabel proplabel : this.proplabels) {
    convertedArguments.add(proplabel.convert(view, topNode));
  }
  FSArray argumentArray = new FSArray(view, convertedArguments.size());
  FSCollectionFactory.fillArrayFS(argumentArray, convertedArguments);
  predicate.setArguments(argumentArray);

  predicate.addToIndexes();
  return predicate;
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:33,代码来源:Propbank.java
示例10: process
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Writes the tokens of the given CAS to {@code <outputDir>/<name>.pos}, where {@code name} is the
 * file name taken from the CAS view URI. Each sentence becomes one output line of
 * {@code text/pos} pairs, each pair followed by a space.
 *
 * @param jCas
 *          the CAS whose Sentence and Token annotations are written
 * @throws AnalysisEngineProcessException
 *           if the output file cannot be opened for writing
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  String id = new File(ViewUriUtil.getURI(jCas)).getName();
  PrintWriter outputWriter;
  try {
    outputWriter = new PrintWriter(new File(this.outputDir, id + ".pos"));
  } catch (FileNotFoundException e) {
    throw new AnalysisEngineProcessException(e);
  }
  // Close the writer even if iterating the annotations throws, so the file handle is not leaked.
  try {
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
      for (Token token : JCasUtil.selectCovered(jCas, Token.class, sentence)) {
        outputWriter.print(token.getCoveredText());
        outputWriter.print('/');
        outputWriter.print(token.getPos());
        outputWriter.print(' ');
      }
      outputWriter.println();
    }
  } finally {
    outputWriter.close();
  }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:ExamplePosPlainTextWriter.java
示例11: main
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Command-line entry point: reads the documents in the configured input directory, runs sentence
 * annotation over them and passes the Sentence annotations to a {@code LineWriter} targeting the
 * configured output file.
 *
 * @param args
 *          command-line arguments, parsed into {@code Options}
 */
public static void main(String[] args) throws Exception {
  Options options = CliFactory.parseArguments(Options.class, args);

  // Assemble the processing steps: load text, split into sentences, write sentences out.
  AggregateBuilder pipeline = new AggregateBuilder();
  pipeline.add(UriToDocumentTextAnnotator.getDescription());
  pipeline.add(SentenceAnnotator.getDescription());
  pipeline.add(
      AnalysisEngineFactory.createEngineDescription(
          LineWriter.class,
          LineWriter.PARAM_OUTPUT_FILE_NAME,
          options.getOutputFile(),
          LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
          Sentence.class.getName()));

  CollectionReader documents =
      UriCollectionReader.getCollectionReaderFromDirectory(options.getInputDirectory());
  SimplePipeline.runPipeline(documents, pipeline.createAggregateDescription());

  System.out.println("results written to " + options.getOutputFile());
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:20,代码来源:Docs2Sentences.java
示例12: initZmusExtractor
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates the zero-mean/unit-stddev extractor over the per-document Sentence and Token counts,
 * loading its stored data from {@code zmusUri} when that URI is set.
 *
 * @return the configured extractor
 * @throws IOException
 *           declared for the {@code load} call
 */
private ZeroMeanUnitStddevExtractor<String, DocumentAnnotation> initZmusExtractor()
    throws IOException {
  CountAnnotationExtractor<DocumentAnnotation> sentenceCounts =
      new CountAnnotationExtractor<DocumentAnnotation>(Sentence.class);
  CountAnnotationExtractor<DocumentAnnotation> tokenCounts =
      new CountAnnotationExtractor<DocumentAnnotation>(Token.class);

  ZeroMeanUnitStddevExtractor<String, DocumentAnnotation> normalizer =
      new ZeroMeanUnitStddevExtractor<String, DocumentAnnotation>(
          ZMUS_EXTRACTOR_KEY,
          new CombinedExtractor1<DocumentAnnotation>(sentenceCounts, tokenCounts));

  if (this.zmusUri == null) {
    return normalizer;
  }
  normalizer.load(this.zmusUri);
  return normalizer;
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:DocumentClassificationAnnotator.java
示例13: initMinMaxExtractor
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Creates the min-max normalization extractor that wraps the per-document Sentence and Token
 * count features. If {@code minmaxUri} is set, the extractor is loaded from it before being
 * returned.
 *
 * @return the configured extractor
 * @throws IOException
 *           declared for the {@code load} call
 */
private MinMaxNormalizationExtractor<String, DocumentAnnotation> initMinMaxExtractor()
    throws IOException {
  CombinedExtractor1<DocumentAnnotation> countFeatures =
      new CombinedExtractor1<DocumentAnnotation>(
          new CountAnnotationExtractor<DocumentAnnotation>(Sentence.class),
          new CountAnnotationExtractor<DocumentAnnotation>(Token.class));

  MinMaxNormalizationExtractor<String, DocumentAnnotation> scaler =
      new MinMaxNormalizationExtractor<String, DocumentAnnotation>(
          MINMAX_EXTRACTOR_KEY,
          countFeatures);

  boolean hasStoredData = this.minmaxUri != null;
  if (hasStoredData) {
    scaler.load(this.minmaxUri);
  }
  return scaler;
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:DocumentClassificationAnnotator.java
示例14: getSubordinateEvents
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Collects the events in {@code sentence}, other than {@code source}, whose treebank path from
 * the source leaf (with leaves stripped by {@code noLeavesPath}) matches
 * {@code SUBORDINATE_PATH_PATTERN}.
 *
 * @param jCas
 *          the CAS holding the events and treebank nodes
 * @param source
 *          the event the paths start from
 * @param sentence
 *          the sentence whose covered events are the candidates
 * @return the matching events; empty when either leaf lookup yields no node
 */
private List<Event> getSubordinateEvents(JCas jCas, Event source, Sentence sentence) {
  TreebankNode sourceLeaf = TreebankNodeUtil.selectMatchingLeaf(jCas, source);
  List<Event> subordinates = new ArrayList<Event>();
  for (Event candidate : JCasUtil.selectCovered(jCas, Event.class, sentence)) {
    if (candidate.equals(source)) {
      continue;
    }
    TreebankNode candidateLeaf = TreebankNodeUtil.selectMatchingLeaf(jCas, candidate);
    if (sourceLeaf == null || candidateLeaf == null) {
      // Without both leaves there is no path to test.
      continue;
    }
    String path = noLeavesPath(TreebankNodeUtil.getPath(sourceLeaf, candidateLeaf));
    if (SUBORDINATE_PATH_PATTERN.matcher(path).matches()) {
      subordinates.add(candidate);
    }
  }
  return subordinates;
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:TemporalLinkEventToSubordinatedEventAnnotator.java
示例15: getSourceTargetPairs
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Pairs the main event of each sentence with the main event of the sentence that follows it.
 * Adjacent sentences where either one has no main event produce no pair.
 *
 * @param jCas
 *          the CAS to read sentences and events from
 * @return the source/target pairs, in sentence order
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
  List<SourceTargetPair> pairs = Lists.newArrayList();
  Sentence previous = null;
  for (Sentence current : JCasUtil.select(jCas, Sentence.class)) {
    // The first sentence has no predecessor, so pairing starts with the second one.
    if (previous != null) {
      Event source = getMainEvent(jCas, previous);
      Event target = getMainEvent(jCas, current);
      if (source != null && target != null) {
        pairs.add(new SourceTargetPair(source, target));
      }
    }
    previous = current;
  }
  return pairs;
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:17,代码来源:TemporalLinkMainEventToNextSentenceMainEventAnnotator.java
示例16: getMainEvent
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Picks the "main" event of a sentence: the last event, in iteration order, among those whose
 * matching treebank leaf has the smallest depth.
 *
 * @param jCas
 *          the CAS holding the events and treebank nodes
 * @param sentence
 *          the sentence whose covered events are considered
 * @return the selected event, or {@code null} if the sentence covers no events
 */
private Event getMainEvent(JCas jCas, Sentence sentence) {
  // Bucket events by leaf depth. Events with no matching leaf go under the null key.
  Map<Integer, List<Event>> eventsByDepth = new HashMap<Integer, List<Event>>();
  Integer shallowest = null;
  for (Event event : JCasUtil.selectCovered(jCas, Event.class, sentence)) {
    TreebankNode leaf = TreebankNodeUtil.selectMatchingLeaf(jCas, event);
    Integer depth = null;
    if (leaf != null) {
      depth = TreebankNodeUtil.getDepth(leaf);
    }

    List<Event> bucket = eventsByDepth.get(depth);
    if (bucket == null) {
      bucket = new ArrayList<Event>();
      eventsByDepth.put(depth, bucket);
    }
    bucket.add(event);

    if (depth != null && (shallowest == null || depth < shallowest)) {
      shallowest = depth;
    }
  }

  if (eventsByDepth.isEmpty()) {
    return null;
  }
  // If no event had a leaf, 'shallowest' is still null and this reads the null-keyed bucket.
  List<Event> candidates = eventsByDepth.get(shallowest);
  return candidates.get(candidates.size() - 1);
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:TemporalLinkMainEventToNextSentenceMainEventAnnotator.java
示例17: process
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Extracts features for every {@link Sentence} in the CAS. In training mode each instance is
 * written with the outcome {@code false}. Otherwise the classifier scores the features, and a
 * {@code SummarySentence} carrying the score is indexed for every sentence whose score for the
 * outcome {@code true} is positive.
 *
 * @param jcas
 *          the CAS whose sentences are processed
 * @throws AnalysisEngineProcessException
 *           declared by the overridden method; thrown by the calls made here
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
  for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
    Instance<Boolean> instance = new Instance<Boolean>(false, this.extractor.extract(
        jcas,
        sentence));
    if (this.isTraining()) {
      this.dataWriter.write(instance);
    } else {
      Map<Boolean, Double> scoredOutcomes = this.classifier.score(instance.getFeatures());
      Double trueScore = scoredOutcomes.get(true);
      // The map may hold no entry for 'true'; unboxing null in the comparison would throw a
      // NullPointerException, so a missing score is treated as "not selected".
      if (trueScore != null && trueScore > 0.0) {
        SummarySentence extractedSentence = new SummarySentence(
            jcas,
            sentence.getBegin(),
            sentence.getEnd());
        extractedSentence.setScore(trueScore);
        extractedSentence.addToIndexes();
      }
    }
  }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:24,代码来源:SumBasicAnnotator.java
示例18: createTokenCountsExtractor
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Builds the extractor that counts, per sentence, the covered tokens left after stopword
 * removal. Tokens are represented by their covered text, stem or lemma depending on
 * {@code tokenField}; covered text is the fallback.
 *
 * @return the sentence-level token-count extractor
 */
private FeatureExtractor1<Sentence> createTokenCountsExtractor() {
  FeatureExtractor1<Token> fieldExtractor;
  switch (this.tokenField) {
    case STEM:
      fieldExtractor = new TypePathExtractor<Token>(Token.class, "stem");
      break;
    case LEMMA:
      fieldExtractor = new TypePathExtractor<Token>(Token.class, "lemma");
      break;
    case COVERED_TEXT:
    default:
      fieldExtractor = new CoveredTextExtractor<Token>();
      break;
  }
  return new CleartkExtractor<Sentence, Token>(
      Token.class,
      new StopwordRemovingExtractor<Token>(this.stopwords, fieldExtractor),
      new CleartkExtractor.Count(new CleartkExtractor.Covered()));
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:22,代码来源:SumBasicAnnotator.java
示例19: process
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Records the covered text of every {@code DesignDecision} in {@code designDecisionSet}, then
 * removes from the indexes each {@link Sentence} whose covered text is contained in that
 * collection.
 *
 * @param jCas
 *          the CAS to filter
 * @throws AnalysisEngineProcessException
 *           declared by the overridden method
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  // Remember the text of each design decision, skipping texts already recorded.
  for (DesignDecision decision : JCasUtil.select(jCas, DesignDecision.class)) {
    String decisionText = decision.getCoveredText();
    if (!designDecisionSet.contains(decisionText)) {
      designDecisionSet.add(decisionText);
    }
  }

  // Iterate over a copy of the selection; presumably so that removing annotations does not
  // disturb the iteration.
  List<Sentence> snapshot = new ArrayList<Sentence>(JCasUtil.select(jCas, Sentence.class));
  for (Sentence candidate : snapshot) {
    if (designDecisionSet.contains(candidate.getCoveredText())) {
      candidate.removeFromIndexes();
    }
  }
}
开发者ID:germanattanasio,项目名称:traceability-assistant,代码行数:17,代码来源:DesignDecisionSentenceRemover.java
示例20: writeSentences
import org.cleartk.token.type.Sentence; //导入依赖的package包/类
/**
 * Runs a pipeline over the files in {@code inputDirectory} that passes the {@link Sentence}
 * annotations to a {@code LineWriter} targeting {@code outputFilename}, with
 * {@code CoveredTextAnnotationWriter} configured as the annotation writer class.
 *
 * @param inputDirectory the directory whose files are read
 * @param outputFilename the file the LineWriter writes to
 * @throws ResourceInitializationException the resource initialization exception
 * @throws UIMAException the UIMA exception
 * @throws IOException Signals that an I/O exception has occurred.
 */
public void writeSentences(String inputDirectory,String outputFilename) throws ResourceInitializationException, UIMAException, IOException {
  SimplePipeline.runPipeline(
      // Reader over the input directory.
      UriCollectionReader.getDescriptionFromDirectory(new File(inputDirectory)),
      // Presumably loads each URI's XMI content into the CAS (TODO confirm).
      UriToXmiCasAnnotator.getDescription(),
      // Writer for the Sentence annotations.
      AnalysisEngineFactory.createEngineDescription(
          LineWriter.class,
          LineWriter.PARAM_OUTPUT_FILE_NAME,
          outputFilename,
          LineWriter.PARAM_OUTPUT_ANNOTATION_CLASS_NAME,
          Sentence.class.getName(),
          LineWriter.PARAM_ANNOTATION_WRITER_CLASS_NAME,
          CoveredTextAnnotationWriter.class.getName()));
}
开发者ID:germanattanasio,项目名称:traceability-assistant,代码行数:26,代码来源:CarchaPipeline.java
注：本文中的org.cleartk.token.type.Sentence类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论