本文整理汇总了Java中edu.umass.cs.mallet.base.types.Instance类的典型用法代码示例。如果您正苦于以下问题:Java Instance类的具体用法?Java Instance怎么用?Java Instance使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
Instance类属于edu.umass.cs.mallet.base.types包,在下文中一共展示了Instance类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: averageTokenAccuracy
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Decodes every instance of <code>ilist</code> with <code>viterbiPath</code>
 * and returns the mean of the per-instance token accuracies.
 *
 * A writer on <code>fileName</code> is handed to <code>tokenAccuracy</code>
 * for each instance. If the file cannot be opened the failure is logged and
 * <code>null</code> is passed instead (as before); the writer is now closed
 * in a <code>finally</code> block and only when it was actually opened, so a
 * failed open no longer ends in a NullPointerException and a decoding error
 * no longer leaks the file handle.
 *
 * @param ilist instances whose data and target are both <code>Sequence</code>s
 * @param fileName path of the file the writer is opened on
 * @return sum of per-instance accuracies divided by <code>ilist.size()</code>
 *         (NaN for an empty list, since that is 0.0/0)
 */
public double averageTokenAccuracy (InstanceList ilist, String fileName)
{
  PrintWriter out;
  try {
    out = new PrintWriter(new FileWriter(new File(fileName)));
  }
  catch (IOException e) {
    // Best effort: keep computing the accuracy even without an output file.
    logger.warning ("Could not open "+fileName+" for writing: "+e);
    out = null;
  }
  double accuracy = 0;
  try {
    for (int i = 0; i < ilist.size(); i++) {
      Instance instance = ilist.getInstance(i);
      Sequence input = (Sequence) instance.getData();
      Sequence output = (Sequence) instance.getTarget();
      assert (input.size() == output.size());
      double pathAccuracy = viterbiPath(input).tokenAccuracy(output, out);
      accuracy += pathAccuracy;
      logger.info ("Transducer path accuracy = "+pathAccuracy);
    }
  }
  finally {
    // Close only what was opened; runs even if decoding throws.
    if (out != null)
      out.close();
  }
  return accuracy/ilist.size();
}
开发者ID:clulab,项目名称:reach-banner,代码行数:24,代码来源:Transducer.java
示例2: test
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Transduces every instance of <code>data</code> with <code>crf</code>,
 * counts the instances for which <code>sequencesMatch</code> accepts the
 * predicted output against the target, and logs the count and the resulting
 * per-instance accuracy, each prefixed with <code>description</code>.
 *
 * @param crf the transducer under evaluation
 * @param data instances whose data and target are <code>Sequence</code>s
 * @param description prefix for the two log lines
 * @param viterbiOutputStream not used by this method
 */
public void test (Transducer crf, InstanceList data, String description, PrintStream viterbiOutputStream)
{
  int numCorrect = 0;
  int idx = 0;
  while (idx < data.size()) {
    Instance current = data.getInstance(idx);
    Sequence observed = (Sequence) current.getData();
    Sequence gold = (Sequence) current.getTarget();
    assert (observed.size() == gold.size());
    Sequence predicted = crf.transduce (observed);
    assert (predicted.size() == gold.size());
    boolean exactMatch = sequencesMatch (gold, predicted);
    if (exactMatch) {
      numCorrect++;
    }
    idx++;
  }
  double perInstanceAccuracy = numCorrect / (double) data.size();
  logger.info (description+" Num instances = "+data.size()+" Num correct = "+numCorrect);
  logger.info (description+" Per-instance accuracy = "+perInstanceAccuracy);
}
开发者ID:clulab,项目名称:reach-banner,代码行数:18,代码来源:InstanceAccuracyEvaluator.java
示例3: add
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Appends the instance to this list. Note that since memory for
 * the Instance has already been allocated, no check is made to
 * catch OutOfMemoryError.
 *
 * If this list's pipe is still the <code>notYetSetPipe</code> placeholder it
 * is taken from the instance; otherwise the instance's pipe must be the very
 * same object as this list's pipe. On the first add the data class (and, when
 * the pipe processes targets and the instance has a target, the target class)
 * is recorded. The instance is then locked, appended, and its index is
 * flagged in <code>inMemory</code>.
 *
 * @param instance the instance to append
 * @return <code>true</code> if successful
 * @throws IllegalArgumentException if the instance's pipe is not this list's pipe
 */
public boolean add (Instance instance)
{
  if (pipe == notYetSetPipe) {
    pipe = instance.getPipe();
  }
  else if (instance.getPipe() != pipe) {
    // Making sure that the Instance has the same pipe as us.
    // xxx This also is a good time check that the constituent data is
    // of a consistent type?
    throw new IllegalArgumentException ("pipes don't match: instance: "+
                                        instance.getPipe()+" Instance.list: "+
                                        this.pipe);
  }
  if (dataClass == null) {
    dataClass = instance.data.getClass();
    // An unlabeled instance has a null target; skip it rather than throw a
    // NullPointerException, matching the guard in InstanceList.add.
    if (pipe != null && pipe.isTargetProcessing() && instance.target != null)
      targetClass = instance.target.getClass();
  }
  instance.setLock();
  boolean ret = instances.add (instance);
  inMemory.set(size()-1);
  logger.finer ("Added instance " + (size()-1) + ". Free memory remaining (bytes): " +
                Runtime.getRuntime().freeMemory());
  return ret;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:29,代码来源:PagedInstanceList.java
示例4: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Detects a document header of the form "&lt;tok0&gt; HEADER - ..." (token 1
 * entirely upper-case letters, with a "-" at token 2 or token 3) and, when
 * found, sets the feature <code>HEADER=&lt;token 1 text&gt;</code> to 1.0 on
 * every token whose text starts with an upper-case letter. Sequences of three
 * tokens or fewer, and headers equal to "PRESS", are left untouched.
 *
 * @param carrier instance whose data is a <code>TokenSequence</code>
 * @return the same carrier
 */
public Instance pipe (Instance carrier)
{
  TokenSequence tokens = (TokenSequence) carrier.getData();
  if (tokens.size() <= 3)
    return carrier;

  boolean dashFollows = tokens.getToken(2).getText().equals("-")
      || tokens.getToken(3).getText().equals("-");
  if (!dashFollows)
    return carrier;

  String header = tokens.getToken(1).getText();
  if (!header.matches("[A-Z]+"))
    return carrier;

  // Don't bother with "PRESS DIGEST" headers
  if (header.equals("PRESS"))
    return carrier;

  String featureName = "HEADER="+header;
  // Restricted to capitalized words: tagging every token would multiply the
  // number of possible feature conjunctions with each HEADER= feature.
  for (int pos = 0; pos < tokens.size(); pos++) {
    Token token = tokens.getToken(pos);
    boolean capitalized = token.getText().matches("^[A-Z].*");
    if (capitalized) {
      token.setFeatureValue (featureName, 1.0);
    }
  }
  return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:22,代码来源:TokenSequenceDocHeader.java
示例5: pipeOutputAccumulate
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Adds the output of a pipe to this list, dispatching on the runtime type of
 * the carrier's data: an <code>InstanceList</code>, a
 * <code>PipeInputIterator</code> or an <code>Instance</code> is unwrapped and
 * passed to the matching <code>add</code> overload; anything else means the
 * carrier itself is the (already piped) instance and is added directly.
 *
 * @param carrier the instance produced by the pipe
 * @param iteratedPipe adopted as this list's pipe when the carrier is added
 *        directly and the pipe is still <code>notYetSetPipe</code>
 */
public void pipeOutputAccumulate (Instance carrier, Pipe iteratedPipe)
{
  // xxx ??? assert (iteratedPipe == pipe);
  // (that assertion would not hold when using IteratedPipe)
  // The add() overloads below verify that the pipes match.
  Object payload = carrier.getData();
  if (payload instanceof InstanceList) {
    add ((InstanceList) payload);
    return;
  }
  if (payload instanceof PipeInputIterator) {
    add ((PipeInputIterator) payload);
    return;
  }
  if (payload instanceof Instance) {
    add ((Instance) payload);
    return;
  }
  if (pipe == notYetSetPipe) {
    pipe = iteratedPipe;
  }
  // The carrier has already been piped; add it as-is so it is not re-piped.
  add (carrier);
}
开发者ID:clulab,项目名称:reach-banner,代码行数:21,代码来源:InstanceList.java
示例6: add
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Appends the instance to this list.
 *
 * The list adopts the instance's pipe while its own is still the
 * <code>notYetSetPipe</code> placeholder; afterwards every added instance must
 * carry that very same pipe object. The first add also records the class of
 * the instance's data and, when the pipe processes targets and the target is
 * non-null, the class of its target.
 *
 * @param instance the instance to append
 * @return the result of the underlying <code>instances.add</code> call
 * @throws IllegalArgumentException if the instance's pipe is not this list's pipe
 */
public boolean add (Instance instance)
{
  if (pipe != notYetSetPipe) {
    // Identity comparison: the instance must have gone through our pipe.
    // xxx A good place to also check the data is of a consistent type?
    if (instance.getPipe() != pipe) {
      throw new IllegalArgumentException ("pipes don't match: instance: "+
                                          instance.getPipe()+" Instance.list: "+
                                          this.pipe);
    }
  }
  else {
    pipe = instance.getPipe();
  }

  if (dataClass == null) {
    dataClass = instance.data.getClass();
    boolean recordTarget = pipe != null
        && pipe.isTargetProcessing()
        && instance.target != null;
    if (recordTarget) {
      targetClass = instance.target.getClass();
    }
  }
  return instances.add (instance);
}
开发者ID:clulab,项目名称:reach-banner,代码行数:23,代码来源:InstanceList.java
示例7: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Runs the carrier through the pipes of this chain in order, beginning at
 * <code>startingIndex</code>. Each pipe receives the instance returned by the
 * previous one. A null entry in the chain is reported on stderr and skipped.
 *
 * @param carrier the instance to process
 * @param startingIndex index of the first pipe to apply
 * @return the instance returned by the last pipe applied (the original
 *         carrier if none was)
 */
public Instance pipe(Instance carrier, int startingIndex)
{
  Instance current = carrier;
  int stage = startingIndex;
  while (stage < pipes.size()) {
    Pipe next = (Pipe) pipes.get(stage);
    if (next != null) {
      current = next.pipe(current);
    } else {
      System.err.println("Pipe is null");
    }
    stage++;
  }
  return current;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:18,代码来源:SerialPipes.java
示例8: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * For each token, looks for the earliest preceding token with identical text
 * and copies that token's features onto it under the name
 * <code>namePrefix + key</code>.
 *
 * Tokens are visited from last to first. A token whose text does not fully
 * match <code>featureRegex</code> (when that is non-null) is skipped. A
 * feature is copied when <code>filterRegex</code> is null, or when "key
 * matches filterRegex" equals <code>includeFiltered</code>.
 *
 * @param carrier instance whose data is a <code>TokenSequence</code>
 * @return the same carrier, with its tokens modified in place
 */
public Instance pipe (Instance carrier)
{
TokenSequence ts = (TokenSequence) carrier.getData();
int tsSize = ts.size();
// Walk backwards over the sequence.
for (int i = tsSize-1; i >= 0; i--) {
Token t = ts.getToken (i);
String text = t.getText();
if (featureRegex != null && !featureRegex.matcher(text).matches())
continue;
// Scan earlier tokens in ascending order; stop at the first one with equal text.
for (int j = 0; j < i; j++) {
if (ts.getToken(j).getText().equals(text)) {
PropertyList.Iterator iter = ts.getToken(j).getFeatures().iterator();
while (iter.hasNext()) {
iter.next();
String key = iter.getKey();
// (matches ^ !includeFiltered) is true exactly when matches == includeFiltered.
if (filterRegex == null || (filterRegex.matcher(key).matches() ^ !includeFiltered))
t.setFeatureValue (namePrefix+key, iter.getNumericValue());
}
break;
}
// NOTE(review): this runs once for every earlier token that did NOT match,
// so the firstMentionName feature is set on any token whose token 0 differs
// (even if a later j matches), and never on token 0 itself. The name suggests
// it was meant to run only when no earlier mention exists -- confirm intent
// before changing, since existing models may depend on the current features.
if (firstMentionName != null)
t.setFeatureValue (firstMentionName, 1.0);
}
}
return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:27,代码来源:FeaturesOfFirstMention.java
示例9: getUnnormalizedClassificationScores
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Fills <code>scores</code> with one unnormalized score per label: the
 * parameter stored at the default-feature slot of that label's row plus the
 * dot product of the row with the instance's feature vector, restricted by
 * the per-class feature selection when one exists and by the global feature
 * selection otherwise.
 *
 * @param instance instance whose data, obtained through
 *        <code>instancePipe</code>, is a <code>FeatureVector</code>
 * @param scores output array; asserted to have one slot per label
 */
public void getUnnormalizedClassificationScores (Instance instance, double[] scores)
{
  // Row width comes from defaultFeatureIndex rather than getAlphabet().size() + 1:
  // the latter gives an array-out-of-bounds if the pipe has grown since training.
  final int rowWidth = this.defaultFeatureIndex + 1;
  final int labelCount = getLabelAlphabet().size();
  assert (scores.length == labelCount);

  FeatureVector features = (FeatureVector) instance.getData (this.instancePipe);
  // The vector's feature dictionary must be the one our data pipe uses,
  // otherwise the parameter indices would not line up.
  assert (features.getAlphabet ()
          == this.instancePipe.getDataAlphabet ());

  // Accumulate the feature weights for each label in turn.
  for (int label = 0; label < labelCount; label++) {
    double defaultTerm = parameters[label*rowWidth + defaultFeatureIndex];
    double weightedSum = MatrixOps.rowDotProduct (
        parameters, rowWidth, label, features, defaultFeatureIndex,
        (perClassFeatureSelection == null
         ? featureSelection
         : perClassFeatureSelection[label]));
    scores[label] = defaultTerm + weightedSum;
  }
}
开发者ID:clulab,项目名称:reach-banner,代码行数:27,代码来源:MaxEnt.java
示例10: getPositionsAndTypes
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Tags the sentence with <code>forwardCRF</code> and decodes each predicted
 * tag string into a <code>TagPosition</code> and, when the tag has exactly
 * two "-"-separated parts, a <code>MentionType</code>.
 *
 * @param sentence the sentence to tag
 * @param positions output array, one slot per tag
 * @param types output array, one slot per tag; a slot is left as it was when
 *        the tag does not split into exactly two parts
 * @param reverse passed to <code>getTrainingText</code>; when true both
 *        output arrays are reversed after being filled
 * @throws IllegalArgumentException if either array's length differs from the
 *         number of predicted tags
 */
private void getPositionsAndTypes(Sentence sentence, TagPosition[] positions, MentionType[] types, boolean reverse)
{
  String trainingText = sentence.getTrainingText(format, reverse);
  Instance instance = new Instance(trainingText, null, sentence.getTag(), null, forwardCRF.getInputPipe());
  Sequence tags = forwardCRF.viterbiPath((Sequence)instance.getData()).output();

  final int tagCount = tags.size();
  if (positions.length != tagCount || types.length != tagCount)
  {
    throw new IllegalArgumentException();
  }

  for (int idx = 0; idx < tagCount; idx++)
  {
    // The tag string is e.g. "O" or "B-GENE"
    String[] parts = tags.get(idx).toString().split("-");
    positions[idx] = TagPosition.valueOf(parts[0]);
    // TODO Verify that the type stays the same
    if (parts.length == 2)
    {
      types[idx] = MentionType.getType(parts[1]);
    }
  }

  if (!reverse)
  {
    return;
  }
  reverse(positions);
  reverse(types);
}
开发者ID:leebird,项目名称:legonlp,代码行数:24,代码来源:CRFTagger.java
示例11: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Replaces the carrier's <code>TokenSequence</code> with a sequence of
 * n-grams. For every token position and every size in <code>gramSizes</code>
 * that fits (size &gt;= 1 and no larger than the number of tokens seen so
 * far), one entry is emitted: the token itself for size 1, otherwise the
 * texts of the last <code>size</code> tokens joined with "_" in sequence
 * order.
 *
 * The n-gram is now assembled from each token's <code>getText()</code>;
 * previously the preceding tokens were concatenated via their string
 * conversion, so the result depended on <code>Token.toString()</code> rather
 * than on the token text used for the final element.
 *
 * @param carrier instance whose data is a <code>TokenSequence</code>
 * @return the same carrier, with its data replaced by the n-gram sequence
 */
public Instance pipe (Instance carrier)
{
  TokenSequence tmpTS = new TokenSequence();
  TokenSequence ts = (TokenSequence) carrier.getData();
  for (int i = 0; i < ts.size(); i++) {
    Token t = ts.getToken(i);
    for (int j = 0; j < gramSizes.length; j++) {
      int len = gramSizes[j];
      // Skip sizes that are invalid or reach before the start of the sequence.
      if (len <= 0 || len > (i+1)) continue;
      // Unigrams reuse the original token (keeping whatever it carries).
      if (len == 1) { tmpTS.add(t); continue; }
      String newTerm = t.getText();
      // Prepend the preceding len-1 token texts, nearest first.
      for (int k = 1; k < len; k++)
        newTerm = ts.getToken(i-k).getText() + "_" + newTerm;
      tmpTS.add(newTerm);
    }
  }
  carrier.setData(tmpTS);
  return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:24,代码来源:TokenSequenceNGrams.java
示例12: train
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Builds an <code>InstanceList</code> from the sentences and trains a new
 * <code>CRF4</code> on it.
 *
 * @param sentences training sentences
 * @param order 1 adds states via <code>addStatesForLabelsConnectedAsIn</code>,
 *        2 via <code>addStatesForBiLabelsConnectedAsIn</code>
 * @param useFeatureInduction selects <code>trainWithFeatureInduction</code>
 *        over plain <code>train</code>
 * @param format tag format passed to <code>getTrainingText</code>
 * @param pipe pipe used for the instance list, every instance and the CRF
 * @param reverse passed to <code>getTrainingText</code>
 * @return the trained CRF
 * @throws IllegalArgumentException if <code>order</code> is neither 1 nor 2
 */
private static CRF4 train(List<Sentence> sentences, int order, boolean useFeatureInduction, TagFormat format, Pipe pipe, boolean reverse)
{
  InstanceList trainingData = new InstanceList(pipe);
  for (Sentence current : sentences)
  {
    trainingData.add(new Instance(current.getTrainingText(format, reverse), null, current.getTag(), null, pipe));
  }

  CRF4 model = new CRF4(pipe, null);
  switch (order)
  {
    case 1:
      model.addStatesForLabelsConnectedAsIn(trainingData);
      break;
    case 2:
      model.addStatesForBiLabelsConnectedAsIn(trainingData);
      break;
    default:
      throw new IllegalArgumentException("Order must be equal to 1 or 2");
  }

  if (useFeatureInduction)
  {
    model.trainWithFeatureInduction(trainingData, null, null, null, 99999, 100, 10, 1000, 0.5, false, new double[] {.2, .5, .8});
  }
  else
  {
    model.train(trainingData, null, null, (MultiSegmentationEvaluator)null, 99999, 10, new double[] {.2, .5, .8});
  }
  return model;
}
开发者ID:leebird,项目名称:legonlp,代码行数:22,代码来源:CRFTagger.java
示例13: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/** Replaces the carrier's data, a <CODE>CharSequence</CODE> of separated
 * numeric values, with a <CODE>double[]</CODE> holding those values in
 * order. The number of values found in the first instance fixes the
 * expected length for all later ones.
 *
 * @param carrier instance whose data is a <CODE>CharSequence</CODE>
 * @return the same carrier, with its data replaced by the array
 * @throws IllegalArgumentException if this instance has a different number
 *         of features than the first instance seen
 */
public Instance pipe( Instance carrier ) {
  CharSequence line = (CharSequence)carrier.getData();
  int observed = countNumberFeatures (line);
  if (numberFeatures != -1) {
    if (numberFeatures != observed) {
      throw new IllegalArgumentException ("Instances must have same-length feature vectors. length_i: " + numberFeatures + " length_j: " + observed);
    }
  } else {
    // first instance seen
    numberFeatures = observed;
  }

  double[] values = new double[numberFeatures];
  lexer.setCharSequence (line);
  for (int slot = 0; lexer.hasNext(); slot++) {
    values[slot] = Double.parseDouble ((String)lexer.next());
  }
  carrier.setData (values);
  return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:22,代码来源:Csv2Array.java
示例14: main
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Command-line driver: treats each argument as a file name, runs the file
 * through <code>Input2CharSequence</code> followed by
 * <code>CharSequence2TokenSequence</code>, and prints a "===" separator, the
 * file name and the resulting token sequence to stdout. Any exception is
 * printed (message and stack trace) and ends the run.
 *
 * @param args file names to tokenize
 */
public static void main (String[] args)
{
  try {
    int argIndex = 0;
    while (argIndex < args.length) {
      String fileName = args[argIndex];
      Instance carrier = new Instance (new File(fileName), null, null, null);
      Pipe[] stages = new Pipe[] {
        new Input2CharSequence (),
        new CharSequence2TokenSequence(new CharSequenceLexer())};
      Pipe tokenizer = new SerialPipes (stages);
      carrier = tokenizer.pipe (carrier);
      TokenSequence tokens = (TokenSequence) carrier.getData();
      System.out.println ("===");
      System.out.println (fileName);
      System.out.println (tokens.toString());
      argIndex++;
    }
  } catch (Exception e) {
    System.out.println (e);
    e.printStackTrace();
  }
}
开发者ID:clulab,项目名称:reach-banner,代码行数:20,代码来源:CharSequence2TokenSequence.java
示例15: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Converts the carrier's data to a character sequence. A <code>URI</code>,
 * <code>File</code> or <code>Reader</code> is passed to the matching
 * <code>pipe</code> overload and replaced by its result; a
 * <code>CharSequence</code> is left as it is.
 *
 * An I/O failure is rethrown as an IllegalArgumentException with the same
 * message as before, now carrying the original IOException as its cause so
 * the underlying stack trace is not lost. Null data is reported with the
 * same exception type instead of a NullPointerException raised while
 * building the error message.
 *
 * @param carrier the instance to convert
 * @return the same carrier
 * @throws IllegalArgumentException if the data is null, is of an unsupported
 *         class, or cannot be read
 */
public Instance pipe (Instance carrier)
{
  Object data = carrier.getData();
  try {
    if (data instanceof URI)
      carrier.setData(pipe ((URI) data));
    else if (data instanceof File)
      carrier.setData(pipe ((File) data));
    else if (data instanceof Reader)
      carrier.setData(pipe ((Reader) data));
    else if (data instanceof CharSequence)
      ; // No conversion necessary
    else
      throw new IllegalArgumentException ("Does not handle class "
                                          + (data == null ? "null" : data.getClass().toString()));
  } catch (java.io.IOException e) {
    // Keep the cause: the message alone hides where the read failed.
    throw new IllegalArgumentException ("IOException " + e, e);
  }
  return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:22,代码来源:Input2CharSequence.java
示例16: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Replaces the carrier's <code>TokenSequence</code> with a new one holding
 * only the tokens whose text fully matches
 * <code>CharSequenceLexer.LEX_ALPHA</code>. When <code>markDeletions</code>
 * is set, the text of each dropped token is stored on the most recently kept
 * token under <code>FeatureSequenceWithBigrams.deletionMark</code>; tokens
 * dropped before any token has been kept leave no mark.
 *
 * @param carrier instance whose data is a <code>TokenSequence</code>
 * @return the same carrier, with its data replaced by the filtered sequence
 */
public Instance pipe (Instance carrier)
{
  TokenSequence source = (TokenSequence) carrier.getData();
  // A fresh sequence is built instead of removing tokens in place.
  TokenSequence kept = new TokenSequence ();
  Token lastKept = null;
  for (int pos = 0; pos < source.size(); pos++) {
    Token token = source.getToken(pos);
    String text = token.getText();
    boolean alphabetic = CharSequenceLexer.LEX_ALPHA.matcher(text).matches();
    if (alphabetic) {
      kept.add (token);
      lastKept = token;
      continue;
    }
    if (markDeletions && lastKept != null) {
      lastKept.setProperty (FeatureSequenceWithBigrams.deletionMark, token.getText());
    }
  }
  carrier.setData(kept);
  return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:21,代码来源:TokenSequenceRemoveNonAlpha.java
示例17: pipe
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Splits each token's text into a data part and a target part using
 * <code>regex</code>. For a token whose whole text matches, group
 * <code>targetGroup</code> is appended to a new target sequence and the
 * token's text is replaced by group <code>dataGroup</code>. A token that does
 * not match is logged with a warning and left unchanged in the data
 * sequence; nothing is appended to the target for it.
 *
 * @param carrier instance whose data is a <code>TokenSequence</code>
 * @return the same carrier, with its target set to the new sequence
 */
public Instance pipe (Instance carrier)
{
  TokenSequence dataTokens = (TokenSequence) carrier.getData();
  TokenSequence targetTokens = new TokenSequence (dataTokens.size());
  for (int pos = 0; pos < dataTokens.size(); pos++) {
    Token token = dataTokens.getToken(pos);
    Matcher m = regex.matcher (token.getText());
    if (!m.matches()) {
      logger.warning ("Skipping token: No match of "+regex.pattern()
                      +" at token #"+pos+" with text "+token.getText());
      continue;
    }
    targetTokens.add (m.group(targetGroup));
    token.setText (m.group (dataGroup));
  }
  carrier.setTarget(targetTokens);
  carrier.setData(dataTokens);
  return carrier;
}
开发者ID:clulab,项目名称:reach-banner,代码行数:20,代码来源:TokenSequenceMatchDataAndTarget.java
示例18: nextInstance
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Wraps the next file from <code>subIterator</code> in an
 * <code>Instance</code> whose data is the file and whose third constructor
 * argument is the file's URI. The target name is derived from the file's
 * parent path: the part after <code>commonPrefixIndex</code> when
 * <code>targetPattern</code> is the <code>STARTING_DIRECTORIES</code>
 * sentinel, group 1 of the first match of any other non-null pattern, or
 * null when there is no pattern or no match.
 *
 * @return the instance for the next file
 */
public Instance nextInstance ()
{
  File file = (File) subIterator.next();
  String parentPath = file.getParent();
  String label = null;
  if (targetPattern == STARTING_DIRECTORIES) {
    // Sentinel compared by identity, not a pattern to be applied.
    label = parentPath.substring(commonPrefixIndex);
  }
  else if (targetPattern != null) {
    Matcher found = targetPattern.matcher(parentPath);
    label = found.find () ? found.group (1) : null;
  }
  return new Instance (file, label, file.toURI(), null);
}
开发者ID:clulab,项目名称:reach-banner,代码行数:19,代码来源:FileListIterator.java
示例19: nextInstance
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Produces the next random instance. Instances are generated per class,
 * counting <code>currentInstanceIndex</code> down; when it drops below zero
 * the iterator moves to the previous class index and restarts from that
 * class's last instance. Each instance holds a feature vector drawn from the
 * class centroid, the class name, and a URI of the form
 * <code>random:&lt;className&gt;/&lt;instanceIndex&gt;</code>.
 *
 * A failure while building the URI is rethrown as an IllegalStateException
 * that now carries a message and the original exception as its cause,
 * instead of printing the stack trace and throwing an empty exception.
 *
 * @return the generated instance
 * @throws IllegalStateException if no instances remain or the URI cannot be built
 */
public Instance nextInstance ()
{
  if (currentInstanceIndex < 0) {
    if (currentClassIndex <= 0)
      throw new IllegalStateException ("No next FeatureVector.");
    currentClassIndex--;
    currentInstanceIndex = numInstancesPerClass[currentClassIndex] - 1;
  }
  URI uri;
  try {
    uri = new URI ("random:" + classNames[currentClassIndex] + "/" + currentInstanceIndex);
  }
  catch (Exception e) {
    throw new IllegalStateException ("Could not build URI for random instance", e);
  }
  //xxx Producing small numbers? int randomSize = r.nextPoisson (featureVectorSizePoissonLambda);
  // The size is the lambda truncated to int, not a Poisson draw (see above).
  int randomSize = (int)featureVectorSizePoissonLambda;
  FeatureVector fv = classCentroid[currentClassIndex].randomFeatureVector (r, randomSize);
  currentInstanceIndex--;
  return new Instance (fv, classNames[currentClassIndex], uri, null);
}
开发者ID:clulab,项目名称:reach-banner,代码行数:19,代码来源:RandomFeatureVectorIterator.java
示例20: nextInstance
import edu.umass.cs.mallet.base.types.Instance; //导入依赖的package包/类
/**
 * Produces the next random instance. Instances are generated per class,
 * counting <code>currentInstanceIndex</code> down; when it drops below zero
 * the iterator moves to the previous class index and restarts from that
 * class's last instance. Each instance holds a token sequence drawn from the
 * class centroid, the class name, and a URI of the form
 * <code>random:&lt;className&gt;/&lt;instanceIndex&gt;</code>.
 *
 * A failure while building the URI is rethrown as an IllegalStateException
 * that now carries a message and the original exception as its cause,
 * instead of printing the stack trace and throwing an empty exception.
 *
 * @return the generated instance
 * @throws IllegalStateException if no instances remain or the URI cannot be built
 */
public Instance nextInstance ()
{
  if (currentInstanceIndex < 0) {
    if (currentClassIndex <= 0)
      throw new IllegalStateException ("No next TokenSequence.");
    currentClassIndex--;
    currentInstanceIndex = numInstancesPerClass[currentClassIndex] - 1;
  }
  URI uri;
  try {
    uri = new URI ("random:" + classNames[currentClassIndex] + "/" + currentInstanceIndex);
  }
  catch (Exception e) {
    throw new IllegalStateException ("Could not build URI for random instance", e);
  }
  //xxx Producing small numbers? int randomSize = r.nextPoisson (featureVectorSizePoissonLambda);
  // The size is the lambda truncated to int, not a Poisson draw (see above).
  int randomSize = (int)featureVectorSizePoissonLambda;
  TokenSequence ts = classCentroid[currentClassIndex].randomTokenSequence (r, randomSize);
  currentInstanceIndex--;
  return new Instance (ts, classNames[currentClassIndex], uri, null);
}
开发者ID:clulab,项目名称:reach-banner,代码行数:19,代码来源:RandomTokenSequenceIterator.java
注:本文中的edu.umass.cs.mallet.base.types.Instance类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论