本文整理汇总了Java中edu.stanford.nlp.util.Index类的典型用法代码示例。如果您正苦于以下问题:Java Index类的具体用法?Java Index怎么用?Java Index使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Index类属于edu.stanford.nlp.util包,在下文中一共展示了Index类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getWordDistributionsPerTopic
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* Analysis and Debugging
* @param wordIndex The index of words to integer IDs.
*/
/**
 * Analysis and Debugging: computes the additively smoothed unigram
 * distribution P(w | topic) for every topic from the per-topic word counts.
 *
 * @param countsBySlot one counter per topic, mapping word IDs to counts
 * @param smoothing additive smoothing mass added to each word's count
 * @param smoothingTimesNum smoothing multiplied by the vocabulary size
 *        (added to each topic's total count in the denominator)
 * @param wordIndex The index of words to integer IDs.
 * @return one array of size wordIndex.size() per topic, in topic order
 */
private List<double[]> getWordDistributionsPerTopic(ClassicCounter<Integer>[] countsBySlot, double smoothing, double smoothingTimesNum, Index<String> wordIndex) {
  int vocabSize = wordIndex.size();
  List<double[]> dists = new ArrayList<double[]>(numTopics);
  for (int topic = 0; topic < numTopics; topic++) {
    double[] dist = new double[vocabSize];
    dists.add(dist);
    // Hoisted: the denominator is invariant across the inner loop, so
    // compute it once per topic instead of once per vocabulary word.
    double denominator = countsBySlot[topic].totalCount() + smoothingTimesNum;
    for (int ii = 0; ii < vocabSize; ii++) {
      dist[ii] = (countsBySlot[topic].getCount(ii) + smoothing) / denominator;
    }
  }
  return dists;
}
开发者ID:nchambers,项目名称:probschemas,代码行数:20,代码来源:GibbsSamplerEntities.java
示例2: factory
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* Creates a scorer.
*
* @throws IOException
*/
/**
 * Creates a scorer.
 *
 * @param scorerName one of the scorer-name constants declared on this class
 * @param config weight configuration passed to dense/sparse scorers
 * @param featureIndex feature index passed to dense/sparse scorers
 * @return a freshly constructed scorer of the requested kind
 * @throws IOException if scorer construction fails on I/O
 * @throws RuntimeException if the scorer name is not recognized
 */
public static Scorer<String> factory(String scorerName, Counter<String> config, Index<String> featureIndex)
    throws IOException {
  // Note: like the switch-on-String it replaces, this NPEs on a null name.
  if (scorerName.equals(UNIFORM_SCORER)) {
    return new UniformScorer<String>();
  }
  if (scorerName.equals(DENSE_SCORER)) {
    return new DenseScorer(config, featureIndex);
  }
  if (scorerName.equals(SPARSE_SCORER)) {
    return new SparseScorer(config, featureIndex);
  }
  throw new RuntimeException(String.format("Unknown scorer \"%s\"",
      scorerName));
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:21,代码来源:ScorerFactory.java
示例3: IntTaggedWord
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* Creates an IntTaggedWord given by the tagString and wordString
*/
/**
 * Creates an IntTaggedWord given by the tagString and wordString.
 * The ANY and STOP sentinels map to their reserved integer IDs; any other
 * string is looked up in (and, if absent, added to) the relevant index.
 */
public IntTaggedWord(String wordString, String tagString,
    Index<String> wordIndex, Index<String> tagIndex) {
  word = wordString.equals(ANY) ? ANY_WORD_INT
       : wordString.equals(STOP) ? STOP_WORD_INT
       : wordIndex.indexOf(wordString, true);
  tag = (short) (tagString.equals(ANY) ? ANY_TAG_INT
       : tagString.equals(STOP) ? STOP_TAG_INT
       : tagIndex.indexOf(tagString, true));
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:21,代码来源:IntTaggedWord.java
示例4: BaseLexicon
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Constructs a lexicon over the given word and tag indices, copying the
 * relevant settings out of the supplied options object.
 */
public BaseLexicon(Options op, Index<String> wordIndex, Index<String> tagIndex) {
  this.op = op;
  this.wordIndex = wordIndex;
  this.tagIndex = tagIndex;
  this.trainOptions = op.trainOptions;
  this.testOptions = op.testOptions;
  flexiTag = op.lexOptions.flexiTag;
  useSignatureForKnownSmoothing = op.lexOptions.useSignatureForKnownSmoothing;
  smoothInUnknownsThreshold = op.lexOptions.smoothInUnknownsThreshold;
  smartMutation = op.lexOptions.smartMutation;
  // The UnknownWordModel is later constructed by reflection from this class
  // name -- lexicons and UnknownWordModels aren't well encapsulated from
  // each other, hence the string-typed default here.
  uwModelTrainerClass = (op.lexOptions.uwModelTrainer != null)
      ? op.lexOptions.uwModelTrainer
      : "edu.stanford.nlp.parser.lexparser.BaseUnknownWordModelTrainer";
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:23,代码来源:BaseLexicon.java
示例5: initializeTraining
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Prepares this trainer for a training pass: records the tree index at which
 * unknown-word counting should begin, allocates the seen/unseen counters, and
 * creates the underlying EnglishUnknownWordModel.
 *
 * @param totalTrees total number of training trees; combined with
 *        fractionBeforeUnseenCounting to locate the unknown-counting start
 */
@Override
public void initializeTraining(Options op, Lexicon lex,
    Index<String> wordIndex,
    Index<String> tagIndex, double totalTrees) {
  super.initializeTraining(op, lex, wordIndex, tagIndex, totalTrees);
  indexToStartUnkCounting = totalTrees * op.trainOptions.fractionBeforeUnseenCounting;
  seenCounter = new ClassicCounter<IntTaggedWord>();
  unSeenCounter = new ClassicCounter<IntTaggedWord>();
  model = new EnglishUnknownWordModel(op, lex, wordIndex, tagIndex, unSeenCounter);
  if (DOCUMENT_UNKNOWNS) {
    // Debug trace of the unknown-word collection window.
    String message = "Collecting " + Lexicon.UNKNOWN_WORD +
        " from trees " + (indexToStartUnkCounting + 1) +
        " to " + totalTrees;
    System.err.println(message);
  }
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:22,代码来源:EnglishUnknownWordModelTrainer.java
示例6: Feature
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* This is if we are given an array of double with a value for each training sample in the order of their occurrence.
*/
public Feature(Experiments e, double[] vals, Index<IntPair> instanceIndex) {
this.instanceIndex = instanceIndex;
Map<Integer, Double> setNonZeros = Generics.newHashMap();
for (int i = 0; i < vals.length; i++) {
if (vals[i] != 0.0) {
Integer in = Integer.valueOf(indexOf(e.get(i)[0], e.get(i)[1]));// new Integer(e.get(i)[0]*e.ySize+e.get(i)[1]);
Double oldVal = setNonZeros.put(in, Double.valueOf(vals[i]));
if (oldVal != null && oldVal.doubleValue() != vals[i]) {
throw new IllegalStateException("Incorrect function specification: Feature has two values at one point: " + oldVal + " and " + vals[i]);
}
}//if
}// for
Integer[] keys = setNonZeros.keySet().toArray(new Integer[setNonZeros.keySet().size()]);
indexedValues = new int[keys.length];
valuesI = new double[keys.length];
for (int j = 0; j < keys.length; j++) {
indexedValues[j] = keys[j].intValue();
valuesI[j] = setNonZeros.get(keys[j]).doubleValue();
} // for
domain = e;
}
开发者ID:jaimeguzman,项目名称:data_mining,代码行数:25,代码来源:Feature.java
示例7: BaseUnknownWordModel
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Constructs an unknown-word model from pre-computed statistics and the
 * lexicon options carried by {@code op}. The use* flags are all derived from
 * {@code op.lexOptions.useUnknownWordSignatures} and the suffix size.
 */
public BaseUnknownWordModel(Options op, Lexicon lex,
    Index<String> wordIndex,
    Index<String> tagIndex,
    ClassicCounter<IntTaggedWord> unSeenCounter,
    Map<Label,ClassicCounter<String>> tagHash,
    Map<String,Float> unknownGT,
    Set<String> seenEnd) {
  // TODO: refactor these terms into BaseUnknownWordModelTrainer
  int signatureLevel = op.lexOptions.useUnknownWordSignatures;
  endLength = op.lexOptions.unknownSuffixSize;
  useEnd = op.lexOptions.unknownSuffixSize > 0 && signatureLevel > 0;
  useFirstCap = signatureLevel > 0;
  useGT = signatureLevel == 0;
  useFirst = false;
  this.lex = lex;
  this.trainOptions = op.trainOptions;
  this.wordIndex = wordIndex;
  this.tagIndex = tagIndex;
  this.unSeenCounter = unSeenCounter;
  this.tagHash = tagHash;
  this.unknownGT = unknownGT;
  this.seenEnd = seenEnd;
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:24,代码来源:BaseUnknownWordModel.java
示例8: readSVMLightFormat
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Reads a dataset in SVMLight format from the given file.
 *
 * @param filename file to read, one datum per line
 * @param featureIndex shared feature index for the new dataset
 * @param labelIndex shared label index for the new dataset
 * @param lines if non-null, every raw input line is also appended here
 * @return the populated dataset
 * @throws RuntimeIOException wrapping any underlying IOException
 */
private static RVFDataset<String, String> readSVMLightFormat(String filename, Index<String> featureIndex, Index<String> labelIndex, List<String> lines) {
  RVFDataset<String, String> dataset = new RVFDataset<String, String>(10, featureIndex, labelIndex);
  // try-with-resources replaces the null-initialized reader plus manual
  // finally/closeIgnoringExceptions dance of the original.
  try (BufferedReader in = new BufferedReader(new FileReader(filename))) {
    while (in.ready()) {
      String line = in.readLine();
      if (lines != null) {
        lines.add(line);
      }
      dataset.add(svmLightLineToRVFDatum(line));
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
  return dataset;
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:21,代码来源:RVFDataset.java
示例9: CRFCliqueTree
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/** This extra constructor was added to support the CRFCliqueTreeForPartialLabels */
CRFCliqueTree(FactorTable[] factorTables, Index<E> classIndex, E backgroundSymbol, double z) {
this.factorTables = factorTables;
this.z = z;
this.classIndex = classIndex;
this.backgroundSymbol = backgroundSymbol;
backgroundIndex = classIndex.indexOf(backgroundSymbol);
windowSize = factorTables[0].windowSize();
numClasses = classIndex.size();
possibleValues = new int[numClasses];
for (int i = 0; i < numClasses; i++) {
possibleValues[i] = i;
}
// Debug only
// System.out.println("CRFCliqueTree constructed::numClasses: " +
// numClasses);
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:19,代码来源:CRFCliqueTree.java
示例10: Feature
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* @param vals a value for each (x,y) pair
*/
public Feature(Experiments e, double[][] vals, Index<IntPair> instanceIndex) {
this.instanceIndex = instanceIndex;
domain = e;
int num = 0;
for (int x = 0; x < e.xSize; x++) {
for (int y = 0; y < e.ySize; y++) {
if (vals[x][y] != 0) {
num++;
}
}
}
indexedValues = new int[num];
valuesI = new double[num];
int current = 0;
for (int x = 0; x < e.xSize; x++) {
for (int y = 0; y < e.ySize; y++) {
if (vals[x][y] != 0) {
indexedValues[current] = indexOf(x, y);
valuesI[current] = vals[x][y];
current++;
}//if
}//for
}
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:28,代码来源:Feature.java
示例11: ArabicUnknownWordModel
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Constructs an Arabic unknown-word model, clamping the configured signature
 * level into [MIN_UNKNOWN, MAX_UNKNOWN] and copying the mutation/affix
 * options out of {@code op.lexOptions}.
 */
public ArabicUnknownWordModel(Options op, Lexicon lex,
    Index<String> wordIndex,
    Index<String> tagIndex,
    ClassicCounter<IntTaggedWord> unSeenCounter) {
  super(op, lex, wordIndex, tagIndex, unSeenCounter, null, null, null);
  unknownLevel = op.lexOptions.useUnknownWordSignatures;
  // Clamp out-of-range levels. The original wrapped two redundant nested
  // re-checks inside an outer range test; a plain if/else-if is equivalent.
  if (unknownLevel < MIN_UNKNOWN) {
    unknownLevel = MIN_UNKNOWN;
    System.err.println("Invalid value for useUnknownWordSignatures");
  } else if (unknownLevel > MAX_UNKNOWN) {
    unknownLevel = MAX_UNKNOWN;
    System.err.println("Invalid value for useUnknownWordSignatures");
  }
  this.smartMutation = op.lexOptions.smartMutation;
  this.unknownSuffixSize = op.lexOptions.unknownSuffixSize;
  this.unknownPrefixSize = op.lexOptions.unknownPrefixSize;
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:19,代码来源:ArabicUnknownWordModel.java
示例12: BaseUnknownWordModel
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Constructs an unknown-word model from pre-computed statistics supplied as
 * HashMaps; the use* flags are derived from the signature level and suffix
 * size carried by {@code op.lexOptions}.
 */
public BaseUnknownWordModel(Options op, Lexicon lex,
    Index<String> wordIndex,
    Index<String> tagIndex,
    ClassicCounter<IntTaggedWord> unSeenCounter,
    HashMap<Label,ClassicCounter<String>> tagHash,
    HashMap<String,Float> unknownGT,
    Set<String> seenEnd) {
  // TODO: refactor these terms into BaseUnknownWordModelTrainer
  int signatureLevel = op.lexOptions.useUnknownWordSignatures;
  endLength = op.lexOptions.unknownSuffixSize;
  useEnd = op.lexOptions.unknownSuffixSize > 0 && signatureLevel > 0;
  useFirstCap = signatureLevel > 0;
  useGT = signatureLevel == 0;
  useFirst = false;
  this.lex = lex;
  this.trainOptions = op.trainOptions;
  this.wordIndex = wordIndex;
  this.tagIndex = tagIndex;
  this.unSeenCounter = unSeenCounter;
  this.tagHash = tagHash;
  this.unknownGT = unknownGT;
  this.seenEnd = seenEnd;
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:24,代码来源:BaseUnknownWordModel.java
示例13: getCalibratedCliqueTree
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* @return a new CRFCliqueTree for the weights on the data
*/
public static <E> CRFCliqueTree<E> getCalibratedCliqueTree(double[] weights, double wscale, int[][] weightIndices,
int[][][] data, List<Index<CRFLabel>> labelIndices, int numClasses, Index<E> classIndex, E backgroundSymbol) {
FactorTable[] factorTables = new FactorTable[data.length];
FactorTable[] messages = new FactorTable[data.length - 1];
for (int i = 0; i < data.length; i++) {
factorTables[i] = getFactorTable(weights, wscale, weightIndices, data[i], labelIndices, numClasses);
if (i > 0) {
messages[i - 1] = factorTables[i - 1].sumOutFront();
factorTables[i].multiplyInFront(messages[i - 1]);
}
}
for (int i = factorTables.length - 2; i >= 0; i--) {
FactorTable summedOut = factorTables[i + 1].sumOutEnd();
summedOut.divideBy(messages[i]);
factorTables[i].multiplyInEnd(summedOut);
}
return new CRFCliqueTree<E>(factorTables, classIndex, backgroundSymbol);
}
开发者ID:benblamey,项目名称:stanford-nlp,代码行数:29,代码来源:CRFCliqueTree.java
示例14: init
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
@Override
public void init(Properties prop, Index<String> featureIndex,
AlignmentTemplates alTemps) {
super.init(prop, featureIndex, alTemps);
// Set counts of "NULL":
fLexCounts.add(0);
eLexCounts.add(0);
// Do we want exact counts?
boolean exact = prop.getProperty(PhraseExtract.EXACT_PHI_OPT, "true")
.equals("true");
this.numPasses = exact ? 2 : 1;
System.err.println("Exact denominator counts for phi(f|e): " + exact);
// IBM lexicalized model?
ibmLexModel = prop.getProperty(PhraseExtract.IBM_LEX_MODEL_OPT, "false")
.equals("true");
if (!ibmLexModel)
alTemps.enableAlignmentCounts(true);
onlyPhi = prop.getProperty(PhraseExtract.ONLY_ML_OPT, "false").equals(
"true");
// Filtering:
phiFilter = Double.parseDouble(prop.getProperty(
PhraseExtract.PTABLE_PHI_FILTER_OPT,
Double.toString(DEFAULT_PHI_FILTER)));
lexFilter = Double.parseDouble(prop.getProperty(
PhraseExtract.PTABLE_LEX_FILTER_OPT,
Double.toString(DEFAULT_LEX_FILTER)));
System.err.printf("Cut-off value for phi(e|f): %.5f\n", phiFilter);
System.err.printf("Cut-off value for lex(e|f): %.5f\n", lexFilter);
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:30,代码来源:MosesPharoahFeatureExtractor.java
示例15: readWeights
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
* Read weights from a file. Supports both binary and text formats.
*
* TODO(spenceg) Replace ClassicCounter with our own SparseVector implementation.
*
* @param filename
* @param featureIndex
* @return a counter of weights
* @throws IOException
*/
@SuppressWarnings("unchecked")
public static Counter<String> readWeights(String filename,
Index<String> featureIndex) {
Counter<String> wts = (Counter<String>) deserialize(filename, ClassicCounter.class, SerializationMode.BIN_GZ);
if (wts == null) wts = new ClassicCounter<>();
if (featureIndex != null) {
for (String key : wts.keySet()) {
featureIndex.addToIndex(key);
}
}
return wts;
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:23,代码来源:IOTools.java
示例16: DenseFeatureValueCollection
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Packs a collection of feature values into a dense weight array laid out by
 * the given feature index, tracking in a BitSet which slots were assigned.
 */
public DenseFeatureValueCollection(Collection<? extends FeatureValue<E>> c,
    Index<E> featureIndex) {
  this.featureIndex = featureIndex;
  w = new double[featureIndex.size()];
  isDefined = new BitSet(c.size());
  for (FeatureValue<E> featureValue : c) {
    int slot = featureIndex.addToIndex(featureValue.name);
    w[slot] = featureValue.value;
    isDefined.set(slot);
  }
}
开发者ID:stanfordnlp,项目名称:phrasal,代码行数:12,代码来源:DenseFeatureValueCollection.java
示例17: toBiggestValuesFirstString
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Renders the k largest-valued entries of the counter, translating integer
 * keys back to objects through the index, via a priority-queue toString.
 */
public static <T> String toBiggestValuesFirstString(Counter<Integer> c, int k, Index<T> index) {
  PriorityQueue<Integer> sourceQueue = toPriorityQueue(c);
  PriorityQueue<T> largestK = new BinaryHeapPriorityQueue<T>();
  // Drain the source queue until we have k entries or it runs dry.
  while (largestK.size() < k && !sourceQueue.isEmpty()) {
    double topScore = sourceQueue.getPriority(sourceQueue.getFirst());
    int topKey = sourceQueue.removeFirst();
    largestK.changePriority(index.get(topKey), topScore);
  }
  return largestK.toString();
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:12,代码来源:Counters.java
示例18: toCounter
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Converts an array of counts into a Counter keyed by the objects the index
 * assigns to each array position; zero entries are skipped.
 *
 * @throws IllegalArgumentException if the index is smaller than the array
 */
public static <T> Counter<T> toCounter(double[] counts, Index<T> index) {
  if (index.size() < counts.length) {
    throw new IllegalArgumentException("Index not large enough to name all the array elements!");
  }
  Counter<T> result = new ClassicCounter<T>();
  for (int pos = 0; pos < counts.length; pos++) {
    double value = counts[pos];
    if (value != 0.0) {
      result.setCount(index.get(pos), value);
    }
  }
  return result;
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:9,代码来源:Counters.java
示例19: MLEDependencyGrammar
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Constructs an MLE dependency grammar, copying smoothing options from
 * {@code op}, allocating fresh argument/stop counters, and unpacking the
 * language-specific smoothing parameters from {@code tlpParams}.
 */
public MLEDependencyGrammar(TagProjection tagProjection, TreebankLangParserParams tlpParams, boolean directional, boolean useDistance, boolean useCoarseDistance, Options op, Index<String> wordIndex, Index<String> tagIndex) {
  super(tlpParams.treebankLanguagePack(), tagProjection, directional, useDistance, useCoarseDistance, op, wordIndex, tagIndex);
  useSmoothTagProjection = op.useSmoothTagProjection;
  useUnigramWordSmoothing = op.useUnigramWordSmoothing;
  argCounter = new ClassicCounter<IntDependency>();
  stopCounter = new ClassicCounter<IntDependency>();
  // Unpack the four language-specific smoothing parameters in order.
  double[] smoothingParams = tlpParams.MLEDependencyGrammarSmoothingParams();
  smooth_aT_hTWd = smoothingParams[0];
  smooth_aTW_hTWd = smoothingParams[1];
  smooth_stop = smoothingParams[2];
  interp = smoothingParams[3];
  // cdm added Jan 2007 to play with dep grammar smoothing. Integrate this better if we keep it!
  smoothTP = new BasicCategoryTagProjection(tlpParams.treebankLanguagePack());
}
开发者ID:paulirwin,项目名称:Stanford.NER.Net,代码行数:16,代码来源:MLEDependencyGrammar.java
示例20: ExhaustiveDependencyParser
import edu.stanford.nlp.util.Index; //导入依赖的package包/类
/**
 * Constructs an exhaustive dependency parser over the given grammar, lexicon,
 * options, and word/tag indices; the language pack and tree factory are
 * derived from the options.
 */
public ExhaustiveDependencyParser(DependencyGrammar dg, Lexicon lex, Options op, Index<String> wordIndex, Index<String> tagIndex) {
  this.dg = dg;
  this.lex = lex;
  this.op = op;
  this.wordIndex = wordIndex;
  this.tagIndex = tagIndex;
  tlp = op.langpack();
  tf = new LabeledScoredTreeFactory();
}
开发者ID:chbrown,项目名称:stanford-parser,代码行数:10,代码来源:ExhaustiveDependencyParser.java
注:本文中的edu.stanford.nlp.util.Index类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论