本文整理汇总了 Java 中 cc.mallet.topics.TopicAssignment 类的典型用法代码示例。如果您正苦于以下问题：Java TopicAssignment 类的具体用法？Java TopicAssignment 怎么用？Java TopicAssignment 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
TopicAssignment 类属于 cc.mallet.topics 包，在下文中一共展示了 TopicAssignment 类的 19 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的 Java 代码示例。
示例1: FastQParallelTopicModel
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Builds an empty FastQ LDA model with one topic per entry of {@code topicAlphabet}.
 *
 * @param topicAlphabet     alphabet whose size determines the number of topics
 * @param alpha             initial value for every element of the per-topic alpha array
 * @param beta              initial value stored in the single-element beta array
 * @param useCycleProposals flag stored as-is on the model
 */
public FastQParallelTopicModel(LabelAlphabet topicAlphabet, double alpha, double beta, boolean useCycleProposals) {
    this.useCycleProposals = useCycleProposals;
    this.topicAlphabet = topicAlphabet;
    this.numTopics = topicAlphabet.size();
    this.data = new ArrayList<TopicAssignment>();

    // Per-topic alpha starts out uniform.
    this.alpha = new double[numTopics];
    Arrays.fill(this.alpha, alpha);

    // The scalar hyperparameters are held in one-element arrays.
    this.alphaSum = new double[] { numTopics * alpha };
    this.betaSum = new double[1];
    this.beta = new double[] { beta };
    this.gamma = new double[] { 1 };

    tokensPerTopic = new int[numTopics];

    formatter = NumberFormat.getInstance();
    formatter.setMaximumFractionDigits(5);

    logger.info("FastQ LDA: " + numTopics + " topics ");
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:24,代码来源:FastQParallelTopicModel.java
示例2: ParallelTopicModel
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
public ParallelTopicModel (LabelAlphabet topicAlphabet, double alphaSum, double beta)
{
this.data = new ArrayList<TopicAssignment>();
this.topicAlphabet = topicAlphabet;
this.numTopics = topicAlphabet.size();
if (Integer.bitCount(numTopics) == 1) {
// exact power of 2
topicMask = numTopics - 1;
topicBits = Integer.bitCount(topicMask);
}
else {
// otherwise add an extra bit
topicMask = Integer.highestOneBit(numTopics) * 2 - 1;
topicBits = Integer.bitCount(topicMask);
}
this.alphaSum = alphaSum;
this.alpha = new double[numTopics];
Arrays.fill(alpha, alphaSum / numTopics);
this.beta = beta;
tokensPerTopic = new int[numTopics];
formatter = NumberFormat.getInstance();
formatter.setMaximumFractionDigits(5);
logger.info("Coded LDA: " + numTopics + " topics, " + topicBits + " topic bits, " +
Integer.toBinaryString(topicMask) + " topic mask");
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:32,代码来源:ParallelTopicModel.java
示例3: initializeFromState
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Overwrites the topic assignment of every token in {@code data} with the
 * values stored in a gzip-compressed, space-separated state file, then
 * rebuilds the type-topic counts and the histograms.
 *
 * <p>Leading lines that start with {@code #} are skipped. Each remaining line
 * is matched against one token in document order: field 3 must equal the
 * token's type index and field 5 supplies the topic.
 *
 * @param stateFile gzip-compressed state file to read
 * @throws IOException           if the file cannot be opened or read
 * @throws IllegalStateException if a line's type does not match the token at
 *                               that position, or the file ends before every
 *                               token has been assigned a topic
 */
public void initializeFromState(File stateFile) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(stateFile))));
    try {
        String line = reader.readLine();

        // Skip some lines starting with "#" that describe the format and specify hyperparameters.
        // The null check keeps an empty or header-only file from raising a NullPointerException.
        while (line != null && line.startsWith("#")) {
            line = reader.readLine();
        }
        String[] fields = (line == null) ? null : line.split(" ");

        for (TopicAssignment document : data) {
            FeatureSequence tokens = (FeatureSequence) document.instance.getData();
            FeatureSequence topicSequence = (FeatureSequence) document.topicSequence;
            int[] topics = topicSequence.getFeatures();

            for (int position = 0; position < tokens.size(); position++) {
                if (fields == null) {
                    // Previously the last line's fields were silently reused here.
                    throw new IllegalStateException(
                            "state file " + stateFile + " ended before every token was assigned a topic");
                }

                int type = tokens.getIndexAtPosition(position);
                if (type != Integer.parseInt(fields[3])) {
                    System.err.println("instance list and state do not match: " + line);
                    throw new IllegalStateException("instance list and state do not match: " + line);
                }
                topics[position] = Integer.parseInt(fields[5]);

                line = reader.readLine();
                fields = (line == null) ? null : line.split(" ");
            }
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
    }

    buildInitialTypeTopicCounts();
    initializeHistograms();
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:41,代码来源:ParallelTopicModel.java
示例4: ParallelTopicModel
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
public ParallelTopicModel (LabelAlphabet topicAlphabet, double alphaSum, double beta) {
this.data = new ArrayList<TopicAssignment>();
this.topicAlphabet = topicAlphabet;
this.alphaSum = alphaSum;
this.beta = beta;
setNumTopics(topicAlphabet.size());
formatter = NumberFormat.getInstance();
formatter.setMaximumFractionDigits(5);
logger.info("Mallet LDA: " + numTopics + " topics, " + topicBits + " topic bits, " +
Integer.toBinaryString(topicMask) + " topic mask");
}
开发者ID:iamxiatian,项目名称:wikit,代码行数:16,代码来源:ParallelTopicModel.java
示例5: addInstances
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Adds every instance of {@code training} to the model with a uniformly
 * random topic drawn for each token, then rebuilds the type-topic counts
 * and the histograms.
 *
 * @param training instances whose data are {@code FeatureSequence}s
 */
public void addInstances (InstanceList training) {
    alphabet = training.getDataAlphabet();
    numTypes = alphabet.size();
    betaSum = beta * numTypes;

    // A seed of -1 selects the no-argument Randoms constructor.
    Randoms random = (randomSeed == -1) ? new Randoms() : new Randoms(randomSeed);

    for (Instance instance : training) {
        FeatureSequence tokens = (FeatureSequence) instance.getData();
        LabelSequence topicSequence = new LabelSequence(topicAlphabet, new int[ tokens.size() ]);

        // Fill the sequence's feature array in place, one draw per token.
        int[] assigned = topicSequence.getFeatures();
        int position = 0;
        while (position < assigned.length) {
            assigned[position] = random.nextInt(numTopics);
            position++;
        }

        data.add(new TopicAssignment(instance, topicSequence));
    }

    buildInitialTypeTopicCounts();
    initializeHistograms();
}
开发者ID:iamxiatian,项目名称:wikit,代码行数:36,代码来源:ParallelTopicModel.java
示例6: ParallelTopicModel_origin
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Creates an empty model with one topic per entry of {@code topicAlphabet}
 * and a uniform alpha of {@code alphaSum / numTopics}.
 *
 * @param topicAlphabet alphabet whose size determines the number of topics
 * @param alphaSum      total alpha mass, split evenly across topics
 * @param beta          value stored in the model's beta field
 */
public ParallelTopicModel_origin (LabelAlphabet topicAlphabet, double alphaSum, double beta)
{
    this.topicAlphabet = topicAlphabet;
    this.numTopics = topicAlphabet.size();
    this.data = new ArrayList<TopicAssignment>();

    if (Integer.bitCount(numTopics) != 1) {
        // not a power of 2: add an extra bit
        topicMask = Integer.highestOneBit(numTopics) * 2 - 1;
    }
    else {
        // exact power of 2
        topicMask = numTopics - 1;
    }
    // Both cases derive the bit count from the mask chosen above.
    topicBits = Integer.bitCount(topicMask);

    this.alphaSum = alphaSum;
    this.alpha = new double[numTopics];
    Arrays.fill(alpha, alphaSum / numTopics);
    this.beta = beta;

    tokensPerTopic = new int[numTopics];

    formatter = NumberFormat.getInstance();
    formatter.setMaximumFractionDigits(5);

    logger.info("Coded LDA: " + numTopics + " topics, " + topicBits + " topic bits, " +
            Integer.toBinaryString(topicMask) + " topic mask");
}
开发者ID:shalomeir,项目名称:tctm,代码行数:32,代码来源:ParallelTopicModel_origin.java
示例7: FastParallelTopicModel
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
public FastParallelTopicModel(LabelAlphabet topicAlphabet, double alphaSum, double beta) {
this.data = new ArrayList<TopicAssignment>();
this.topicAlphabet = topicAlphabet;
this.numTopics = topicAlphabet.size();
if (Integer.bitCount(numTopics) == 1) {
// exact power of 2
topicMask = numTopics - 1;
topicBits = Integer.bitCount(topicMask);
} else {
// otherwise add an extra bit
topicMask = Integer.highestOneBit(numTopics) * 2 - 1;
topicBits = Integer.bitCount(topicMask);
}
this.alphaSum = alphaSum;
this.alpha = new double[numTopics];
Arrays.fill(alpha, alphaSum / numTopics);
this.beta = beta;
tokensPerTopic = new int[numTopics];
formatter = NumberFormat.getInstance();
formatter.setMaximumFractionDigits(5);
logger.info("Coded LDA: " + numTopics + " topics, " + topicBits + " topic bits, "
+ Integer.toBinaryString(topicMask) + " topic mask");
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:29,代码来源:FastParallelTopicModel.java
示例8: initializeFromState
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Overwrites the topic assignment of every token in {@code data} with the
 * values stored in a gzip-compressed, space-separated state file, then
 * rebuilds the type-topic counts and the histograms.
 *
 * <p>Leading lines that start with {@code #} are skipped. Each remaining line
 * is matched against one token in document order: field 3 must equal the
 * token's type index and field 5 supplies the topic.
 *
 * @param stateFile gzip-compressed state file to read
 * @throws IOException           if the file cannot be opened or read
 * @throws IllegalStateException if a line's type does not match the token at
 *                               that position, or the file ends before every
 *                               token has been assigned a topic
 */
public void initializeFromState(File stateFile) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(stateFile))));
    try {
        String line = reader.readLine();

        // Skip some lines starting with "#" that describe the format and specify hyperparameters.
        // The null check keeps an empty or header-only file from raising a NullPointerException.
        while (line != null && line.startsWith("#")) {
            line = reader.readLine();
        }
        String[] fields = (line == null) ? null : line.split(" ");

        for (TopicAssignment document : data) {
            FeatureSequence tokens = (FeatureSequence) document.instance.getData();
            FeatureSequence topicSequence = (FeatureSequence) document.topicSequence;
            int[] topics = topicSequence.getFeatures();

            for (int position = 0; position < tokens.size(); position++) {
                if (fields == null) {
                    // Previously the last line's fields were silently reused here.
                    throw new IllegalStateException(
                            "state file " + stateFile + " ended before every token was assigned a topic");
                }

                int type = tokens.getIndexAtPosition(position);
                if (type != Integer.parseInt(fields[3])) {
                    System.err.println("instance list and state do not match: " + line);
                    throw new IllegalStateException("instance list and state do not match: " + line);
                }
                topics[position] = Integer.parseInt(fields[5]);

                line = reader.readLine();
                fields = (line == null) ? null : line.split(" ");
            }
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
    }

    buildInitialTypeTopicCounts();
    initializeHistograms();
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:40,代码来源:FastParallelTopicModel.java
示例9: ParallelTopicModel
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
public ParallelTopicModel(LabelAlphabet topicAlphabet, double alphaSum, double beta) {
this.data = new ArrayList<TopicAssignment>();
this.topicAlphabet = topicAlphabet;
this.numTopics = topicAlphabet.size();
if (Integer.bitCount(numTopics) == 1) {
// exact power of 2
topicMask = numTopics - 1;
topicBits = Integer.bitCount(topicMask);
} else {
// otherwise add an extra bit
topicMask = Integer.highestOneBit(numTopics) * 2 - 1;
topicBits = Integer.bitCount(topicMask);
}
this.alphaSum = alphaSum;
this.alpha = new double[numTopics];
Arrays.fill(alpha, alphaSum / numTopics);
this.beta = beta;
tokensPerTopic = new int[numTopics];
formatter = NumberFormat.getInstance();
formatter.setMaximumFractionDigits(5);
logger.info("Coded LDA: " + numTopics + " topics, " + topicBits + " topic bits, "
+ Integer.toBinaryString(topicMask) + " topic mask");
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:29,代码来源:ParallelTopicModel.java
示例10: initializeFromState
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Overwrites the topic assignment of every token in {@code data} with the
 * values stored in a gzip-compressed, space-separated state file, then
 * initializes the histograms and rebuilds the type-topic counts (in that
 * order).
 *
 * <p>Leading lines that start with {@code #} are skipped. Each remaining line
 * is matched against one token in document order: field 3 must equal the
 * token's type index and field 5 supplies the topic.
 *
 * @param stateFile gzip-compressed state file to read
 * @throws IOException           if the file cannot be opened or read
 * @throws IllegalStateException if a line's type does not match the token at
 *                               that position, or the file ends before every
 *                               token has been assigned a topic
 */
public void initializeFromState(File stateFile) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(stateFile))));
    try {
        String line = reader.readLine();

        // Skip some lines starting with "#" that describe the format and specify hyperparameters.
        // The null check keeps an empty or header-only file from raising a NullPointerException.
        while (line != null && line.startsWith("#")) {
            line = reader.readLine();
        }
        String[] fields = (line == null) ? null : line.split(" ");

        for (TopicAssignment document : data) {
            FeatureSequence tokens = (FeatureSequence) document.instance.getData();
            FeatureSequence topicSequence = (FeatureSequence) document.topicSequence;
            int[] topics = topicSequence.getFeatures();

            for (int position = 0; position < tokens.size(); position++) {
                if (fields == null) {
                    // Previously the last line's fields were silently reused here.
                    throw new IllegalStateException(
                            "state file " + stateFile + " ended before every token was assigned a topic");
                }

                int type = tokens.getIndexAtPosition(position);
                if (type != Integer.parseInt(fields[3])) {
                    System.err.println("instance list and state do not match: " + line);
                    throw new IllegalStateException("instance list and state do not match: " + line);
                }
                topics[position] = Integer.parseInt(fields[5]);

                line = reader.readLine();
                fields = (line == null) ? null : line.split(" ");
            }
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
    }

    // This variant initializes the histograms before building the counts.
    initializeHistograms();
    buildInitialTypeTopicCounts();
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:41,代码来源:FastQParallelTopicModel.java
示例11: readObject
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
// Custom deserialization hook: restores the model's fields from the stream,
// one read per field, in the fixed order listed below.
// NOTE(review): this order presumably has to mirror the matching writeObject,
// which is not visible here; confirm before adding, removing or reordering a read.
private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
// The version number is consumed from the stream but not otherwise used in this method.
int version = in.readInt ();
// Unchecked cast: the element type of the deserialized list is not verified at runtime.
data = (ArrayList<TopicAssignment>) in.readObject ();
alphabet = (Alphabet) in.readObject();
topicAlphabet = (LabelAlphabet) in.readObject();
// Topic count and the mask / bit width used alongside it.
numTopics = in.readInt();
topicMask = in.readInt();
topicBits = in.readInt();
numTypes = in.readInt();
// Hyperparameters.
alpha = (double[]) in.readObject();
alphaSum = in.readDouble();
beta = in.readDouble();
betaSum = in.readDouble();
// Count tables and histograms.
typeTopicCounts = (int[][]) in.readObject();
tokensPerTopic = (int[]) in.readObject();
docLengthCounts = (int[]) in.readObject();
topicDocCounts = (int[][]) in.readObject();
// Run configuration: iteration counts, intervals and output file names.
numIterations = in.readInt();
burninPeriod = in.readInt();
saveSampleInterval = in.readInt();
optimizeInterval = in.readInt();
showTopicsInterval = in.readInt();
wordsPerTopic = in.readInt();
saveStateInterval = in.readInt();
stateFilename = (String) in.readObject();
saveModelInterval = in.readInt();
modelFilename = (String) in.readObject();
randomSeed = in.readInt();
formatter = (NumberFormat) in.readObject();
printLogLikelihood = in.readBoolean();
numThreads = in.readInt();
}
开发者ID:kostagiolasn,项目名称:NucleosomePatternClassifier,代码行数:46,代码来源:ParallelTopicModel.java
示例12: initializeFromState
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Restores hyperparameters and per-token topic assignments from a
 * gzip-compressed, space-separated state file, then rebuilds the type-topic
 * counts and the histograms.
 *
 * <p>Among the leading {@code #} lines, a line starting with
 * {@code "#alpha : "} resets the number of topics to the number of values
 * listed and reloads {@code alpha}/{@code alphaSum}; a line starting with
 * {@code "#beta : "} reloads {@code beta}/{@code betaSum}. Other {@code #}
 * lines are skipped. Each remaining line is matched against one token in
 * document order: field 3 must equal the token's type index and field 5
 * supplies the topic.
 *
 * @param stateFile gzip-compressed state file to read
 * @throws IOException           if the file cannot be opened or read
 * @throws IllegalStateException if a line's type does not match the token at
 *                               that position, or the file ends before every
 *                               token has been assigned a topic
 */
public void initializeFromState(File stateFile) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(stateFile))));
    try {
        String line = reader.readLine();
        String[] fields;

        // Header lines start with "#"; the null check keeps an empty or
        // header-only file from raising a NullPointerException.
        while (line != null && line.startsWith("#")) {
            if (line.startsWith("#alpha : ")) {
                line = line.replace("#alpha : ", "");
                fields = line.split(" ");

                setNumTopics(fields.length);
                this.alphaSum = 0.0;
                for (int topic = 0; topic < fields.length; topic++) {
                    this.alpha[topic] = Double.parseDouble(fields[topic]);
                    this.alphaSum += this.alpha[topic];
                }
            }
            else if (line.startsWith("#beta : ")) {
                line = line.replace("#beta : ", "");
                this.beta = Double.parseDouble(line);
                this.betaSum = beta * numTypes;
            }
            line = reader.readLine();
        }
        fields = (line == null) ? null : line.split(" ");

        for (TopicAssignment document : data) {
            FeatureSequence tokens = (FeatureSequence) document.instance.getData();
            FeatureSequence topicSequence = (FeatureSequence) document.topicSequence;
            int[] topics = topicSequence.getFeatures();

            for (int position = 0; position < tokens.size(); position++) {
                if (fields == null) {
                    // Previously the last line's fields were silently reused here.
                    throw new IllegalStateException(
                            "state file " + stateFile + " ended before every token was assigned a topic");
                }

                int type = tokens.getIndexAtPosition(position);
                if (type != Integer.parseInt(fields[3])) {
                    System.err.println("instance list and state do not match: " + line);
                    throw new IllegalStateException("instance list and state do not match: " + line);
                }
                topics[position] = Integer.parseInt(fields[5]);

                line = reader.readLine();
                fields = (line == null) ? null : line.split(" ");
            }
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
    }

    buildInitialTypeTopicCounts();
    initializeHistograms();
}
开发者ID:iamxiatian,项目名称:wikit,代码行数:58,代码来源:ParallelTopicModel.java
示例13: initializeFromState
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Overwrites the topic assignment of every token, in every non-null modality
 * of every entity in {@code data}, with the values stored in a
 * gzip-compressed, space-separated state file, then rebuilds the type-topic
 * counts and the histograms.
 *
 * <p>Leading lines that start with {@code #} are skipped. Each remaining line
 * is matched against one token, walking entities, then modalities, then
 * positions: field 3 must equal the token's type index and field 7 supplies
 * the topic.
 *
 * @param stateFile gzip-compressed state file to read
 * @throws IOException           if the file cannot be opened or read
 * @throws IllegalStateException if a line's type does not match the token at
 *                               that position, or the file ends before every
 *                               token has been assigned a topic
 */
public void initializeFromState(File stateFile) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(stateFile))));
    try {
        String line = reader.readLine();

        // Skip some lines starting with "#" that describe the format and specify hyperparameters.
        // The null check keeps an empty or header-only file from raising a NullPointerException.
        while (line != null && line.startsWith("#")) {
            line = reader.readLine();
        }
        String[] fields = (line == null) ? null : line.split(" ");

        for (MixTopicModelTopicAssignment entity : data) {
            // Primitive counter: avoids boxing on every iteration.
            for (int i = 0; i < numModalities; i++) {
                TopicAssignment document = entity.Assignments[i];
                if (document == null) {
                    continue;
                }

                FeatureSequence tokens = (FeatureSequence) document.instance.getData();
                FeatureSequence topicSequence = (FeatureSequence) document.topicSequence;
                int[] topics = topicSequence.getFeatures();

                for (int position = 0; position < tokens.size(); position++) {
                    if (fields == null) {
                        // Previously the last line's fields were silently reused here.
                        throw new IllegalStateException(
                                "state file " + stateFile + " ended before every token was assigned a topic");
                    }

                    int type = tokens.getIndexAtPosition(position);
                    if (type != Integer.parseInt(fields[3])) {
                        System.err.println("instance list and state do not match: " + line);
                        throw new IllegalStateException("instance list and state do not match: " + line);
                    }
                    topics[position] = Integer.parseInt(fields[7]);

                    line = reader.readLine();
                    fields = (line == null) ? null : line.split(" ");
                }
            }
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
    }

    buildInitialTypeTopicCounts();
    initializeHistograms();
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:47,代码来源:MixLDAParallelTopicModel.java
示例14: initializeHistograms
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Gather statistics on the size of documents and create histograms for use
 * in Dirichlet hyperparameter optimization.
 *
 * <p>For each modality this records the total token count in
 * {@code totalTokens} and the longest document length in
 * {@code histogramSize}, logs both, and then allocates
 * {@code docLengthCounts} and {@code topicDocCounts} with
 * {@code histogramSize[m] + 1} slots per modality.
 */
private void initializeHistograms() {
    int maxTotalAllModalities = 0;

    // A freshly allocated int[] is already zero-filled.
    histogramSize = new int[numModalities];
    Arrays.fill(totalTokens, 0);

    for (MixTopicModelTopicAssignment entity : data) {
        // Primitive counter: avoids boxing on every iteration.
        for (int i = 0; i < numModalities; i++) {
            TopicAssignment document = entity.Assignments[i];
            if (document == null) {
                continue;
            }

            FeatureSequence fs = (FeatureSequence) document.instance.getData();
            int seqLen = fs.getLength();

            totalTokens[i] += seqLen;
            if (seqLen > histogramSize[i]) {
                histogramSize[i] = seqLen;
            }
        }
    }

    for (int i = 0; i < numModalities; i++) {
        String infoStr = "Modality<" + i + "> Max tokens per entity: " + histogramSize[i] + ", Total tokens: " + totalTokens[i];
        logger.info(infoStr);
        appendMetadata(infoStr);
        maxTotalAllModalities += histogramSize[i];
    }
    logger.info("max tokens all modalities: " + maxTotalAllModalities);

    // One histogram per modality; the +1 makes the longest length itself a valid index.
    docLengthCounts = new int[numModalities][];
    topicDocCounts = new int[numModalities][][];
    for (int m = 0; m < numModalities; m++) {
        docLengthCounts[m] = new int[histogramSize[m] + 1];
        topicDocCounts[m] = new int[numTopics][histogramSize[m] + 1];
    }
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:57,代码来源:MixLDAParallelTopicModel.java
示例15: checkConvergence
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Computes, per modality, the fraction of tokens whose packed history of
 * previous topics holds the same topic in each of the first
 * {@code prevTopicsNum} slots, stores that fraction in
 * {@code convergenceRates[m][iteration]}, and logs it.
 *
 * <p>Each token's history is one {@code long} from
 * {@code prevTopicsSequence}; slot {@code k} (1-based) is extracted by masking
 * with {@code topicMask} shifted left by {@code 63 - topicBits * k} and
 * shifting back down.
 *
 * <p>NOTE(review): a modality counts towards the returned flag only when its
 * rate is <em>below</em> {@code convergenceLimit}. That comparison is kept
 * exactly as it was; confirm the intended direction against the caller. A
 * modality with no tokens yields a NaN rate, which makes the result
 * {@code false}.
 *
 * @param convergenceLimit threshold each modality's rate is compared against
 * @param prevTopicsNum    number of history slots to compare per token
 * @param iteration        column of {@code convergenceRates} to write
 * @return {@code true} only if every modality's rate is below {@code convergenceLimit}
 */
public boolean checkConvergence(double convergenceLimit, int prevTopicsNum, int iteration) {
    // New int arrays are already zero-filled.
    int[] totalModalityTokens = new int[numModalities];
    int[] totalConvergedTokens = new int[numModalities];
    boolean converged = true;

    for (MixTopicModelTopicAssignment entity : data) {
        // Primitive counter: avoids boxing on every iteration.
        for (int m = 0; m < numModalities; m++) {
            TopicAssignment document = entity.Assignments[m];
            if (document == null) {
                continue;
            }

            FeatureSequence tokens = (FeatureSequence) document.instance.getData();
            for (int position = 0; position < tokens.size(); position++) {
                totalModalityTokens[m]++;
                long tmpPreviousTopics = document.prevTopicsSequence[position];

                // First slot of the history. The subtraction binds tighter than the
                // shift; the parentheses only make that explicit.
                long currentMask = ((long) topicMask) << (63 - topicBits);
                long topTopic = (tmpPreviousTopics & currentMask) >> (63 - topicBits);

                // Compare slots 2..prevTopicsNum against the first, stopping at the first mismatch.
                int index = 1;
                boolean isSameTopic = true;
                while (index < prevTopicsNum && isSameTopic) {
                    index++;
                    currentMask = ((long) topicMask) << (63 - topicBits * index);
                    long curTopic = (tmpPreviousTopics & currentMask) >> (63 - topicBits * index);
                    isSameTopic = curTopic == topTopic;
                }

                if (isSameTopic) {
                    totalConvergedTokens[m]++;
                }
            }
        }
    }

    for (int m = 0; m < numModalities; m++) {
        double rate = (double) totalConvergedTokens[m] / (double) totalModalityTokens[m];
        converged = converged && (rate < convergenceLimit);
        convergenceRates[m][iteration] = rate;
        logger.info("Convergence Rate for modality: " + m + " Converged/Total: " + totalConvergedTokens[m] + "/" + totalModalityTokens[m] + " (%):" + rate);
    }
    return converged;
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:54,代码来源:MixLDAParallelTopicModel.java
示例16: findTopicPhrases
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Counts, per topic, the phrases formed by runs of consecutive tokens that
 * share a topic in the first modality ({@code Assignments[0]}) of each entity.
 *
 * <p>A run is extended while the current token's topic equals the previous
 * one and, for sequences with bigram information, the bigram index at that
 * position is not -1. When a run is broken, the phrase is counted under the
 * run's topic and the tracking state is reset; the token that broke the run
 * is not remembered as the start of a new one. A phrase still open at the end
 * of a document is not counted.
 *
 * @return one phrase-to-count map per topic, indexed by topic
 */
public TObjectIntHashMap<String>[] findTopicPhrases() {
    int numTopics = this.getNumTopics();
    TObjectIntHashMap<String>[] phrases = new TObjectIntHashMap[numTopics];
    Alphabet alphabet = this.getAlphabet()[0];

    // One empty counter per topic.
    for (int ti = 0; ti < numTopics; ti++) {
        phrases[ti] = new TObjectIntHashMap<String>();
    }

    for (int di = 0; di < this.getData().size(); di++) {
        TopicAssignment assignment = this.getData().get(di).Assignments[0];
        if (assignment == null) {
            continue;
        }

        FeatureSequence fvs = (FeatureSequence) assignment.instance.getData();
        boolean withBigrams = fvs instanceof FeatureSequenceWithBigrams;

        int prevtopic = -1;
        int prevfeature = -1;
        StringBuffer phrase = null;
        int doclen = fvs.size();

        for (int pi = 0; pi < doclen; pi++) {
            int feature = fvs.getIndexAtPosition(pi);
            int topic = assignment.topicSequence.getIndexAtPosition(pi);

            boolean continuesRun = topic == prevtopic
                    && (!withBigrams || ((FeatureSequenceWithBigrams) fvs).getBiIndexAtPosition(pi) != -1);

            if (continuesRun) {
                if (phrase == null) {
                    // Second token of a run: seed the phrase with the remembered first token.
                    phrase = new StringBuffer(alphabet.lookupObject(prevfeature).toString() + " " + alphabet.lookupObject(feature));
                } else {
                    phrase.append(" ");
                    phrase.append(alphabet.lookupObject(feature));
                }
                continue;
            }

            if (phrase == null) {
                // No open phrase: remember this token as a possible run start.
                prevtopic = topic;
                prevfeature = feature;
                continue;
            }

            // Run broken with an open phrase: count it and reset the tracking state.
            String sbs = phrase.toString();
            TObjectIntHashMap<String> topicPhrases = phrases[prevtopic];
            if (topicPhrases.get(sbs) == 0) {
                topicPhrases.put(sbs, 0);
            }
            topicPhrases.increment(sbs);
            prevtopic = prevfeature = -1;
            phrase = null;
        }
    }
    return phrases;
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:57,代码来源:MixLDAParallelTopicModel.java
示例17: initializeFromState
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Overwrites the topic assignment of every token, in every non-null modality
 * of every entity in {@code data}, with the values stored in a
 * gzip-compressed, space-separated state file. It then initializes the
 * histograms, the alpha statistics and the type-topic counts, in that order.
 *
 * <p>Leading lines that start with {@code #} are skipped. Each remaining line
 * is matched against one token, walking entities, then modalities, then
 * positions: field 3 must equal the token's type index and field 7 supplies
 * the topic.
 *
 * @param stateFile gzip-compressed state file to read
 * @throws IOException           if the file cannot be opened or read
 * @throws IllegalStateException if a line's type does not match the token at
 *                               that position, or the file ends before every
 *                               token has been assigned a topic
 */
public void initializeFromState(File stateFile) throws IOException {
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(new GZIPInputStream(new FileInputStream(stateFile))));
    try {
        String line = reader.readLine();

        // Skip some lines starting with "#" that describe the format and specify hyperparameters.
        // The null check keeps an empty or header-only file from raising a NullPointerException.
        while (line != null && line.startsWith("#")) {
            line = reader.readLine();
        }
        String[] fields = (line == null) ? null : line.split(" ");

        for (MixTopicModelTopicAssignment entity : data) {
            // Primitive counter: avoids boxing on every iteration.
            for (int i = 0; i < numModalities; i++) {
                TopicAssignment document = entity.Assignments[i];
                if (document == null) {
                    continue;
                }

                FeatureSequence tokens = (FeatureSequence) document.instance.getData();
                FeatureSequence topicSequence = (FeatureSequence) document.topicSequence;
                int[] topics = topicSequence.getFeatures();

                for (int position = 0; position < tokens.size(); position++) {
                    if (fields == null) {
                        // Previously the last line's fields were silently reused here.
                        throw new IllegalStateException(
                                "state file " + stateFile + " ended before every token was assigned a topic");
                    }

                    int type = tokens.getIndexAtPosition(position);
                    if (type != Integer.parseInt(fields[3])) {
                        System.err.println("instance list and state do not match: " + line);
                        throw new IllegalStateException("instance list and state do not match: " + line);
                    }
                    topics[position] = Integer.parseInt(fields[7]);

                    line = reader.readLine();
                    fields = (line == null) ? null : line.split(" ");
                }
            }
        }
    } finally {
        // Release the file handle on both the success and the failure path.
        reader.close();
    }

    initializeHistograms();
    initializeAlphaStatistics(histogramSize);
    buildInitialTypeTopicCounts();
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:48,代码来源:iMixLDAParallelTopicModel.java
示例18: initializeHistograms
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Gather statistics on the size of documents and create histograms for use
 * in Dirichlet hyperparameter optimization.
 *
 * <p>For each modality this records the total token count in
 * {@code totalTokens} and the longest document length in
 * {@code histogramSize}, then logs both. This variant does not allocate
 * {@code docLengthCounts} or {@code topicDocCounts}.
 */
private void initializeHistograms() {
    // A freshly allocated int[] is already zero-filled.
    histogramSize = new int[numModalities];
    Arrays.fill(totalTokens, 0);
    int maxTotalAllModalities = 0;

    for (MixTopicModelTopicAssignment entity : data) {
        // Primitive counter: avoids boxing on every iteration.
        for (int i = 0; i < numModalities; i++) {
            TopicAssignment document = entity.Assignments[i];
            if (document == null) {
                continue;
            }

            FeatureSequence fs = (FeatureSequence) document.instance.getData();
            int seqLen = fs.getLength();

            totalTokens[i] += seqLen;
            if (seqLen > histogramSize[i]) {
                histogramSize[i] = seqLen;
            }
        }
    }

    for (int i = 0; i < numModalities; i++) {
        String infoStr = "Modality<" + i + "> Max tokens per entity: " + histogramSize[i] + ", Total tokens: " + totalTokens[i];
        logger.info(infoStr);
        appendMetadata(infoStr);
        maxTotalAllModalities += histogramSize[i];
    }
    logger.info("max tokens all modalities: " + maxTotalAllModalities);

    // TODO (#NewAddition): the per-modality allocation of docLengthCounts and
    // topicDocCounts (histogramSize[m] + 1 slots each) was commented out in this variant.
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:65,代码来源:iMixLDAParallelTopicModel.java
示例19: getData
import cc.mallet.topics.TopicAssignment; //导入依赖的package包/类
/**
 * Returns the model's list of topic assignments.
 *
 * @return the list held in the {@code data} field itself, not a copy
 */
public ArrayList<TopicAssignment> getData() {
    return this.data;
}
开发者ID:hmetaxa,项目名称:MixLDA,代码行数:4,代码来源:FastParallelTopicModel.java
注：本文中的 cc.mallet.topics.TopicAssignment 类示例整理自 Github/MSDocs 等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。
请发表评论