本文整理汇总了C++中Vocab类的典型用法代码示例。如果您正苦于以下问题:C++ Vocab类的具体用法?C++ Vocab怎么用?C++ Vocab使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Vocab类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: PrintSentence
/// Writes the words of `sentence` to `out`, separated by single spaces.
///
/// Every element of `sentence` is passed to `vocab.GetWord` and the result is
/// streamed. Nothing is written for an empty sentence, and no trailing space
/// or newline is emitted.
///
/// @param sentence  Sequence of entries to print.
/// @param vocab     Vocabulary used to turn each entry into printable text.
/// @param out       Destination stream.
void ParallelCorpus::PrintSentence(
    const Sentence& sentence, const Vocab& vocab, std::ostream& out) const {
  // Use an unsigned index so the loop bound is not compared against a signed
  // counter.
  for (size_t i = 0; i < sentence.size(); ++i) {
    if (i > 0) {
      out << " ";
    }
    out << vocab.GetWord(sentence.at(i));
  }
}
开发者ID:jrs026,项目名称:SentenceAlignment,代码行数:9,代码来源:parallel_corpus.cpp
示例2: Print
// Dumps every stored (source word, target word, value) entry to `out`, one
// tab-separated line per entry. The stored value is passed through exp()
// before it is printed.
void PackedTrie::Print(const Vocab& source_vocab, const Vocab& target_vocab,
                       std::ostream& out) const {
  for (int source = 0; source < source_count_; ++source) {
    // Entries belonging to `source` occupy the index range
    // [offsets_[source], offsets_[source + 1]).
    int entry = offsets_[source];
    while (entry < offsets_[source + 1]) {
      out << source_vocab.GetWord(source) << "\t";
      out << target_vocab.GetWord(target_words_[entry]) << "\t";
      out << exp(data_[entry]) << std::endl;
      ++entry;
    }
  }
}
开发者ID:jrs026,项目名称:SentenceAlignment,代码行数:10,代码来源:packed_trie.cpp
示例3: getBigramProb
//Get P(W2 | W1) -- bigram
double getBigramProb(const char *w1, const char *w2, Vocab &voc, Ngram &lm){
VocabIndex wid1 = voc.getIndex(w1);
VocabIndex wid2 = voc.getIndex(w2);
if(wid1 == Vocab_None) //OOV
wid1 = voc.getIndex(Vocab_Unknown);
if(wid2 == Vocab_None){ //OOV
wid2 = voc.getIndex(Vocab_Unknown);
return -20;
}
VocabIndex context[] = { wid1, Vocab_None };
return lm.wordProb( wid2, context);
}
开发者ID:hbtsai,项目名称:pdp_hw4_problems,代码行数:13,代码来源:disambig_my.cpp
示例4: CreateFromString
// Initialises this word from its textual form and registers it in `vocab`.
// A token wrapped in square brackets ("[X]") is flagged as a non-terminal and
// stored without the brackets; any other token is stored verbatim as a
// terminal.
void Word::CreateFromString(const std::string &inString, Vocab &vocab)
{
  const std::string::size_type length = inString.size();
  const bool bracketed =
      length > 0 && inString[0] == '[' && inString[length - 1] == ']';

  m_isNonTerminal = bracketed;
  if (bracketed) {
    // Strip the enclosing '[' and ']' before adding the label.
    const std::string label = inString.substr(1, length - 2);
    m_vocabId = vocab.AddVocabId(label);
  } else {
    m_vocabId = vocab.AddVocabId(inString);
  }
}
开发者ID:Avmb,项目名称:mosesdecoder,代码行数:13,代码来源:Word.cpp
示例5: CreateVocabMap
// Builds the id mapping for the language of `corpus_vocab`.
//
// `lookup` is grown so that it has a slot for the language. If `filter_vocab`
// holds a non-empty term list for that language, the slot is filled through
// CreateFilteredMap(); otherwise the language is reported as skipped.
//
// corpus_vocab: vocabulary to map; must have its language set (asserted).
// filter_vocab: per-language lists of terms to keep, indexed by language id.
// lookup:       per-language output maps, indexed by language id.
void CorpusReader::CreateVocabMap(const Vocab& corpus_vocab,
                                  const vector< vector<string> >& filter_vocab,
                                  vector<IntIntMap>* lookup) {
  assert(corpus_vocab.has_language());
  const int lang = corpus_vocab.language();

  if (lang >= static_cast<int>(lookup->size())) lookup->resize(lang + 1);

  // A language id without an entry in filter_vocab (or a negative id) is
  // treated like an empty filter instead of indexing out of bounds.
  const bool has_filter = lang >= 0 &&
                          static_cast<size_t>(lang) < filter_vocab.size() &&
                          !filter_vocab[lang].empty();

  if (has_filter) {
    cout << "Adding vocab for language " << lang << "(" <<
      corpus_vocab.terms().size() << ")" << endl;
    CreateFilteredMap(corpus_vocab, filter_vocab[lang], &(*lookup)[lang]);
  } else {
    cout << "Skipping language " << lang << endl;
  }
}
开发者ID:NetBUG,项目名称:topicmod,代码行数:14,代码来源:corpus_reader.cpp
示例6: CreateUnfilteredMap
void CorpusReader::CreateUnfilteredMap(const Vocab& proto_voc,
StringIntMap* lookup,
IntIntMap* mapping) {
for (int ii = 0; ii < proto_voc.terms_size(); ++ii) {
const lib_corpora_proto::Vocab_Entry& word = proto_voc.terms(ii);
string term = word.original();
if (lookup->find(term) == lookup->end()) {
int new_id = lookup->size();
(*lookup)[term] = new_id;
// cout << "Adding " << term << " with id " << new_id << endl;
}
(*mapping)[word.id()] = (*lookup)[term];
// cout << "---------------" << endl;
}
}
开发者ID:NetBUG,项目名称:topicmod,代码行数:15,代码来源:corpus_reader.cpp
示例7: ConvertToMoses
// Rebuilds `overwrite` as a Moses::Word equivalent to this word.
//
// The vocabulary string of this word is split on '|'; the pieces are assigned,
// in order, to the factor types listed in `outputFactorsVec`. Throws
// util::Exception when the number of pieces does not match the number of
// requested factor types.
void Word::ConvertToMoses(
  const std::vector<Moses::FactorType> &outputFactorsVec,
  const Vocab &vocab,
  Moses::Word &overwrite) const
{
  typedef std::vector<Moses::FactorType>::const_iterator FactorTypeIter;

  Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
  overwrite = Moses::Word(m_isNonTerminal);

  // TODO: this conversion should have been done at load time.
  util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');

  FactorTypeIter t = outputFactorsVec.begin();
  while (t != outputFactorsVec.end()) {
    // Ran out of '|'-separated pieces before all factor types were served.
    UTIL_THROW_IF(!tok, util::Exception, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
    overwrite.SetFactor(*t, factorColl.AddFactor(*tok));
    ++t;
    ++tok;
  }

  // Pieces left over after every factor type got one.
  UTIL_THROW_IF(tok, util::Exception, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
开发者ID:Avmb,项目名称:mosesdecoder,代码行数:16,代码来源:Word.cpp
示例8: LoadVocabulary
// Fills `vocab` from a text file containing one entry per line. Each line is
// registered with the global FactorCollection and the resulting factor is
// inserted into `vocab`. An empty filename is a no-op; a file that cannot be
// opened raises util::Exception.
void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab)
{
  if (!filename.empty()) {
    ifstream in(filename.c_str());
    UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);

    for (string entry; getline(in, entry); ) {
      vocab.insert(FactorCollection::Instance().AddFactor(entry));
    }
    in.close();
  }
}
开发者ID:tangym,项目名称:mosesdecoder,代码行数:11,代码来源:SparseHieroReorderingFeature.cpp
示例9: CreateFilteredMap
void CorpusReader::CreateFilteredMap(const Vocab& corpus_voc,
const vector<string>& filter_voc,
IntIntMap* id_lookup) {
map<string, int> new_id;
// RHS will be new vocab
for (int ii = 0; ii < (int)filter_voc.size(); ++ii) {
new_id[filter_voc[ii]] = ii;
}
// LHS will be old vocab
for (int ii = 0; ii < corpus_voc.terms_size(); ++ii) {
const lib_corpora_proto::Vocab_Entry& word = corpus_voc.terms(ii);
string term = word.original();
if (new_id.find(term) != new_id.end()) {
(*id_lookup)[word.id()] = new_id[term];
// cout << word.id() << "->" << new_id[term] << "(term)" << endl;
}
}
}
开发者ID:NetBUG,项目名称:topicmod,代码行数:20,代码来源:corpus_reader.cpp
示例10: extractBinaryfromStream
void extractBinaryfromStream(const char * inputStream, Vocab & textHash,
vector < tuple <int *, int > > & src_batch, vector < tuple <int *, int > > & tgt_batch, int isFilter, int debugLines)
{
ifstream infile;
infile.open(inputStream, ifstream::in);
string line;
int lineIdx = 0;
while (getline(infile, line))
{
stringstream linestream(line);
string src, tgt;
getline(linestream, src, '\t');
getline(linestream, tgt, '\t');
int src_token_num = 0;
int tgt_token_num = 0;
char** src_tokens = BasicUtil::TokenizeString(src, src_token_num, MAX_TOKEN_NUM, MAX_TOKEN_LEN);
char** tgt_tokens = BasicUtil::TokenizeString(tgt, tgt_token_num, MAX_TOKEN_NUM, MAX_TOKEN_LEN);
int * src_fea = new int[MAX_TOKEN_LEN * MAX_TOKEN_NUM];
int * src_seg = new int[MAX_TOKEN_NUM];
int * tgt_fea = new int[MAX_TOKEN_LEN * MAX_TOKEN_NUM];
int * tgt_seg = new int[MAX_TOKEN_NUM];
int src_seg_num = textHash.FeatureExtract((const char **)src_tokens, src_token_num, src_seg, src_fea);
int tgt_seg_num = textHash.FeatureExtract((const char **)tgt_tokens, tgt_token_num, tgt_seg, tgt_fea);
int src_feature_num = 0; //src_seg[src_seg_num - 1];
int tgt_feature_num = 0; //tgt_seg[tgt_seg_num - 1];
if(src_seg_num >= 1)
{
src_feature_num = src_seg[src_seg_num - 1];
}
if(tgt_seg_num >= 1)
{
tgt_feature_num = tgt_seg[tgt_seg_num - 1];
}
if(isFilter == 1)
{
if(src_feature_num <= 0 || tgt_feature_num <= 0) continue;
}
src_batch.push_back(tuple<int*, int>(src_fea, src_feature_num));
tgt_batch.push_back(tuple<int*, int>(tgt_fea, tgt_feature_num));
lineIdx += 1;
if(lineIdx == debugLines) break;
}
}
开发者ID:yelongshen,项目名称:biglearn-dssm,代码行数:53,代码来源:main.cpp
示例11: CreateFromString
// Initialises this word from its textual form. The word is flagged as a
// non-terminal when the text starts with '[' and ends with ']'. The full,
// unmodified text (brackets included) is added to `vocab` and stored as the
// single factor of this word.
void Word::CreateFromString(const std::string &inString, Vocab &vocab)
{
  const std::string::size_type length = inString.size();
  m_isNonTerminal =
      length > 0 && inString[0] == '[' && inString[length - 1] == ']';

  m_factors.resize(1);
  m_factors[0] = vocab.AddVocabId(inString);
}
开发者ID:obo,项目名称:Moses-Extensions-at-UFAL,代码行数:14,代码来源:Word.cpp
示例12: convert_trees_to_indexed_minibatches
// Flattens a treebank into minibatches of encoded examples.
//
// Every tree contributes one example for its root (flagged `true`) followed by
// one example per entry of its `general_children` (flagged `false`). Each
// example is the labeled pair of the node with its words encoded through
// `word_vocab`. Examples are appended to the last minibatch; a new minibatch
// is started whenever the last one already holds `minibatch_size` examples.
treebank_minibatch_dataset convert_trees_to_indexed_minibatches(
        const Vocab& word_vocab,
        const std::vector<AnnotatedParseTree::shared_tree>& trees,
        int minibatch_size) {
    treebank_minibatch_dataset dataset;

    // (words, label) + root flag -> (encoded words, label, root flag)
    auto encode_node = [&word_vocab](std::pair<std::vector<std::string>, uint>&& labeled, bool&& is_root) {
        return std::tuple<std::vector<uint>, uint, bool>(
            word_vocab.encode(labeled.first),
            labeled.second,
            is_root);
    };

    // Opens a fresh minibatch when the current one is full.
    auto roll_over_if_full = [&dataset, minibatch_size]() {
        if (dataset.back().size() == minibatch_size) {
            dataset.emplace_back(0);
            dataset.back().reserve(minibatch_size);
        }
    };

    if (dataset.empty())
        dataset.emplace_back(0);

    for (auto& tree : trees) {
        // root first
        roll_over_if_full();
        dataset.back().emplace_back(encode_node(tree->to_labeled_pair(), true));

        // then every child
        for (auto& child : tree->general_children) {
            roll_over_if_full();
            dataset.back().emplace_back(encode_node(child->to_labeled_pair(), false));
        }
    }
    return dataset;
}
开发者ID:codeaudit,项目名称:Dali,代码行数:48,代码来源:SST.cpp
示例13: add_example
void add_example(
const Vocab& vocab,
const vector<string>& example_orig,
size_t& example_idx) {
int len = std::min(example_orig.size(), (size_t)FLAGS_max_sentence_length);
vector<string> example(example_orig.begin(), example_orig.begin() + len);
auto description_length = example.size();
this->data.w(0, example_idx) = vocab.word2index.at(START);
auto encoded = vocab.encode(example, true);
this->mask.w(0, example_idx) = 0.0;
for (size_t j = 0; j < encoded.size(); j++) {
this->data.w(j + 1, example_idx) = encoded[j];
this->mask.w(j + 1, example_idx) = (R)1.0;
}
this->code_lengths[example_idx] = description_length + 1;
this->total_codes += description_length + 1;
}
开发者ID:byzhang,项目名称:dali-examples,代码行数:18,代码来源:language_model.cpp
示例14: AddLine
void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
{
//cerr << line << endl;
NgramCounter ngramCounts;
list<WordVec> openNgrams;
size_t length = 0;
//tokenize & count
for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
++length;
openNgrams.push_front(WordVec());
for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
k->push_back(nextTok);
++ngramCounts[*k];
}
if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
}
//merge into overall ngram map
for (NgramCounter::const_iterator ni = ngramCounts.begin();
ni != ngramCounts.end(); ++ni) {
size_t count = ni->second;
//cerr << *ni << " " << count << endl;
if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
if (totalsIter == ngramCounts_[sentenceId].end()) {
ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
} else {
ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
ngramCounts_[sentenceId][ni->first].second += count; //no clip
}
}
//length
if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
//TODO - length strategy - this is MIN
if (!lengths_[sentenceId]) {
lengths_[sentenceId] = length;
} else {
lengths_[sentenceId] = min(length,lengths_[sentenceId]);
}
//cerr << endl;
}
开发者ID:mitramah,项目名称:mosesdecoder,代码行数:43,代码来源:ForestRescore.cpp
示例15: forwardish
// Returns a vector of LiveGuessResults
// warning: words is mutated temporarily
std::auto_ptr< std::vector<LiveGuessResult> >
forwardish(std::vector<const char *> & words, // the current words can be empty
const double currentProb, // log prob
const int size, // how many to grab
const int depthLeft,
const NgramLM & _lm,
const int _order,
const Vocab & vocab ) {
// Index contains the last ngram word
//Logger::Log(0, "Forwardish [%d] [%d]\n", depthLeft, index);
VocabIndex vwords[ _order ];
//int n = (words.size() < (_order - 1))?words.size():_order;
//for (int i = words.size() - _order - 1; i < words.size(); i++) {
// if ( i >= 0) {
// Logger::Log(0,"Word: %d %s\n",i,words[i]);
// }
//}
//vwords[0] to _order -1 are filled in
// if it's small EndOfSentence starts it..
for (int i = 1; i < _order; i++) {
int j = words.size() - _order + i;
if (j < 0) {
vwords[i - 1] = Vocab::Invalid; // probably should be end of sentence
} else {
vwords[i - 1] = vocab.Find( words[ j ] );
}
}
vector<VocabProb> heap(0);
mkHeap(heap);
const ProbVector & probabilities = _lm.probs( _order ) ;// _order - 2 );
const CountVector & counts = _lm.counts( _order );
int count = 0;
//Logger::Log(0, "Find probabilities %d\n",vocab.size());
for (int j = 0; j < vocab.size(); j++) {
VocabIndex vWordI = j;//vocab[j];
vwords[ _order - 1 ] = j;
NgramIndex newIndex = _lm.model()._Find( vwords, _order );
if (newIndex == -1) { // not legit :(
continue;
}
Prob probRaw = probabilities[ newIndex ];
if (probRaw == 0.0) {
continue;
}
Prob prob = -1 * log( probRaw ); //biggest is smallest
//Prob prob = (probRaw == 0.0)?10000:(-1 * log( probRaw )); //biggest is smallest
//Prob probRaw = (counts[newIndex]==0)?1.0:counts[newIndex]/vocab.size()
//Prob prob = -1 * log(probRaw);
//Prob prob = -1 * counts[newIndex];
//Logger::Log(0, "Prob %e\n",prob);
const VocabProb v( prob,j, newIndex);
if ( count < size ) {
heap.push_back( v );
count++;
if (count == size) {
mkHeap( heap );
}
// this is irritating, basically it means the highest rank stuff
// will be in the list and we only kick out the lowest ranked stuff
// (which will be the GREATEST of what is already there)
//
} else if ( heap.front().prob > prob ) {
// this is dumb
// remove the least element
popHeap( heap );
pushHeap( heap, v );
// should we update?
}
}
sortHeap( heap );
std::vector<LiveGuessResult> * resVector = new std::vector<LiveGuessResult>();
for( int j = 0; j < heap.size(); j++) {
VocabProb v = heap[ j ];
Prob prob = v.prob;
prob += currentProb;
const char * word = vocab[ v.index ];
vector<const char *> ourWords(words);
ourWords.push_back( word ); // add
char * str = joinVectorOfCStrings( ourWords ); // Remember to deallocate later :(
//.........这里部分代码省略.........
开发者ID:abramhindle,项目名称:MIT-Language-Modeling-Toolkit,代码行数:101,代码来源:LiveGuess.cpp
示例16: DebugPrint
// Writes the vocabulary string of this word to `out`, with no decoration and
// no trailing whitespace.
void Word::DebugPrint(ostream &out, const Vocab &vocab) const
{
  out << vocab.GetString(m_vocabId);
}
开发者ID:Avmb,项目名称:mosesdecoder,代码行数:5,代码来源:Word.cpp
示例17: main
int main(int argc, char* argv[])
{
Vocab vocab;
Ngram lm(vocab, 2);
vector<string> splitLine;
map<string, set<string> > mapping;
map<string, set<string> >::iterator map_iter;
vector<string> BestLine;
vector<string>::iterator Best_iter;
FILE * TextFile;
FILE * MapFile;
char ch;
char tmpstr[BUFSIZE];
for(int i=0 ; i<argc ; i++)
{
if(string("-text")==argv[i])
{
TextFile = fopen(argv[i+1],"r");
}
if(string("-map")==argv[i])
{
MapFile = fopen(argv[i+1], "r");
}
if(string("-lm")==argv[i])
{
File lmFile(argv[i+1],"r");
lm.read(lmFile);
lmFile.close();
}
}
//read MapFile into map<string, set<string> > mapping
while(fgets(tmpstr,4096,MapFile))
{
char *tok=strtok(tmpstr,"\n");
string Key,StringTok;
set<string> ZhuYin;
while(tok!=NULL)
{
StringTok=string(tok);
Key = StringTok.substr(0,2);//read the first ZhuYin or ChuIn to key
int pos;
string tmpLine=StringTok.substr(3);
while((pos = tmpLine.find(" "))!=-1)
{
tmpLine.erase(pos,1);
}
assert(tmpLine.size()%2==0);
for(int i=0 ; i<tmpLine.size() ; i+=2)
{
string buf = tmpLine.substr(i, 2);
ZhuYin.insert(buf);
}
mapping[Key]=ZhuYin;
tok = strtok(NULL,"\n");
ZhuYin.clear();
}
}
//read TextFile into vector<string> splitLine
int line =0;
while(fgets(tmpstr,4096,TextFile))
{
line++;
char *tok=strtok(tmpstr,"\n");//Splite into one line
string tmpLine = string(tok);
while(tok!=NULL)
{
int pos;
while((pos = tmpLine.find(" "))!=-1)
{
tmpLine.erase(pos,1);
}
assert(tmpLine.size()%2==0);
for(int i=0 ; i<tmpLine.size() ; i+=2)
{
string buf = tmpLine.substr(i, 2);
splitLine.push_back(buf);//push one word to splitLine
}
tok = strtok(NULL,"\n");
}
splitLine.push_back("\n");
}
int count = 1;
//Viterbi
for(int i=0;i<splitLine.size();i++)
{
set<string> TmpSet;
if(i==0)
{
//cout << count << endl;
BestLine.push_back("<s>");
BestLine.push_back(" ");
}
if(splitLine[i]=="\n")
{
count++;
//cout << endl;
//cout << count << endl;
BestLine.push_back("</s>");
//.........这里部分代码省略.........
开发者ID:Plabo1028,项目名称:DSP_NTU_Hw,代码行数:101,代码来源:mydisambig.cpp
示例18: ModelTrain
void ModelTrain()
{
Vocab vocab;
vocab.LoadVocab("l3g.txt");
cout << "vocab Size " << vocab.VocabSize << endl;
vector < tuple <int *, int > > src_batch, tgt_batch;
extractBinaryfromStream("data//train_data_40k.tsv", vocab, src_batch, tgt_batch, 1, 0);
int sampleSize = src_batch.size();
cout << "train sample size" << sampleSize << endl;
int iteration = 30;
int miniBatchSize = 1024;
int featureDim = vocab.VocabSize;
int batchNum = sampleSize / miniBatchSize;
int nTrial = 4;
vector <int> shuff(sampleSize);
RunnerBehavior rb;
rb.RunMode = RUNMODE_TRAIN;
rb.Device = DEVICE_GPU;
cout<<"init cuda computation ...."<<endl;
rb.ComputeLib = new CudaOperationManager(true, true);
cout<<"init cuda computation done"<<endl;
int hiddenDim1 = 128;
int hiddenDim2 = 128;
SparseIndexMatrixStat srcMiniBatchInfo;
srcMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
srcMiniBatchInfo.MAX_COL_SIZE = featureDim;
srcMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
srcMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
srcMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;
SparseIndexMatrixStat tgtMiniBatchInfo;
tgtMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
tgtMiniBatchInfo.MAX_COL_SIZE = featureDim;
tgtMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
tgtMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
tgtMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;
DenseMatrixStat OutputLayer1Info;
OutputLayer1Info.MAX_ROW_SIZE = miniBatchSize;
OutputLayer1Info.MAX_COL_SIZE = hiddenDim1;
OutputLayer1Info.TOTAL_BATCH_NUM = batchNum;
OutputLayer1Info.TOTAL_SAMPLE_NUM = sampleSize;
DenseMatrixStat OutputLayer2Info;
OutputLayer2Info.MAX_ROW_SIZE = miniBatchSize;
OutputLayer2Info.MAX_COL_SIZE = hiddenDim2;
OutputLayer2Info.TOTAL_BATCH_NUM = batchNum;
OutputLayer2Info.TOTAL_SAMPLE_NUM = sampleSize;
FullyConnectedLayer srcLayer1(featureDim, hiddenDim1, &rb);
FullyConnectedLayer srcLayer2(hiddenDim1, hiddenDim2, &rb);
FullyConnectedLayer tgtLayer1(featureDim, hiddenDim1, &rb);
FullyConnectedLayer tgtLayer2(hiddenDim1, hiddenDim2, &rb);
DenseMatrixStat OutputSimInfo;
OutputSimInfo.MAX_ROW_SIZE = miniBatchSize;
OutputSimInfo.MAX_COL_SIZE = 1 + nTrial;
OutputSimInfo.TOTAL_BATCH_NUM = batchNum;
OutputSimInfo.TOTAL_SAMPLE_NUM = sampleSize;
SparseIndexMatrix srcBatch(&srcMiniBatchInfo, rb.Device);
HiddenDenseMatrix srcLayer1Data(&OutputLayer1Info, rb.Device);
HiddenDenseMatrix srcLayer2Data(&OutputLayer2Info, rb.Device);
SparseIndexMatrix tgtBatch(&tgtMiniBatchInfo, rb.Device);
HiddenDenseMatrix tgtLayer1Data(&OutputLayer1Info, rb.Device);
HiddenDenseMatrix tgtLayer2Data(&OutputLayer2Info, rb.Device);
BiMatchData biMatchData(miniBatchSize, nTrial, rb.Device);
SimilarityRunner similarityRunner(10, &rb);
HiddenDenseMatrix simOutput(&OutputSimInfo, rb.Device);
HiddenDenseMatrix probOutput(&OutputSimInfo, rb.Device);
probOutput.Deriv->Data->Zero();
//iteration = 1;
cout<<"start training iteration"<<endl;
double train_time = 0;
double io_time = 0;
struct timeval train_start, train_end;
struct timeval io_start, io_end;
gettimeofday(&train_start, 0);
for (int iter = 0; iter<iteration; iter++)
{
for (int i = 0; i<sampleSize; i++) shuff[i] = i;
//.........这里部分代码省略.........
开发者ID:yelongshen,项目名称:biglearn-dssm,代码行数:101,代码来源:main.cpp
示例19: ModelPredict
void ModelPredict()
{
Vocab vocab;
vocab.LoadVocab("l3g.txt");
cout << "vocab Size " << vocab.VocabSize << endl;
vector < tuple <int *, int > > src_batch, tgt_batch;
extractBinaryfromStream("data//test_data_clean.tsv", vocab, src_batch, tgt_batch, 0, 0);
int sampleSize = src_batch.size();
cout << "test sample size" << sampleSize << endl;
int miniBatchSize = 1024;
int featureDim = vocab.VocabSize;
int batchNum = (sampleSize - 1) / miniBatchSize + 1;
RunnerBehavior rb;
rb.RunMode = RUNMODE_PREDICT;
rb.Device = DEVICE_GPU;
rb.ComputeLib = new CudaOperationManager(true, true);
int hiddenDim1 = 128;
int hiddenDim2 = 128;
SparseIndexMatrixStat srcMiniBatchInfo;
srcMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
srcMiniBatchInfo.MAX_COL_SIZE = featureDim;
srcMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
srcMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
srcMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;
SparseIndexMatrixStat tgtMiniBatchInfo;
tgtMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
tgtMiniBatchInfo.MAX_COL_SIZE = featureDim;
tgtMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
tgtMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
tgtMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;
DenseMatrixStat OutputLayer1Info;
OutputLayer1Info.MAX_ROW_SIZE = miniBatchSize;
OutputLayer1Info.MAX_COL_SIZE = hiddenDim1;
OutputLayer1Info.TOTAL_BATCH_NUM = batchNum;
OutputLayer1Info.TOTAL_SAMPLE_NUM = sampleSize;
DenseMatrixStat OutputLayer2Info;
OutputLayer2Info.MAX_ROW_SIZE = miniBatchSize;
OutputLayer2Info.MAX_COL_SIZE = hiddenDim2;
OutputLayer2Info.TOTAL_BATCH_NUM = batchNum;
OutputLayer2Info.TOTAL_SAMPLE_NUM = sampleSize;
ifstream modelReader;
modelReader.open("model//dssm.v2.model", ofstream::binary);
FullyConnectedLayer srcLayer1(modelReader, &rb);
FullyConnectedLayer srcLayer2(modelReader, &rb);
FullyConnectedLayer tgtLayer1(modelReader, &rb);
FullyConnectedLayer tgtLayer2(modelReader, &rb);
modelReader.close();
DenseMatrixStat OutputSimInfo;
OutputSimInfo.MAX_ROW_SIZE = miniBatchSize;
OutputSimInfo.MAX_COL_SIZE = 1;
OutputSimInfo.TOTAL_BATCH_NUM = batchNum;
OutputSimInfo.TOTAL_SAMPLE_NUM = sampleSize;
SparseIndexMatrix srcBatch(&srcMiniBatchInfo, rb.Device);
HiddenDenseMatrix srcLayer1Data(&OutputLayer1Info, rb.Device);
HiddenDenseMatrix srcLayer2Data(&OutputLayer2Info, rb.Device);
SparseIndexMatrix tgtBatch(&tgtMiniBatchInfo, rb.Device);
HiddenDenseMatrix tgtLayer1Data(&OutputLayer1Info, rb.Device);
HiddenDenseMatrix tgtLayer2Data(&OutputLayer2Info, rb.Device);
BiMatchData biMatchData(miniBatchSize, 0, rb.Device);
SimilarityRunner similarityRunner(10, &rb);
HiddenDenseMatrix simOutput(&OutputSimInfo, rb.Device);
HiddenDenseMatrix probOutput(&OutputSimInfo, rb.Device);
ofstream outfile;
outfile.open("data//test_data.v2.result", ofstream::out);
int smpIdx = 0;
for (int b = 0; b<batchNum; b++)
{
srcBatch.Refresh();
tgtBatch.Refresh();
while (smpIdx < sampleSize && srcBatch.RowSize < miniBatchSize && tgtBatch.RowSize < miniBatchSize)
{
srcBatch.PushSample(get<0>(src_batch[smpIdx]), get<1>(src_batch[smpIdx]));
tgtBatch.PushSample(get<0>(tgt_batch[smpIdx]), get<1>(tgt_batch[smpIdx]));
smpIdx++;
}
srcLayer1.Forward(&srcBatch, srcLayer1Data.Output);
srcLayer2.Forward(srcLayer1Data.Output, srcLayer2Data.Output);
tgtLayer1.Forward(&tgtBatch, tgtLayer1Data.Output);
tgtLayer2.Forward(tgtLayer1Data.Output, tgtLayer2Data.Output);
//.........这里部分代码省略.........
开发者ID:yelongshen,项目名称:biglearn-dssm,代码行数:101,代码来源:main.cpp
示例20:
// Replaces the contents of children_vocab with a copy of the elements of
// `newvocab`, preserving their order.
void
LinkableValueNode::set_children_vocab(const Vocab &newvocab)
{
children_vocab.assign(newvocab.begin(),newvocab.end());
}
开发者ID:sergiorm,项目名称:synfig_jcome,代码行数:5,代码来源:valuenode.cpp
注:本文中的Vocab类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论