
C++ Vocab Class Code Examples


This article collects typical usage examples of the C++ Vocab class. If you have been wondering how the Vocab class is used in practice, what it is for, or where to find concrete examples, the curated snippets below should help.



A total of 20 Vocab class code examples are shown below, ordered by popularity by default.

Example 1: PrintSentence

void ParallelCorpus::PrintSentence(
    const Sentence& sentence, const Vocab& vocab, std::ostream& out) const {
  if (sentence.size() > 0) {
    out << vocab.GetWord(sentence.at(0));
  }
  for (int i = 1; i < sentence.size(); ++i) {
    out << " " << vocab.GetWord(sentence.at(i));
  }
}
Developer: jrs026 | Project: SentenceAlignment | Lines: 9 | Source: parallel_corpus.cpp
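
The printing pattern above needs nothing beyond a word-id lookup. For readers without the SentenceAlignment project, here is a self-contained sketch; the MiniVocab stand-in and the sample data are hypothetical, and only GetWord mirrors the interface used above.

#include <iostream>
#include <string>
#include <vector>

// Minimal stand-in for the project's Vocab (hypothetical, for illustration only).
class MiniVocab {
 public:
  explicit MiniVocab(const std::vector<std::string>& words) : words_(words) {}
  const std::string& GetWord(int id) const { return words_.at(id); }
 private:
  std::vector<std::string> words_;
};

typedef std::vector<int> MiniSentence;

// Same space-separated printing pattern as PrintSentence above.
void PrintMiniSentence(const MiniSentence& s, const MiniVocab& vocab, std::ostream& out) {
  if (!s.empty()) out << vocab.GetWord(s.at(0));
  for (size_t i = 1; i < s.size(); ++i) out << " " << vocab.GetWord(s.at(i));
  out << std::endl;
}

int main() {
  MiniVocab vocab({"the", "cat", "sat"});
  MiniSentence s = {0, 1, 2};
  PrintMiniSentence(s, vocab, std::cout);  // prints: the cat sat
  return 0;
}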


Example 2: Print

void PackedTrie::Print(const Vocab& source_vocab, const Vocab& target_vocab,
    std::ostream& out) const {
  for (int s = 0; s < source_count_; ++s) {
    for (int i = offsets_[s]; i < offsets_[s + 1]; ++i) {
      out << source_vocab.GetWord(s) << "\t"
          << target_vocab.GetWord(target_words_[i]) << "\t"
          << exp(data_[i]) << std::endl;
    }
  }
}
Developer: jrs026 | Project: SentenceAlignment | Lines: 10 | Source: packed_trie.cpp
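
The nested loop above walks a compressed-sparse-row (CSR) layout: offsets_[s] and offsets_[s + 1] bracket the target entries belonging to source word s, and data_ holds log-probabilities that are exponentiated on output. A self-contained sketch of that traversal with plain vectors (all values below are made up):

#include <cmath>
#include <iostream>
#include <vector>

int main() {
  // CSR-style storage: entries for source word s live in [offsets[s], offsets[s + 1]).
  std::vector<int> offsets = {0, 2, 3};           // two source words -> three offsets
  std::vector<int> target_words = {4, 7, 9};      // target vocab id of each entry
  std::vector<double> data = {-0.1, -2.3, -0.7};  // log-probabilities, exp'd on output

  for (int s = 0; s + 1 < (int)offsets.size(); ++s)
    for (int i = offsets[s]; i < offsets[s + 1]; ++i)
      std::cout << s << "\t" << target_words[i] << "\t" << std::exp(data[i]) << "\n";
  return 0;
}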


Example 3: getBigramProb

//Get P(W2 | W1) -- bigram
double getBigramProb(const char *w1, const char *w2, Vocab &voc, Ngram &lm){
	VocabIndex wid1 = voc.getIndex(w1);
	VocabIndex wid2 = voc.getIndex(w2);
	if(wid1 == Vocab_None)  //OOV
		wid1 = voc.getIndex(Vocab_Unknown);
	if(wid2 == Vocab_None){  //OOV
		wid2 = voc.getIndex(Vocab_Unknown);
		return -20;
	}
	VocabIndex context[] = { wid1, Vocab_None };
	return lm.wordProb( wid2, context);
}
Developer: hbtsai | Project: pdp_hw4_problems | Lines: 13 | Source: disambig_my.cpp
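
Driving this helper follows the usual SRILM setup, the same one Example 17 below uses: construct a Vocab, bind it to an Ngram of order 2, and read an ARPA model. A sketch, assuming the SRILM headers are available and with a hypothetical model path; wordProb returns a log probability (base 10 in SRILM's convention), which is why the OOV fallback above is -20.

#include <iostream>
#include "File.h"
#include "Vocab.h"
#include "Ngram.h"

int main() {
  Vocab voc;
  Ngram lm(voc, 2);               // bigram model, as in Example 17
  File lmFile("bigram.lm", "r");  // hypothetical ARPA model path
  lm.read(lmFile);
  lmFile.close();
  // log10 P("cat" | "the") under the loaded model
  std::cout << getBigramProb("the", "cat", voc, lm) << std::endl;
  return 0;
}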


Example 4: CreateFromString

void Word::CreateFromString(const std::string &inString, Vocab &vocab)
{
  if (inString.substr(0, 1) == "[" && inString.substr(inString.size() - 1, 1) == "]") {
    // non-term
    m_isNonTerminal = true;
    string str = inString.substr(1, inString.size() - 2);
    m_vocabId = vocab.AddVocabId(str);
  } else {
    m_isNonTerminal = false;
    m_vocabId = vocab.AddVocabId(inString);
  }

}
Developer: Avmb | Project: mosesdecoder | Lines: 13 | Source: Word.cpp


Example 5: CreateVocabMap

void CorpusReader::CreateVocabMap(const Vocab& corpus_vocab,
                                  const vector< vector<string> >& filter_vocab,
                                  vector<IntIntMap>* lookup) {
  assert(corpus_vocab.has_language());
  int lang = corpus_vocab.language();
  if (lang >= (int)lookup->size()) lookup->resize(lang + 1);
  if (filter_vocab[lang].size() > 0) {
    cout << "Adding vocab for language " << lang << "(" <<
      corpus_vocab.terms().size() << ")" << endl;
    CreateFilteredMap(corpus_vocab, filter_vocab[lang], &(*lookup)[lang]);
  } else {
    cout << "Skipping language " << lang << endl;
  }
}
Developer: NetBUG | Project: topicmod | Lines: 14 | Source: corpus_reader.cpp


Example 6: CreateUnfilteredMap

void CorpusReader::CreateUnfilteredMap(const Vocab& proto_voc,
                                       StringIntMap* lookup,
                                       IntIntMap* mapping) {
  for (int ii = 0; ii < proto_voc.terms_size(); ++ii) {
    const lib_corpora_proto::Vocab_Entry& word = proto_voc.terms(ii);
    string term = word.original();
    if (lookup->find(term) == lookup->end()) {
      int new_id = lookup->size();
      (*lookup)[term] = new_id;
      // cout << "Adding " << term << " with id " << new_id << endl;
    }
    (*mapping)[word.id()] = (*lookup)[term];
    // cout << "---------------" << endl;
  }
}
Developer: NetBUG | Project: topicmod | Lines: 15 | Source: corpus_reader.cpp


Example 7: ConvertToMoses

void Word::ConvertToMoses(
    const std::vector<Moses::FactorType> &outputFactorsVec, 
    const Vocab &vocab,
    Moses::Word &overwrite) const {
  Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
  overwrite = Moses::Word(m_isNonTerminal);

  // TODO: this conversion should have been done at load time.  
  util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');

  for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
    UTIL_THROW_IF(!tok, util::Exception, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
    overwrite.SetFactor(*t, factorColl.AddFactor(*tok));
  }
  UTIL_THROW_IF(tok, util::Exception, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
Developer: Avmb | Project: mosesdecoder | Lines: 16 | Source: Word.cpp
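
The token iterator above splits a vocab entry into Moses factors on '|' and throws if the factor count does not match outputFactorsVec. A self-contained sketch of that split using std::istringstream instead of util::TokenIter (the sample entry is made up):

#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // Factored representation as consumed by ConvertToMoses: surface|POS|lemma.
  std::string entry = "houses|NNS|house";  // hypothetical vocab entry
  std::vector<std::string> factors;
  std::istringstream in(entry);
  std::string f;
  while (std::getline(in, f, '|')) factors.push_back(f);
  // ConvertToMoses additionally checks that the factor count matches expectations.
  for (std::size_t i = 0; i < factors.size(); ++i)
    std::cout << "factor " << i << ": " << factors[i] << std::endl;
  return 0;
}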


Example 8: LoadVocabulary

void SparseHieroReorderingFeature::LoadVocabulary(const std::string& filename, Vocab& vocab)
{
  if (filename.empty()) return;
  ifstream in(filename.c_str());
  UTIL_THROW_IF(!in, util::Exception, "Unable to open vocab file: " << filename);
  string line;
  while(getline(in,line)) {
    vocab.insert(FactorCollection::Instance().AddFactor(line)); 
  }
  in.close();
}
Developer: tangym | Project: mosesdecoder | Lines: 11 | Source: SparseHieroReorderingFeature.cpp


Example 9: CreateFilteredMap

void CorpusReader::CreateFilteredMap(const Vocab& corpus_voc,
                                     const vector<string>& filter_voc,
                                     IntIntMap* id_lookup) {
  map<string, int> new_id;

  // RHS will be new vocab
  for (int ii = 0; ii < (int)filter_voc.size(); ++ii) {
    new_id[filter_voc[ii]] = ii;
  }

  // LHS will be old vocab
  for (int ii = 0; ii < corpus_voc.terms_size(); ++ii) {
    const lib_corpora_proto::Vocab_Entry& word = corpus_voc.terms(ii);
    string term = word.original();
    if (new_id.find(term) != new_id.end()) {
      (*id_lookup)[word.id()] = new_id[term];
      // cout << word.id() << "->" << new_id[term] << "(term)" << endl;
    }
  }
}
Developer: NetBUG | Project: topicmod | Lines: 20 | Source: corpus_reader.cpp


Example 10: extractBinaryfromStream

void extractBinaryfromStream(const char * inputStream, Vocab & textHash,
		vector < tuple <int *, int > > & src_batch, vector < tuple <int *, int > > & tgt_batch, int isFilter, int debugLines)
{
	ifstream infile;
	infile.open(inputStream, ifstream::in);
	string line;
	int lineIdx = 0;
	while (getline(infile, line))
	{
		stringstream linestream(line);
		string src, tgt;
		getline(linestream, src, '\t');
		getline(linestream, tgt, '\t');

		int src_token_num = 0;
		int tgt_token_num = 0;
		char** src_tokens = BasicUtil::TokenizeString(src, src_token_num, MAX_TOKEN_NUM, MAX_TOKEN_LEN);
		char** tgt_tokens = BasicUtil::TokenizeString(tgt, tgt_token_num, MAX_TOKEN_NUM, MAX_TOKEN_LEN);

		int * src_fea = new int[MAX_TOKEN_LEN * MAX_TOKEN_NUM];
		int * src_seg = new int[MAX_TOKEN_NUM];

		int * tgt_fea = new int[MAX_TOKEN_LEN * MAX_TOKEN_NUM];
		int * tgt_seg = new int[MAX_TOKEN_NUM];

		int src_seg_num = textHash.FeatureExtract((const char **)src_tokens, src_token_num, src_seg, src_fea);
		int tgt_seg_num = textHash.FeatureExtract((const char **)tgt_tokens, tgt_token_num, tgt_seg, tgt_fea);
		
		int src_feature_num = 0; //src_seg[src_seg_num - 1];
		int tgt_feature_num = 0; //tgt_seg[tgt_seg_num - 1];
		
		if(src_seg_num >= 1)
		{
		    src_feature_num = src_seg[src_seg_num - 1];
		}
		
		if(tgt_seg_num >= 1)
		{
		    tgt_feature_num = tgt_seg[tgt_seg_num - 1];
		}
		
		if(isFilter == 1 && (src_feature_num <= 0 || tgt_feature_num <= 0))
		{
		    // Skip empty pairs; release the buffers allocated above so they do not leak.
		    delete [] src_fea; delete [] src_seg;
		    delete [] tgt_fea; delete [] tgt_seg;
		    continue;
		}

		src_batch.push_back(tuple<int*, int>(src_fea, src_feature_num));
		tgt_batch.push_back(tuple<int*, int>(tgt_fea, tgt_feature_num));
		// The batches take ownership of src_fea/tgt_fea; the segment buffers are no longer needed.
		delete [] src_seg; delete [] tgt_seg;

		lineIdx += 1;
		if(lineIdx == debugLines) break;
	}
}
Developer: yelongshen | Project: biglearn-dssm | Lines: 53 | Source: main.cpp
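
Examples 18 and 19 below drive this function for training (filtering enabled) and prediction (filtering disabled) respectively. A condensed call sequence, taken from ModelTrain; it still depends on the project's Vocab and data files, so it is an excerpt rather than a standalone program:

// Condensed usage from ModelTrain (Example 18); paths are those used there.
Vocab vocab;
vocab.LoadVocab("l3g.txt");  // letter-trigram vocabulary
vector< tuple<int*, int> > src_batch, tgt_batch;
extractBinaryfromStream("data//train_data_40k.tsv", vocab,
                        src_batch, tgt_batch, /*isFilter=*/1, /*debugLines=*/0);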


Example 11: CreateFromString

void Word::CreateFromString(const std::string &inString, Vocab &vocab)
{
	if (inString.substr(0, 1) == "[" && inString.substr(inString.size() - 1, 1) == "]")
	{ // non-term
		m_isNonTerminal = true;
	}
	else
	{
		m_isNonTerminal = false;
	}

	m_factors.resize(1);
	m_factors[0] = vocab.AddVocabId(inString);	
}
Developer: obo | Project: Moses-Extensions-at-UFAL | Lines: 14 | Source: Word.cpp


Example 12: convert_trees_to_indexed_minibatches

    treebank_minibatch_dataset convert_trees_to_indexed_minibatches(
        const Vocab& word_vocab,
        const std::vector<AnnotatedParseTree::shared_tree>& trees,
        int minibatch_size) {
        treebank_minibatch_dataset dataset;

        auto to_index_pair = [&word_vocab](std::pair<std::vector<std::string>, uint>&& pair, bool&& is_root) {
            return std::tuple<std::vector<uint>, uint, bool>(
                word_vocab.encode(pair.first),
                pair.second,
                is_root);
        };

        if (dataset.size() == 0)
            dataset.emplace_back(0);

        for (auto& tree : trees) {

            // create new minibatch
            if (dataset[dataset.size()-1].size() == minibatch_size) {
                dataset.emplace_back(0);
                dataset.back().reserve(minibatch_size);
            }

            // add root
            dataset[dataset.size()-1].emplace_back(
                to_index_pair(
                    tree->to_labeled_pair(),
                    true
                )
            );

            // add children:
            for (auto& child : tree->general_children) {
                if (dataset[dataset.size()-1].size() == minibatch_size) {
                    dataset.emplace_back(0);
                    dataset.back().reserve(minibatch_size);
                }
                dataset[dataset.size()-1].emplace_back(
                    to_index_pair(
                        child->to_labeled_pair(),
                        false
                    )
                );
            }
        }
        return dataset;
    }
Developer: codeaudit | Project: Dali | Lines: 48 | Source: SST.cpp


Example 13: add_example

    void add_example(
            const Vocab& vocab,
            const vector<string>& example_orig,
            size_t& example_idx) {
        int len = std::min(example_orig.size(), (size_t)FLAGS_max_sentence_length);
        vector<string> example(example_orig.begin(), example_orig.begin() + len);

        auto description_length = example.size();
        this->data.w(0, example_idx) = vocab.word2index.at(START);
        auto encoded = vocab.encode(example, true);
        this->mask.w(0, example_idx) = 0.0;
        for (size_t j = 0; j < encoded.size(); j++) {
            this->data.w(j + 1, example_idx) = encoded[j];
            this->mask.w(j + 1, example_idx) = (R)1.0;
        }
        this->code_lengths[example_idx] = description_length + 1;
        this->total_codes += description_length + 1;
    }
Developer: byzhang | Project: dali-examples | Lines: 18 | Source: language_model.cpp


Example 14: AddLine

void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
{
    //cerr << line << endl;
    NgramCounter ngramCounts;
    list<WordVec> openNgrams;
    size_t length = 0;
    //tokenize & count
    for (util::TokenIter<util::SingleCharacter, true> j(line, util::SingleCharacter(' ')); j; ++j) {
        const Vocab::Entry* nextTok = &(vocab.FindOrAdd(*j));
        ++length;
        openNgrams.push_front(WordVec());
        for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end();  ++k) {
            k->push_back(nextTok);
            ++ngramCounts[*k];
        }
        if (openNgrams.size() >=  kBleuNgramOrder) openNgrams.pop_back();
    }

    //merge into overall ngram map
    for (NgramCounter::const_iterator ni = ngramCounts.begin();
            ni != ngramCounts.end(); ++ni) {
        size_t count = ni->second;
        //cerr << *ni << " " << count <<  endl;
        if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
        NgramMap::iterator totalsIter = ngramCounts_[sentenceId].find(ni->first);
        if (totalsIter == ngramCounts_[sentenceId].end()) {
            ngramCounts_[sentenceId][ni->first] = pair<size_t,size_t>(count,count);
        } else {
            ngramCounts_[sentenceId][ni->first].first = max(count, ngramCounts_[sentenceId][ni->first].first); //clip
            ngramCounts_[sentenceId][ni->first].second += count; //no clip
        }
    }
    //length
    if (lengths_.size() <= sentenceId) lengths_.resize(sentenceId+1);
    //TODO - length strategy - this is MIN
    if (!lengths_[sentenceId]) {
        lengths_[sentenceId] = length;
    } else {
        lengths_[sentenceId] = min(length,lengths_[sentenceId]);
    }
    //cerr << endl;

}
Developer: mitramah | Project: mosesdecoder | Lines: 43 | Source: ForestRescore.cpp
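
The merge step implements the usual BLEU reference bookkeeping: across multiple references for one sentence, clipped counts take the maximum per n-gram while unclipped counts sum. A self-contained illustration with toy unigram counts (the reference strings are made up):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>

int main() {
  // ngram -> (clipped, unclipped), mirroring ngramCounts_ above.
  typedef std::map<std::string, std::pair<std::size_t, std::size_t> > NgramTotals;
  NgramTotals totals;

  // Two toy references for the same sentence, unigrams only.
  const char* refs[2][3] = { {"the", "the", "cat"}, {"the", "cat", "sat"} };
  for (int r = 0; r < 2; ++r) {
    std::map<std::string, std::size_t> counts;
    for (int i = 0; i < 3; ++i) ++counts[refs[r][i]];
    for (std::map<std::string, std::size_t>::const_iterator it = counts.begin();
         it != counts.end(); ++it) {
      std::pair<std::size_t, std::size_t>& t = totals[it->first];
      t.first = std::max(t.first, it->second);  // clip: max over references
      t.second += it->second;                   // no clip: sum over references
    }
  }
  // "the": clipped = 2 (max of 2 and 1), unclipped = 3 (2 + 1)
  std::cout << totals["the"].first << " " << totals["the"].second << std::endl;
  return 0;
}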


Example 15: forwardish

// Returns a vector of LiveGuessResults
// warning: words is mutated temporarily
std::auto_ptr< std::vector<LiveGuessResult> > 
forwardish(std::vector<const char *> & words, // the current words can be empty
           const double currentProb, // log prob
           const int size, // how many to grab
           const int depthLeft,
           const NgramLM & _lm, 
           const int _order,  
           const Vocab & vocab ) {
  
  // Index contains the last ngram word 



  //Logger::Log(0, "Forwardish [%d] [%d]\n", depthLeft, index);

  VocabIndex vwords[ _order ];
  //int n = (words.size() < (_order - 1))?words.size():_order;

  //for (int i = words.size() - _order - 1; i < words.size(); i++) {
  //  if ( i >= 0) {
  //    Logger::Log(0,"Word: %d %s\n",i,words[i]);
  //  }
  //}

  //vwords[0] to _order -1 are filled in
  // if it's small EndOfSentence starts it..
  for (int i = 1; i < _order; i++) {
    int j = words.size() - _order + i;
    if (j < 0) {
      vwords[i - 1] = Vocab::Invalid; // probably should be end of sentence
    } else {
      vwords[i - 1] = vocab.Find( words[ j ] );
    }
  }


  vector<VocabProb> heap(0);

  mkHeap(heap);

  const ProbVector & probabilities = _lm.probs(  _order ) ;// _order - 2  );
  const CountVector & counts = _lm.counts( _order );
  
  int count = 0;
  //Logger::Log(0, "Find probabilities %d\n",vocab.size());

  for (int j = 0; j < vocab.size(); j++) {
    VocabIndex vWordI = j;//vocab[j];
    vwords[ _order - 1 ] = j;
    NgramIndex newIndex = _lm.model()._Find( vwords, _order );
    
    if (newIndex == -1) { // not legit :(
      continue;
    }
    Prob probRaw = probabilities[ newIndex ];
    if (probRaw == 0.0) {
      continue;
    }
    Prob prob = -1 * log( probRaw ); //biggest is smallest

    //Prob prob = (probRaw == 0.0)?10000:(-1 * log( probRaw )); //biggest is smallest
    //Prob probRaw = (counts[newIndex]==0)?1.0:counts[newIndex]/vocab.size()
    //Prob prob = -1 * log(probRaw);
    //Prob prob = -1 * counts[newIndex];
    //Logger::Log(0, "Prob %e\n",prob);

    const VocabProb v( prob,j, newIndex);
    if ( count < size ) {
      heap.push_back( v );
      count++;
      if (count == size) {
        mkHeap( heap );
      }
      // this is irritating, basically it means the highest rank stuff
      // will be in the list and we only kick out the lowest ranked stuff
      // (which will be the GREATEST of what is already there)
      // 
    } else if (  heap.front().prob >  prob ) {
      // this is dumb        
      // remove the least element
      popHeap( heap );
      pushHeap( heap, v );
      // should we update?
    }
  }
  sortHeap( heap );

  std::vector<LiveGuessResult> * resVector = new std::vector<LiveGuessResult>();
  
  for( int j = 0; j < heap.size(); j++) {
    VocabProb v = heap[ j ];
    Prob prob = v.prob;
    prob += currentProb;
    const char * word = vocab[ v.index ];
    vector<const char *> ourWords(words);
    ourWords.push_back( word ); // add 
    char * str = joinVectorOfCStrings( ourWords ); // Remember to deallocate later :(
    
//......... remainder of the code omitted .........
Developer: abramhindle | Project: MIT-Language-Modeling-Toolkit | Lines: 101 | Source: LiveGuess.cpp
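
The custom heap calls above (mkHeap, popHeap, pushHeap) implement a standard top-k selection: keep the k candidates with the smallest negative log-probabilities and evict the current worst whenever a better one arrives. A compact self-contained sketch of the same idea using std::priority_queue rather than the project's helpers (scores made up):

#include <cstddef>
#include <iostream>
#include <queue>

int main() {
  const std::size_t k = 3;
  double scores[] = {2.1, 0.3, 5.0, 1.7, 0.9, 4.2};  // negative log-probs: smaller is better

  std::priority_queue<double> worstOnTop;  // max-heap: top() is the worst score kept so far
  for (double s : scores) {
    if (worstOnTop.size() < k) {
      worstOnTop.push(s);
    } else if (worstOnTop.top() > s) {
      worstOnTop.pop();    // evict the current worst...
      worstOnTop.push(s);  // ...and keep the better candidate
    }
  }
  while (!worstOnTop.empty()) {  // prints the kept scores, worst first: 1.7 0.9 0.3
    std::cout << worstOnTop.top() << " ";
    worstOnTop.pop();
  }
  std::cout << std::endl;
  return 0;
}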


Example 16: DebugPrint

void Word::DebugPrint(ostream &out, const Vocab &vocab) const
{
  const string &str = vocab.GetString(m_vocabId);
  out << str;
}
Developer: Avmb | Project: mosesdecoder | Lines: 5 | Source: Word.cpp


Example 17: main

int main(int argc, char* argv[])
{
	Vocab vocab;
	Ngram lm(vocab, 2); 
	vector<string> splitLine;
	map<string, set<string> > mapping;
	map<string, set<string> >::iterator map_iter;
	vector<string> BestLine;
	vector<string>::iterator Best_iter;
	FILE * TextFile;
	FILE * MapFile;

	char ch;
	char tmpstr[BUFSIZE];

	for(int i=0 ; i<argc ; i++)
	{
		if(string("-text")==argv[i])
		{
			TextFile = fopen(argv[i+1],"r");
		}
		if(string("-map")==argv[i])
		{
			MapFile = fopen(argv[i+1], "r");
		}
		if(string("-lm")==argv[i])
		{
			File lmFile(argv[i+1],"r"); 
			lm.read(lmFile);
			lmFile.close();
		}
	}
	//read MapFile into map<string, set<string> > mapping
	while(fgets(tmpstr, BUFSIZE, MapFile))  // bound the read by the actual buffer size
	{
		char *tok=strtok(tmpstr,"\n");
		string Key,StringTok;
		set<string> ZhuYin;
		while(tok!=NULL)
		{
			StringTok=string(tok);
			Key = StringTok.substr(0,2);//read the first ZhuYin or ChuIn to key
			int pos;
			string tmpLine=StringTok.substr(3);
			while((pos = tmpLine.find(" "))!=-1)
			{
				tmpLine.erase(pos,1);
			}
			assert(tmpLine.size()%2==0);
			for(int i=0 ; i<tmpLine.size() ; i+=2)
			{
				string buf = tmpLine.substr(i, 2);
				ZhuYin.insert(buf);
			}
			mapping[Key]=ZhuYin;
			tok = strtok(NULL,"\n");
			ZhuYin.clear();
		}
	}
	//read TextFile into vector<string> splitLine
	int line =0;
	while(fgets(tmpstr, BUFSIZE, TextFile))  // bound the read by the actual buffer size
	{
		line++;
		char *tok=strtok(tmpstr,"\n");//Splite into one line
		string tmpLine = string(tok);    	
		while(tok!=NULL)
		{	
			int pos;
			while((pos = tmpLine.find(" "))!=-1)
			{
				tmpLine.erase(pos,1);
			}
			assert(tmpLine.size()%2==0);
			for(int i=0 ; i<tmpLine.size() ; i+=2)
			{
				string buf = tmpLine.substr(i, 2);
				splitLine.push_back(buf);//push one word to splitLine 
			}
			tok = strtok(NULL,"\n");
		}
		splitLine.push_back("\n");	
	}
	int count = 1;
	//Viterbi
	for(int i=0;i<splitLine.size();i++)
	{
		set<string> TmpSet;
		if(i==0)
		{
			//cout << count << endl;
			BestLine.push_back("<s>");
			BestLine.push_back(" ");
		}
		if(splitLine[i]=="\n")
		{
			count++;
			//cout << endl;
			//cout << count << endl;
			BestLine.push_back("</s>");
//......... remainder of the code omitted .........
Developer: Plabo1028 | Project: DSP_NTU_Hw | Lines: 101 | Source: mydisambig.cpp
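
The Viterbi portion of main is truncated above. The recurrence it implements is the standard one: at each position, the best score for ending in candidate w is the maximum over previous candidates p of best(p) + log P(w | p). A minimal self-contained sketch under assumptions: the logProb stand-in and the candidate sets are hypothetical, and real code would use lm.wordProb and keep backpointers to recover the best sequence.

#include <algorithm>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for the language-model score log P(curr | prev).
double logProb(const std::string& prev, const std::string& curr) {
  if (prev == "<s>" && curr == "A") return -1.0;
  if (prev == "A" && curr == "B") return -0.5;
  return -5.0;  // everything else is unlikely
}

int main() {
  // candidates[i]: possible words at position i (e.g. characters sharing a ZhuYin key).
  std::vector<std::vector<std::string>> candidates = {{"A", "a"}, {"B", "b"}};

  std::map<std::string, double> best;  // best log-prob of any path ending in this word
  best["<s>"] = 0.0;                   // sentence start, as with "<s>" in BestLine above

  for (auto& column : candidates) {
    std::map<std::string, double> next;
    for (auto& w : column) {
      double bestScore = -std::numeric_limits<double>::infinity();
      for (auto& p : best)  // max over predecessors: best(p) + log P(w | p)
        bestScore = std::max(bestScore, p.second + logProb(p.first, w));
      next[w] = bestScore;  // real code would also record a backpointer here
    }
    best = next;
  }
  // The arg-max over the final column ends the best path ("B" here, score -1.5).
  for (auto& p : best)
    std::cout << p.first << " " << p.second << std::endl;
  return 0;
}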


Example 18: ModelTrain

void ModelTrain()
{
	Vocab vocab;
	vocab.LoadVocab("l3g.txt");
	cout << "vocab Size " << vocab.VocabSize << endl;
	vector < tuple <int *, int > > src_batch, tgt_batch;
	extractBinaryfromStream("data//train_data_40k.tsv", vocab, src_batch, tgt_batch, 1, 0);

	int sampleSize = src_batch.size();
	cout << "train sample size" << sampleSize << endl;

	int iteration = 30;
	int miniBatchSize = 1024;
	int featureDim = vocab.VocabSize;
	int batchNum = sampleSize / miniBatchSize;
	int nTrial = 4;

	vector <int> shuff(sampleSize);

	RunnerBehavior rb;
	rb.RunMode = RUNMODE_TRAIN;
	rb.Device = DEVICE_GPU;
	cout<<"init cuda computation ...."<<endl;
	rb.ComputeLib = new CudaOperationManager(true, true);
	
	cout<<"init cuda computation done"<<endl;
	
	int hiddenDim1 = 128;
	int hiddenDim2 = 128;

	SparseIndexMatrixStat srcMiniBatchInfo;
	srcMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	srcMiniBatchInfo.MAX_COL_SIZE = featureDim;
	srcMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	srcMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	srcMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	SparseIndexMatrixStat tgtMiniBatchInfo;
	tgtMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	tgtMiniBatchInfo.MAX_COL_SIZE = featureDim;
	tgtMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	tgtMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	tgtMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	DenseMatrixStat OutputLayer1Info;
	OutputLayer1Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer1Info.MAX_COL_SIZE = hiddenDim1;
	OutputLayer1Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer1Info.TOTAL_SAMPLE_NUM = sampleSize;


	DenseMatrixStat OutputLayer2Info;
	OutputLayer2Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer2Info.MAX_COL_SIZE = hiddenDim2;
	OutputLayer2Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer2Info.TOTAL_SAMPLE_NUM = sampleSize;


	FullyConnectedLayer srcLayer1(featureDim, hiddenDim1, &rb);
	FullyConnectedLayer srcLayer2(hiddenDim1, hiddenDim2, &rb);

	FullyConnectedLayer tgtLayer1(featureDim, hiddenDim1, &rb);
	FullyConnectedLayer tgtLayer2(hiddenDim1, hiddenDim2, &rb);

	DenseMatrixStat OutputSimInfo;
	OutputSimInfo.MAX_ROW_SIZE = miniBatchSize;
	OutputSimInfo.MAX_COL_SIZE = 1 + nTrial;
	OutputSimInfo.TOTAL_BATCH_NUM = batchNum;
	OutputSimInfo.TOTAL_SAMPLE_NUM = sampleSize;

	SparseIndexMatrix srcBatch(&srcMiniBatchInfo, rb.Device);	
	HiddenDenseMatrix srcLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix srcLayer2Data(&OutputLayer2Info, rb.Device);

	SparseIndexMatrix tgtBatch(&tgtMiniBatchInfo, rb.Device);
	HiddenDenseMatrix tgtLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix tgtLayer2Data(&OutputLayer2Info, rb.Device);

	BiMatchData biMatchData(miniBatchSize, nTrial, rb.Device);

	SimilarityRunner similarityRunner(10, &rb);
	HiddenDenseMatrix simOutput(&OutputSimInfo, rb.Device);
	HiddenDenseMatrix probOutput(&OutputSimInfo, rb.Device);

	probOutput.Deriv->Data->Zero();
	
	//iteration = 1;
	cout<<"start training iteration"<<endl;
	
	double train_time = 0;
	double io_time = 0;
		
	struct timeval train_start, train_end;
	struct timeval io_start, io_end;
	
	gettimeofday(&train_start, 0);
	
	for (int iter = 0; iter<iteration; iter++)
	{
		for (int i = 0; i<sampleSize; i++) shuff[i] = i;
//......... remainder of the code omitted .........
Developer: yelongshen | Project: biglearn-dssm | Lines: 101 | Source: main.cpp
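
The epoch loop is truncated above. After shuff is filled, a typical pass shuffles the indices and walks fixed-size minibatches; a self-contained sketch of that general pattern (not the project's actual loop):

#include <algorithm>
#include <iostream>
#include <random>
#include <vector>

int main() {
  const int sampleSize = 10, miniBatchSize = 4;
  std::vector<int> shuff(sampleSize);
  for (int i = 0; i < sampleSize; i++) shuff[i] = i;  // same fill as ModelTrain above

  std::mt19937 rng(42);                           // fixed seed for a reproducible demo
  std::shuffle(shuff.begin(), shuff.end(), rng);  // new visiting order each epoch

  for (int start = 0; start + miniBatchSize <= sampleSize; start += miniBatchSize) {
    // A real loop would push samples shuff[start .. start+miniBatchSize) into the
    // sparse batches and run forward/backward; here we just print the indices.
    std::cout << "batch at " << start << ":";
    for (int j = 0; j < miniBatchSize; j++) std::cout << " " << shuff[start + j];
    std::cout << std::endl;
  }
  return 0;
}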


Example 19: ModelPredict

void ModelPredict()
{
	Vocab vocab;
	vocab.LoadVocab("l3g.txt");
	cout << "vocab Size " << vocab.VocabSize << endl;
	vector < tuple <int *, int > > src_batch, tgt_batch;
	extractBinaryfromStream("data//test_data_clean.tsv", vocab, src_batch, tgt_batch, 0, 0);

	int sampleSize = src_batch.size();
	cout << "test sample size" << sampleSize << endl;

	int miniBatchSize = 1024;
	int featureDim = vocab.VocabSize;
	int batchNum = (sampleSize - 1) / miniBatchSize + 1;

	RunnerBehavior rb;
	rb.RunMode = RUNMODE_PREDICT;
	rb.Device = DEVICE_GPU;

	rb.ComputeLib = new CudaOperationManager(true, true);
	int hiddenDim1 = 128;
	int hiddenDim2 = 128;

	SparseIndexMatrixStat srcMiniBatchInfo;
	srcMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	srcMiniBatchInfo.MAX_COL_SIZE = featureDim;
	srcMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	srcMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	srcMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	SparseIndexMatrixStat tgtMiniBatchInfo;
	tgtMiniBatchInfo.MAX_ROW_SIZE = miniBatchSize;
	tgtMiniBatchInfo.MAX_COL_SIZE = featureDim;
	tgtMiniBatchInfo.TOTAL_BATCH_NUM = batchNum;
	tgtMiniBatchInfo.TOTAL_SAMPLE_NUM = sampleSize;
	tgtMiniBatchInfo.MAX_ELEMENT_SIZE = miniBatchSize * 256;

	DenseMatrixStat OutputLayer1Info;
	OutputLayer1Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer1Info.MAX_COL_SIZE = hiddenDim1;
	OutputLayer1Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer1Info.TOTAL_SAMPLE_NUM = sampleSize;

	DenseMatrixStat OutputLayer2Info;
	OutputLayer2Info.MAX_ROW_SIZE = miniBatchSize;
	OutputLayer2Info.MAX_COL_SIZE = hiddenDim2;
	OutputLayer2Info.TOTAL_BATCH_NUM = batchNum;
	OutputLayer2Info.TOTAL_SAMPLE_NUM = sampleSize;

	ifstream modelReader;
	modelReader.open("model//dssm.v2.model", ofstream::binary);
	FullyConnectedLayer srcLayer1(modelReader, &rb);
	FullyConnectedLayer srcLayer2(modelReader, &rb);
	FullyConnectedLayer tgtLayer1(modelReader, &rb);
	FullyConnectedLayer tgtLayer2(modelReader, &rb);
	modelReader.close();

	DenseMatrixStat OutputSimInfo;
	OutputSimInfo.MAX_ROW_SIZE = miniBatchSize;
	OutputSimInfo.MAX_COL_SIZE = 1;
	OutputSimInfo.TOTAL_BATCH_NUM = batchNum;
	OutputSimInfo.TOTAL_SAMPLE_NUM = sampleSize;

	SparseIndexMatrix srcBatch(&srcMiniBatchInfo, rb.Device);	
	HiddenDenseMatrix srcLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix srcLayer2Data(&OutputLayer2Info, rb.Device);

	SparseIndexMatrix tgtBatch(&tgtMiniBatchInfo, rb.Device);
	HiddenDenseMatrix tgtLayer1Data(&OutputLayer1Info, rb.Device);
	HiddenDenseMatrix tgtLayer2Data(&OutputLayer2Info, rb.Device);

	BiMatchData biMatchData(miniBatchSize, 0, rb.Device);

	SimilarityRunner similarityRunner(10, &rb);
	HiddenDenseMatrix simOutput(&OutputSimInfo, rb.Device);
	HiddenDenseMatrix probOutput(&OutputSimInfo, rb.Device);

	ofstream outfile;
	outfile.open("data//test_data.v2.result", ofstream::out);

	int smpIdx = 0;

	for (int b = 0; b<batchNum; b++)
	{
		srcBatch.Refresh();
		tgtBatch.Refresh();

		while (smpIdx < sampleSize && srcBatch.RowSize < miniBatchSize && tgtBatch.RowSize < miniBatchSize)
		{
			srcBatch.PushSample(get<0>(src_batch[smpIdx]), get<1>(src_batch[smpIdx]));
			tgtBatch.PushSample(get<0>(tgt_batch[smpIdx]), get<1>(tgt_batch[smpIdx]));
			smpIdx++;
		}

		srcLayer1.Forward(&srcBatch, srcLayer1Data.Output);
		srcLayer2.Forward(srcLayer1Data.Output, srcLayer2Data.Output);

		tgtLayer1.Forward(&tgtBatch, tgtLayer1Data.Output);
		tgtLayer2.Forward(tgtLayer1Data.Output, tgtLayer2Data.Output);

//......... remainder of the code omitted .........
Developer: yelongshen | Project: biglearn-dssm | Lines: 101 | Source: main.cpp


Example 20:

void
LinkableValueNode::set_children_vocab(const Vocab &newvocab)
{
	children_vocab.assign(newvocab.begin(),newvocab.end());
}
Developer: sergiorm | Project: synfig_jcome | Lines: 5 | Source: valuenode.cpp



Note: The Vocab class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce without permission.

