• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python viterbi.viterbi函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中viterbi.viterbi函数的典型用法代码示例。如果您正苦于以下问题:Python viterbi函数的具体用法?Python viterbi怎么用?Python viterbi使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了viterbi函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: learning

def learning(T):
    """Train tagging weights for ``T`` passes over the corpus read from stdin.

    Every stdin line is split on whitespace into ``word_tag`` tokens. The tag
    is whatever follows the *last* underscore, so words that themselves
    contain underscores (``new_york_NNP``) are parsed instead of raising.
    After the corpus is loaded, each pass decodes every sentence with
    ``viterbi`` and calls ``update_weight`` with the features of the reference
    tags and of the predicted tags. Progress is written to stdout.

    Args:
        T: number of passes over the training data.

    Returns:
        A ``(weight, possible_tags, transition)`` tuple: the weight mapping
        (missing keys are initialised with ``uniform(-1.0, 1.0)``), the set of
        tags seen plus the ``<s>``/``</s>`` markers, and the set of observed
        ``"previous_tag tag"`` strings.

    Raises:
        ValueError: if a token contains no underscore at all.
    """
    weight = defaultdict(lambda: uniform(-1.0, 1.0))
    data = []
    possible_tags = {"<s>", "</s>"}
    transition = set()
    for line in iter(sys.stdin.readline, ""):
        X, Y = [], []
        pre_y = "<s>"
        for x_y in line.rstrip().split():
            # Split on the last underscore only: the word part may contain "_".
            x, y = x_y.rsplit('_', 1)
            X.append(x)
            Y.append(y)
            possible_tags.add(y)
            transition.add(" ".join([pre_y, y]))
            pre_y = y
        transition.add(" ".join([pre_y, "</s>"]))
        data.append((X, Y))
    data_size = len(data)
    for t in range(T):
        for line_num, (X, Y_prime) in enumerate(data):
            # "\r" rewrites the same console line instead of scrolling.
            sys.stdout.write("\rIteration %d, linenum %d / %d" % (t+1, line_num+1, data_size))
            sys.stdout.flush()
            Y_hat = viterbi(weight, X, possible_tags, transition)
            phi_prime = create_feature(X, Y_prime)
            phi_hat = create_feature(X, Y_hat)
            update_weight(weight, phi_prime, phi_hat)
    return (weight, possible_tags, transition)
开发者ID:biligee,项目名称:nlp_programming_tutorial,代码行数:26,代码来源:learning.py


示例2: evaluate

def evaluate(model, examples, gold, label=None):
    """Summarise model predictions against ``gold``, with and without viterbi.

    The model output is reduced two ways: an argmax over axis 2 (flattened),
    and ``viterbi.viterbi`` applied to the concatenated output together with
    the module-level ``viterbi_probabilities``. ``label`` is accepted but not
    used by this function.

    Returns:
        The two ``common.classification_summary`` strings, separated by a
        newline, with the second one prefixed by ``'w/viterbi '``.
    """
    predictions = model.predict({'input': examples},
                                batch_size=config.batch_size)
    output = list(predictions['output'])
    greedy = np.argmax(np.asarray(output), axis=2).flatten()
    decoded = viterbi.viterbi(np.concatenate(output), *viterbi_probabilities)
    plain_summary = common.classification_summary(gold, greedy)
    viterbi_summary = common.classification_summary(gold, decoded)
    return plain_summary + '\n' + 'w/viterbi ' + viterbi_summary
开发者ID:billy322,项目名称:BioNLP-2016,代码行数:7,代码来源:rnn.py


示例3: seg

def seg(inp):
    """Return the best-scoring segmentation of ``inp``, or ``False`` if none.

    Steps, as performed below:
      1. If ``inp`` has odd length, its last element is split off as ``tail``.
      2. The module-level ``wubi_map`` is filled from the ``wubi`` table.
      3. ``all_seg`` is called with an empty list, which it is expected to
         fill with candidate segmentations (assumption: confirm in all_seg).
      4. Each candidate is scored with ``vt.viterbi``; ``None`` results are
         discarded.

    Returns:
        Element ``[0]`` of the result whose element ``[1]`` is largest (the
        later one on ties), or ``False`` when no candidate produced a result.
    """
    tail = ""
    if len(inp) % 2 != 0:
        tail = inp[-1]
        inp = inp[:-1]

    # Load the wubi map.
    for entry in wubi:
        wubi_map[entry[0]] = int(entry[1])

    # Collect all candidate segmentations.
    candidates = []
    all_seg(inp, candidates, 0, tail)

    # Find the viterbi path of every candidate.
    scored = []
    for candidate in candidates:
        ans = vt.viterbi(candidate, uni_map, big_map, wd_map, wubi)
        if ans is not None:
            scored.append(ans)

    if not scored:
        return False

    # Stable ascending sort keeps the original tie-breaking: the last of the
    # equally-scored results wins.
    scored.sort(key=lambda path: path[1])
    return scored[-1][0]
开发者ID:ChestnutHeng,项目名称:Wubi-Kernel,代码行数:34,代码来源:segment.py


示例4: viterbi_decode

 def viterbi_decode(self, seq):
     """Return a copy of ``seq`` whose ``y`` is the viterbi-decoded path.

     Potentials come from ``self.build_potentials(seq)``; only the first of
     the two values returned by ``viterbi`` is used.
     """
     potentials = self.build_potentials(seq)
     node_scores, edge_scores = potentials
     best_path, _unused = viterbi(node_scores, edge_scores)
     decoded = seq.copy_sequence()
     decoded.y = best_path
     return decoded
开发者ID:Joao-M-Almeida,项目名称:lxmls-toolkit,代码行数:7,代码来源:hmm_n_order.py


示例5: decode

 def decode(self, initials):
     """Decode ``initials`` with viterbi over the words indexed by each item.

     The candidate state set is the union of ``self.words_by_letter[obs]``
     for every ``obs`` in ``initials``. The state count and the elapsed time
     are logged; the value returned by ``viterbi.viterbi`` is passed through.
     """
     timer = Timer()
     per_letter = [self.words_by_letter[letter] for letter in initials]
     candidates = set().union(*per_letter)
     logger.info("Searching %s possible states", len(candidates))
     decoded = viterbi.viterbi(
         initials,
         candidates,
         self.start_p,
         self.transition_p,
         self.emission_p,
     )
     logger.info("Decoding %r took %s s", initials, timer.elapsed())
     return decoded
开发者ID:shirlston,项目名称:initialisms,代码行数:9,代码来源:decode.py


示例6: t_BMES

def t_BMES():
    """Print every sample with a ``|`` after each position tagged E or S.

    The model comes from ``build()``; any state missing from the initial
    distribution is given probability 0.0 before decoding each sample with
    ``viterbi``.
    """
    PI, A, B = build()
    states = B.keys()
    for state in states:
        PI.setdefault(state, 0.0)
    for sen in samples:
        observations = tuple(sen)
        prob, tags = viterbi(observations, states, PI, A, B)
        marked = []
        for i in xrange(len(sen)):
            boundary = '|' if tags[i] in 'ES' else ''
            marked.append(sen[i] + boundary)
        print(u''.join(marked))
开发者ID:Catentropy,项目名称:mylab,代码行数:10,代码来源:t_wordseg.py


示例7: test_trellis

 def test_trellis(self):
     """Check the second value returned by ``viterbi`` against fixed scores.

     Every cell addressed by the actual matrix's row and column labels must
     match the reference matrix to 3 decimal places.
     """
     _, trellis, _ = viterbi(self.obs, self.A, self.B, self.pi)
     reference_scores = np.array([
         [ -2.737, -5.474, -8.211, -11.533, -14.007, -17.329, -19.54, -22.862, -25.657],
         [ -3.322, -6.059, -8.796, -10.948, -14.007, -16.481, -19.54, -22.014, -24.487]
     ])
     expected = LMatrix(("H", "L"),
                        xrange(len(self.obs)),
                        data = reference_scores)
     for state in trellis.rlabels:
         for step in trellis.clabels:
             self.assertAlmostEqual(trellis[state, step], expected[state, step], 3)
开发者ID:xiaohan2012,项目名称:irem,代码行数:12,代码来源:t_viterbi.py


示例8: evaluate

def evaluate():
    """Tag the test file named by the command line and write the results.

    Reads ``inputs/eng.test<argv[2]>`` and writes
    ``outputs_cca_pos_egw30_rounding_currentOnly/result_<argv[2]>_<argv[1]>.txt``.
    The first input line is copied to the output (stripped, followed by a
    blank line) and the second input line is skipped. Each sentence returned
    by ``get_sentence`` is then decoded with ``viterbi.viterbi`` and written
    as ``word POS[i][0] correct_tag predicted_tag`` lines followed by a blank
    line. A progress/ETA report is rewritten to ``progress_test.txt`` before
    every sentence.

    All files are opened with ``with`` so they are flushed and closed even if
    decoding raises.

    NOTE(review): ``codes`` and ``cca1`` are passed to viterbi but are not
    assigned in this function; confirm they are initialised elsewhere.
    """
    global possible_tags, strings, cca_length
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    get_cca()
    cca_length = 20

    in_path = 'inputs/eng.test{0}'.format(sys.argv[2])
    out_path = 'outputs_cca_pos_egw30_rounding_currentOnly/result_{0}_{1}.txt'.format(sys.argv[2], sys.argv[1])
    with open(in_path, 'r') as data, open(out_path, 'w') as output:
        line = data.readline()
        output.write('{0}\n\n'.format(line.strip()))
        data.readline()  # the second line is read and discarded

        vals = get_sentence(data)
        sentence = vals[0]
        correct_tags = vals[1]
        POS = vals[2]

        count = 0
        time1 = 0.0
        avg_time = 0.0
        time_val = 0.0
        first = True
        while sentence:
            # ---- time statistics ----
            count += 1
            time2 = time()
            if not first:
                # Running mean of the time between consecutive sentences.
                avg_time = (avg_time*(count-1)+(time2-time1))/count
                time_val = int((avg_time)*(number_of_sentences-count))
            first = False
            with open('progress_test.txt', 'w') as progress:
                # Floor division keeps h/min as whole numbers on Python 3 too.
                progress.write('Percent complete:\n{0}/{1} = {2}%\n\nTime remaining: \n{3} h {4} min {5} sec'.format(int(count), int(number_of_sentences), float(count*100)/float(number_of_sentences), time_val//3600, (time_val%3600)//60, time_val%60))
            time1 = time2
            # -------------------------

            tags = viterbi.viterbi(sentence, POS, phi, possible_tags, alpha, strings, Words, regExp, codes, cca1, cca_length)
            for i in range(len(sentence)):
                output.write('{0} {1} {2} {3}\n'.format(sentence[i], POS[i][0], correct_tags[i], tags[i]))
            output.write('\n')

            vals = get_sentence(data)
            sentence = vals[0]
            correct_tags = vals[1]
            POS = vals[2]
开发者ID:NweZinOo,项目名称:perceptron-crf,代码行数:53,代码来源:perceptron_test.py


示例9: perceptron

def perceptron(print_alpha = 0, mult = 0, import_alpha = 0):
    """Train the global ``alpha`` weights with a perceptron over ``sys.argv[2]``.

    For up to ``T_DEFAULT`` epochs, every sentence from the training file is
    decoded with ``viterbi.viterbi``. When the decoded tags differ from the
    reference tags, the weights at the predicted feature indices are
    decreased and those at the reference indices increased. After every
    sentence the current ``alpha`` is accumulated into ``alpha_average``.
    Training stops early after an epoch with no mistakes.

    The training file is opened with ``with`` so the handle is released even
    if decoding or an update raises mid-epoch.

    Args:
        print_alpha: if truthy, ``write_alpha(t)`` is called after each epoch
            that did not end training.
        mult: passed to ``init_phi_alpha`` and ``viterbi.viterbi``; if truthy,
            updates divide/multiply by ``mult_factor`` instead of
            subtracting/adding ``add_factor``.
        import_alpha: if truthy, ``read_alpha()`` is called before training.
    """
    global alpha
    global alpha_average
    global possible_tags
    global strings
    global strings_abr
    global add_factor
    global mult_factor
    init_phi_alpha(mult)
    get_strings()
    if import_alpha:
        read_alpha()
    alpha_average = copy.deepcopy(alpha)
    for t in range(T_DEFAULT):
        print('---{0}---'.format(t))
        sys.stdout.flush()
        dont_repeat = True
        with open(sys.argv[2], 'r') as data:
            vals = get_sentence_and_tags(data)
            while vals:
                sentence = vals[0]
                correct_tags = vals[1]
                result = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, mult)
                z = result[0]
                indices = result[1]
                if not z == correct_tags:
                    dont_repeat = False
                    correct_indices = get_indices(sentence, correct_tags)
                    if mult:
                        for i in indices:
                            alpha[i] = float(alpha[i])/mult_factor
                        for i in correct_indices:
                            alpha[i] = float(alpha[i])*mult_factor
                    else:
                        for i in indices:
                            alpha[i] += -1*add_factor
                        for i in correct_indices:
                            alpha[i] += add_factor
                # Accumulate after every sentence, mistaken or not.
                for i in range(len(alpha)):
                    alpha_average[i] += alpha[i]
                vals = get_sentence_and_tags(data)
        if dont_repeat:
            print('SUCCESS!!!')
            break
        if print_alpha:
            write_alpha(t)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:51,代码来源:perceptron.py


示例10: run

 def run(self):
     """Train an HMM, tag with viterbi, and return predictions with truth.

     In test mode (``self.isTest`` truthy) the model is built from
     ``self.train_file``, ``self.test_file`` is tagged (with timing printed),
     and ``(predicted, actual, tokens)`` is returned, where ``actual`` and
     ``tokens`` are unzipped from ``self.parse_file(self.test_answers)``.

     Otherwise the parsed training file is split with ``self.splitCV`` and
     ``(predicted, actual)`` is returned for the held-out part.
     """
     if not self.isTest:
         print("Splitting Data")
         parsed = self.parse_file(self.train_file)
         (train, test) = self.splitCV(parsed, self.cv_validation_percentage)
         print("Converting Lists")
         train_lines = ["%s %s\n" % (p, t) for [p, t] in train]
         train_text = "".join(train_lines)
         test_lines = ["%s\n" % t for [p, t] in test]
         test_text = "".join(test_lines)
         print("Running HMM")
         model = HiddenMarkovModel(text=train_text, smoothed=self.smoothing)
         print("Running Viterbi")
         predicted = viterbi(model, text=test_text, test=False)
         actual = self.getActual(test)
         return (predicted, actual)

     print("Running HMM")
     model = HiddenMarkovModel(self.train_file, smoothed=self.smoothing)
     print("Running Viterbi")
     started = time.clock()
     predicted = viterbi(model, self.test_file, test=False)
     finished = time.clock()
     print("Viterbi ran in %f seconds" % (finished - started))
     actual, tokens = zip(*self.parse_file(self.test_answers))
     return (predicted, actual, tokens)
开发者ID:amm385,项目名称:POS,代码行数:23,代码来源:Analyzer.py


示例11: t_wordseg

def t_wordseg():
    """Decode every sample with viterbi and print the joined state sequence.

    Before decoding, the emission table ``B`` from ``build(True)`` is extended:
    for each existing key ending in ``|`` an entry for the key without the
    bar is added that emits only itself with probability 1.0; for every
    other key, an alias ``key + '|'`` pointing at the same emissions is added.
    States missing from the initial distribution get probability 0.0.
    """
    PI, A, B = build(True)
    # Iterate over a snapshot of the keys because B grows inside the loop.
    for state in list(B.keys()):
        if state[-1] == '|':
            bare = state[:-1]
            B[bare] = {bare: 1.0}
        else:
            B[state + '|'] = B[state]
    S = B.keys()
    for state in S:
        PI.setdefault(state, 0.0)
    for sen in samples:
        observations = tuple(sen)
        prob, path = viterbi(observations, S, PI, A, B)
        print(u''.join(path))
开发者ID:Catentropy,项目名称:mylab,代码行数:15,代码来源:t_wordseg.py


示例12: __cut

def __cut(sentence):
    """Yield ``pair(word, tag)`` items for ``sentence`` from a viterbi path.

    Each entry of the decoded path is indexed as ``[0]`` (position state) and
    ``[1]`` (tag). State ``'B'`` marks where a word starts, ``'E'`` closes the
    word begun at the last ``'B'``, and ``'S'`` emits the current character on
    its own. Any text after the last emitted word is yielded as one final
    pair tagged with the tag at the position where that remainder starts.
    """
    prob, pos_list = viterbi.viterbi(sentence, char_state_tab_P, start_P, trans_P, emit_P)
    word_start = 0
    resume_at = 0

    for idx, char in enumerate(sentence):
        entry = pos_list[idx]
        state = entry[0]
        if state == 'B':
            word_start = idx
        elif state in ('E', 'S'):
            word = sentence[word_start:idx + 1] if state == 'E' else char
            yield pair(word, entry[1])
            resume_at = idx + 1

    if resume_at < len(sentence):
        yield pair(sentence[resume_at:], pos_list[resume_at][1])
开发者ID:smerdy,项目名称:newsmosaic,代码行数:16,代码来源:__init__.py


示例13: viterbi_run

def viterbi_run(training, test_file):
    """Build tables from ``training`` and run viterbi on each test sentence.

    Every step delegates to a helper in the ``viterbi`` module; the
    descriptions in the comments below are carried over from the original
    author's notes and are not verifiable from this function alone.

    NOTE(review): in the code visible here nothing is returned and the
    per-sentence viterbi result is not stored; the function looks like an
    excerpt of a longer routine.
    """
    # Sentence lists of (word, part of speech) tuples from the training corpus.
    train_sentences = viterbi.corpus_list(training)

    # Part-of-speech tag -> number of occurrences in the corpus.
    tag_counts = viterbi.corpus_dictionary(training)

    # Part-of-speech keys.
    tag_keys = viterbi.key_list(tag_counts)

    # Prior-probability transition table for the entire corpus.
    prior_table = viterbi.transition_table(tag_counts, train_sentences)

    # Word -> per-tag occurrence counts.
    word_counts = viterbi.word_dic(train_sentences, tag_keys)

    # Word keys.
    word_keys = viterbi.key_list(word_counts)

    # Likelihood table.
    likelihoods = viterbi.word_freq(tag_counts, word_counts)

    # Sentences to tag (emissions and transitions are computed per sentence).
    test_sentences = viterbi.corpus_list_2(test_file)

    # NOTE(review): the next four locals are never read in the visible code.
    error_list = []
    error_list_i = []
    new_sentences = []
    count = 0
    for sentence in test_sentences:
        tagged = viterbi.sentence_tag(sentence, tag_keys, word_keys, likelihoods)
        sentence_tags = viterbi.sentence_pos(tagged)
        transitions = viterbi.transition_probabilities(tagged, sentence_tags, prior_table, tag_keys)

        observed = viterbi.observed_likelihoods(sentence, sentence_tags, tagged, likelihoods, word_keys, tag_keys)
        vit_sent = viterbi.viterbi(observed, sentence, sentence_tags, transitions)
开发者ID:lesliemanrique1,项目名称:HMM-Part-of-Speech-Tagger,代码行数:46,代码来源:viterbi_run.py


示例14: evaluate

def evaluate():
    """Tag the sentences in ``sys.argv[4]`` and write them to ``sys.argv[5]``.

    After the ``get_*`` initialisers have run, each sentence returned by
    ``get_sentence`` is decoded with ``viterbi.viterbi`` and written as one
    ``word tag`` line per position, followed by a blank line. Reading stops
    at the first falsy value returned by ``get_sentence``.

    Both files are opened with ``with`` so the output is flushed and both
    handles are closed even if decoding raises.
    """
    global possible_tags
    global strings
    global strings_abr
    get_words()
    get_strings()
    get_alpha()
    get_phi()
    get_regExp()
    with open(sys.argv[4], 'r') as data, open(sys.argv[5], 'w') as output:
        sentence = get_sentence(data)
        while sentence:
            tags = viterbi.viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, Words, regExp)
            for i in range(len(sentence)):
                output.write('{} {}\n'.format(sentence[i], tags[i]))
            output.write('\n')
            sentence = get_sentence(data)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:18,代码来源:perceptron_test.py


示例15: gen_couplet

def gen_couplet(transition_prob_tree, output_prob_tree, unigram_freq, first_half):
    """Generate ranked second-half candidates for the couplet ``first_half``.

    For every character of ``first_half``, ``gen_candidates`` fills one column
    of a ``top_k_word`` x ``len(first_half)`` grid of candidate characters and
    their output probabilities. ``init_model`` and ``viterbi`` then select
    paths through that grid, ``deal_repeat`` post-processes them, and every
    resulting row is scored with ``ranking_function``.

    Args:
        transition_prob_tree: passed through to ``init_model``.
        output_prob_tree: mapping looked up by each character of
            ``first_half``; also passed to ``ranking_function``.
        unigram_freq: passed through to ``init_model``.
        first_half: ``unicode`` string holding the first line of the couplet.

    Returns:
        Up to ``top_k_output`` ``(score, second_half)`` tuples sorted in
        descending order, or ``''`` when a character is missing from
        ``output_prob_tree`` or ``init_model`` fails.
    """
    assert isinstance(first_half, unicode)
    couplet_length = len(first_half)
    visible_words = np.array(list(first_half))
    hidden_candidate_words = np.array([u' '] * (top_k_word * couplet_length)).reshape(top_k_word, couplet_length)
    # Placeholder values; every column is overwritten in the loop below.
    output_prob = np.random.rand(top_k_word, couplet_length)
    for i, key in enumerate(first_half):
        if not output_prob_tree.has_key(key):
            print('%s, Cannot generate couplet' % key)
            return ''

        hash_leaf = output_prob_tree[key]
        hidden_candidate_words[:, i], output_prob[:, i] = gen_candidates(first_half, hash_leaf, top_k_word)

    try:
        transition_prob, init_prob = init_model(transition_prob_tree, unigram_freq, hidden_candidate_words, top_k_word)
    except Exception:
        # Best effort: a model that cannot be initialised yields no couplet.
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        return ''

    optimal_path, prob = viterbi(transition_prob, output_prob, init_prob, [], visible_words, top_k_word, top_k_candidate)
    optimal_path = deal_repeat(first_half, optimal_path)

    results = []
    for row in range(optimal_path.shape[0]):
        second_half = ''.join(
            hidden_candidate_words[optimal_path[row, col], col]
            for col in range(optimal_path.shape[1])
        )
        score = ranking_function(output_prob_tree, first_half, second_half)
        results.append((score, second_half))

    return sorted(results, reverse=True)[:top_k_output]
开发者ID:dnc1994,项目名称:ReKan,代码行数:39,代码来源:gen_couplets.py


示例16: perceptron

def perceptron(print_alpha = 0):
    """Train the module-level ``alpha`` weights over the file ``sys.argv[1]``.

    For up to ``T_DEFAULT`` epochs, each sentence is decoded with ``viterbi``.
    On a mismatch with the reference tags, ``add_factor`` is subtracted at the
    predicted feature indices and added at the reference indices. After every
    sentence ``alpha`` is accumulated into ``alpha_average``. The number of
    correctly tagged sentences is printed per epoch, and training stops early
    after an epoch with no mistakes.

    The training file is opened with ``with`` so the handle is released even
    if decoding or an update raises mid-epoch.

    Args:
        print_alpha: if truthy, ``write_alpha(t)`` is called after each epoch
            that did not end training.
    """
    global possible_tags
    global strings
    global strings_abr
    global add_factor
    get_regExp()
    get_strings()
    get_tags()
    for t in range(T_DEFAULT):
        print('---{0}---'.format(t))
        sys.stdout.flush()
        dont_repeat = True
        j = 0
        with open(sys.argv[1], 'r') as data:
            vals = get_sentence_and_tags(data)
            while vals:
                sentence = vals[0]
                correct_tags = vals[1]
                tags = viterbi(sentence, phi, possible_tags, alpha, strings, strings_abr, Words, regExp)
                indices = get_indices(sentence, tags)
                if not tags == correct_tags:
                    dont_repeat = False
                    correct_indices = get_indices(sentence, correct_tags)
                    for i in indices:
                        alpha[i] += -1*add_factor
                    for i in correct_indices:
                        alpha[i] += add_factor
                else:
                    j += 1
                # NOTE(review): iterating ``alpha`` directly yields its keys if
                # it is a dict but its *values* if it is a list - confirm the
                # container type before relying on the averaged weights.
                for i in alpha:
                    alpha_average[i] += alpha[i]
                vals = get_sentence_and_tags(data)
        if dont_repeat:
            print('SUCCESS!!!')
            break
        print('number correct: {0}'.format(j))
        if print_alpha:
            write_alpha(t)
开发者ID:ROZBEH,项目名称:hmm-perceptron,代码行数:39,代码来源:perceptron.py


示例17: k_fold_cross_valid_known

def k_fold_cross_valid_known(k, parsed, known, discounts):
    res = defaultdict(list)
    for train, test in _fold(parsed, k):
        for discount in discounts:
            print 'train: ', len(train), 'test: ', len(test)
            tag2id, word2id = build_dict(parsed)
            id2tag = {v: k for k, v in tag2id.iteritems()}
            id2word = {v: k for k, v in word2id.iteritems()}
            emission, transition = _counter_known(parsed, train, known,
                                                  0.85, tag2id, word2id, discount)

            count_ok, count_total = 0., 0.
            for i, seq in enumerate(test):
                out = viterbi(seq, transition, emission, word2id, tag2id)
                ok, total = _compare(seq[1:-1], id_to_token(out, id2word, id2tag))
                count_ok += ok; count_total += total
                if DEBUG:
                    print 'evaluating', i, 'th sentence.', count_ok/count_total, 'so far.'
            res[discount].append(count_ok/count_total)
            print 'Fold accuracy: ', res[discount][-1], 'discount: ', discount
    for d in res:
        print 'discount:', d, '->', 'avg:', np.mean(res[d])
开发者ID:liusiqi43,项目名称:ox-computational-linguistics,代码行数:22,代码来源:eval.py


示例18: test_model

def test_model(corpus):
    """Tag the first 1000 paragraphs with the bigram model and write results.

    Each item of every paragraph in ``corpus.corpus_sentence`` is split into
    element ``[0]`` (collected as the text) and element ``[1]`` (collected as
    the reference tag). The first 1000 texts are decoded with ``vtb.viterbi``,
    compared via ``evaluate_sentence``, and written with
    ``write_results_to_file``. The running paragraph index is printed.

    NOTE(review): ``emiss`` is reassigned by the ``tri_gram=True`` call, so
    that is the value handed to the bigram decoder; ``trans_tri`` is unused
    here. Also, ``pos_list`` is passed to ``write_results_to_file`` unsliced
    while ``word_list`` is sliced to 1000 - confirm both are intended.
    """
    word_list = []
    pos_list = []
    for paragraph in corpus.corpus_sentence:
        words = []
        tags = []
        for token in paragraph:
            words.append(token[0])
            tags.append(token[1])
        word_list.append(words)
        pos_list.append(tags)

    initp, trans_bi, emiss = corpus.get_statistics_model(tri_gram=False)
    _, trans_tri, emiss = corpus.get_statistics_model(tri_gram=True)

    bigram_result = []
    for count, paragraph in enumerate(word_list[:1000]):
        pos_bi = vtb.viterbi(paragraph, corpus.pos_list_sentence, initp, trans_bi, emiss)
        bigram_result.append(pos_bi)
        print(count)

    tp, tn, fp, fn, other = evaluate_sentence(pos_list[0:1000], bigram_result)
    write_results_to_file("test/test_model_orchid_bigram", word_list[0:1000], pos_list, bigram_result, tp, tn, fp, fn, other, test_text="bigram model test")
开发者ID:myscloud,项目名称:Question-Generation-Thai,代码行数:38,代码来源:test_model.py


示例19: predict_one

def predict_one(weight, words, possible_tags, transition):
    """Return the viterbi result for ``words`` joined by single spaces."""
    tags = viterbi(weight, words, possible_tags, transition)
    return " ".join(tags)
开发者ID:biligee,项目名称:nlp_programming_tutorial,代码行数:2,代码来源:predict.py


示例20: viterbi

import numpy as np
from viterbi import viterbi


if __name__ == '__main__':
    # Small demo: build a solver for a model with 2 hidden and 3 observed
    # symbols, then print what get_MLP returns for a fixed observation
    # sequence.
    hidden_count, observed_count = 2, 3

    observations = np.array([0, 1, 1, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 1, 1, 2])

    # Hidden-to-hidden table (2x2) and hidden-to-observed table (2x3).
    hidden_table = np.array([[0.5, 0.5], [0.5, 0.5]])
    observed_table = np.array([[0.5, 0.4, 0.1], [0.4, 0.1, 0.5]])

    solver = viterbi(hidden_count, observed_count, hidden_table, observed_table)
    result = solver.get_MLP(observations)

    print(result)
开发者ID:EmCeeEs,项目名称:machine_learning,代码行数:18,代码来源:chimp.py



注:本文中的viterbi.viterbi函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python transformer_base.extract_field_value函数代码示例发布时间:2022-05-26
下一篇:
Python vital.ConfigBlock类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap