
Python word2vec.load Function Code Examples


This article collects and summarizes typical usage examples of the word2vec.load function in Python. If you have been wondering what exactly word2vec.load does, or how to use it in practice, the curated code examples below should help.



Twenty code examples of the load function are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system surface better Python code examples.
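
Before the individual examples, here is a minimal sketch of the basic word2vec.load workflow that most of them share, based on the danielfrg/word2vec package assumed throughout this article. The model path text8.bin is a placeholder; point it at any .bin or .txt model your own training produced.

import word2vec

# placeholder path: any model produced by word2vec.word2vec() will do
model = word2vec.load('text8.bin')  # format is inferred from the file; pass kind='bin' or kind='txt' to force it
print(model.vocab.shape, model.vectors.shape)  # (vocab_size,) and (vocab_size, dim)
vec = model['dog']  # embedding vector for a single word (raises if 'dog' is not in the vocabulary)
indexes, metrics = model.cosine('dog')  # indices and cosine similarities of the 10 nearest neighbours
print(model.generate_response(indexes, metrics).tolist())

The out-of-vocabulary lookup behaviour is why several examples below wrap model[word] in a try/except with a zero-vector fallback.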

Example 1: get_w2v_labels

def get_w2v_labels(y_original, dim=200):
    # `root` and `classes` are globals defined elsewhere in the source module
    y_new = np.zeros((y_original.shape[0], dim))
    if dim == 200:
        model = word2vec.load(root + 'word2vec/vectors.bin')
    elif dim in [100, 50, 25, 10]:
        model = word2vec.load(root + 'semantic-network/data/text8-%s.bin' % dim)
    else:
        raise NotImplementedError
    for i, label in enumerate(y_original):
        y_new[i, :] = model[classes[label]]

    return y_new
Developer: bjkomer, Project: semantic-network, Lines: 12, Source: network_utils.py


Example 2: test

def test():
    # ------------ common between the two measurements ------------------------ #
    t1 = "a quick brown dog jumps over the lazy fox"
    t2 = "a quick brown fox jumps over the lazy dog"
    t2 = "jumps over the lazy fox is a quick brown dog"  # note: overrides the previous t2 with a scrambled variant
    #t1 = "Amrozi accused his brother, whom he called the witness, of deliberately distorting his evidence.".lower()
    #t2 = "Referring to him as only the witness, Amrozi accused his brother of deliberately distorting his evidence.".lower()
    #t1 = "i have to find you, tell me you need me."
    #t2 = "don't wanna know who is taking you home"
    t1 = getWords(t1)
    t2 = getWords(t2)
    t1 = flex(t1)
    t2 = flex(t2)
    t = union(t1, t2)
    #t = ["a", "brown", "jumps", "the", "fox", "dog", "quick", "over", "lazy"]
    print(t)

    model = word2vec.load('./latents.bin')
    # -------------- semantic similarity between two sentences --------------- #
    similarity_ssv = ssv(t, t1, t2, model)
    print('ssv', similarity_ssv)

    # ----------------- word similarity between sentences -------------------- #
    similarity_wo = wo(t, t1, t2, model)
    print('wo', similarity_wo)

    alpha = 0.8
    print(alpha * similarity_ssv + (1 - alpha) * similarity_wo)
Developer: ruchir594, Project: yelpbot, Lines: 28, Source: ssvwo.py


Example 3: embed

def embed(sentences):
    model = word2vec.load('~/word2vec_models/GoogleNews-vectors-negative300.bin')
    embedded_sentences = []
    tokenized_sentences = []

    max_len = 0
    for sentence in sentences:
        tokenized_sentence = sent_tokenize(sentence)
        tokenized_sentences.append(tokenized_sentence)
        if len(tokenized_sentence) > max_len:
            max_len = len(tokenized_sentence)

    for sentence in sentences:
        tokenized_sentence = sent_tokenize(sentence)
        embedded_words = []

        for word in tokenized_sentence:
            try:
                vec = model[word]       # look up the pre-trained 300-d vector for this token
            except Exception:
                vec = np.zeros(300)     # out-of-vocabulary tokens become zero vectors
            embedded_words.append(vec)

        # pad every sentence up to the length of the longest one
        for i in range(max_len - len(embedded_words)):
            embedded_words.append(np.zeros(300))

        embedded_sentences.append(embedded_words)

    embedded_sentences = np.array(embedded_sentences)

    return embedded_sentences
Developer: RemedyHealthcare, Project: cnn-text-classification-tf, Lines: 33, Source: data_helpers.py


Example 4: predict

def predict():
    model = word2vec.load('./latents.bin')
    predictions = []
    with open('MSRParaphraseCorpus/MSR_easy.txt') as f:
        data = f.readlines()
    block = []
    for each in data:
        block.append(flex(getWords(each.lower())))
    i = 1
    while i + 1 < len(block):
        if abs(int(block[i][0]) - int(block[i+1][0])) < 200:
            t1 = block[i][1:]
            t2 = block[i+1][1:]
            t = union(t1, t2)
            # -------------- semantic similarity between two sentences ------- #
            similarity_ssv = ssv(t, t1, t2, model)
            #print('ssv', similarity_ssv)

            # ----------------- word similarity between sentences ------------ #
            similarity_wo = wo(t, t1, t2, model)
            #print('wo', similarity_wo)

            alpha = 0.8
            similarity = alpha * similarity_ssv + (1 - alpha) * similarity_wo
            print(similarity, str(block[i][0]), str(block[i+1][0]))
            predictions.append([similarity, str(block[i][0]), str(block[i+1][0])])
            i = i + 2
        else:
            i = i + 1
Developer: ruchir594, Project: yelpbot, Lines: 29, Source: ssvwo.py


Example 5: test_distance

def test_distance():
    model = word2vec.load(output_txt)
    metrics = model.distance("the", "the", "the")
    assert len(metrics) == 3
    for item in metrics:
        # There should be 3 items per record
        assert len(item) == 3
Developer: danielfrg, Project: word2vec, Lines: 7, Source: test_word2vec.py


Example 6: get_char_embedding

def get_char_embedding():
    """Extract the character embeddings and save them to ../data/char_embedding.npy"""
    print('getting the char_embedding.npy')
    wv = word2vec.load('../raw_data/char_embedding.txt')
    char_embedding = wv.vectors
    chars = wv.vocab
    n_special_sym = len(SPECIAL_SYMBOL)
    sr_id2char = pd.Series(chars, index=range(n_special_sym, n_special_sym + len(chars)))
    sr_char2id = pd.Series(range(n_special_sym, n_special_sym + len(chars)), index=chars)

    # add the special symbols: <PAD>:0, <UNK>:1
    embedding_size = 256

    vec_special_sym = np.random.randn(n_special_sym, embedding_size)
    for i in range(n_special_sym):
        sr_id2char[i] = SPECIAL_SYMBOL[i]
        sr_char2id[SPECIAL_SYMBOL[i]] = i
    char_embedding = np.vstack([vec_special_sym, char_embedding])
    # save the character embedding matrix
    save_path = '../data/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    np.save(save_path + 'char_embedding.npy', char_embedding)
    # save the char-to-id mappings
    with open(save_path + 'sr_char2id.pkl', 'wb') as outp:
        pickle.dump(sr_id2char, outp)
        pickle.dump(sr_char2id, outp)
    print('Saving the char_embedding.npy to ../data/char_embedding.npy')
Developer: brucexia6116, Project: zhihu-text-classification, Lines: 28, Source: embed2ndarray.py


Example 7: loadArg1

def loadArg1():
    model = word2vec.load("/mnt/mint_share/text8.bin")
    data = np.empty((17572, 1, 100, 100), dtype='float64')
    label = np.empty((17472,), dtype='uint8')
    with codecs.open("/mnt/mint_share/train_pdtb.json", "rU", "utf-8") as f:
        for i, line in enumerate(f):
            unit = json.loads(line)
            len1 = len(unit['Arg1']['Word'])
            if len1 < 100:
                for j in range(len1):
                    try:
                        j_ = model[unit['Arg1']['Word'][j]]
                    except:
                        j_ = model['fillin']   # fall back to the 'fillin' vector for OOV words
                    data[i, :, j, :] = j_
                for j in range(100 - len1):
                    data[i, :, len1 + j, :] = model['fillin']   # pad the sequence to length 100
            else:
                for j in range(100):
                    try:
                        j_ = model[unit['Arg1']['Word'][j]]
                    except:
                        j_ = model['fillin']
                    data[i, :, j, :] = j_
    with open("arg1_image_100", "wb") as f1:
        # dill.dump(data, f1)
        cPickle.dump(data, f1, protocol=2)
Developer: sjtu-lyj, Project: nlp_keras_nn, Lines: 27, Source: trainmakeImage.py


Example 8: create_voabulary

def create_voabulary(simple=None, word2vec_model_path='zhihu-word2vec-title-desc.bin-100', name_scope=''): #zhihu-word2vec-multilabel.bin-100
    cache_path = 'cache_vocabulary_label_pik/' + name_scope + "_word_voabulary.pik"
    print("cache_path:", cache_path, "file_exists:", os.path.exists(cache_path))
    if os.path.exists(cache_path):  # read directly from the cache file if it exists
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index, vocabulary_index2word = pickle.load(data_f)
            return vocabulary_word2index, vocabulary_index2word
    else:
        vocabulary_word2index = {}
        vocabulary_index2word = {}
        if simple is not None:
            word2vec_model_path = 'zhihu-word2vec.bin-100'
        print("create vocabulary. word2vec_model_path:", word2vec_model_path)
        model = word2vec.load(word2vec_model_path, kind='bin')
        vocabulary_word2index['PAD_ID'] = 0
        vocabulary_index2word[0] = 'PAD_ID'
        special_index = 0
        if 'biLstmTextRelation' in name_scope:
            vocabulary_word2index['EOS'] = 1  # a special token for the biLstmTextRelation model, used between two sentences
            vocabulary_index2word[1] = 'EOS'
            special_index = 1
        for i, vocab in enumerate(model.vocab):
            vocabulary_word2index[vocab] = i + 1 + special_index
            vocabulary_index2word[i + 1 + special_index] = vocab

        # save to the file system if the vocabulary does not exist yet
        if not os.path.exists(cache_path):  # write to the cache file if it is missing
            with open(cache_path, 'wb') as data_f:
                pickle.dump((vocabulary_word2index, vocabulary_index2word), data_f)
    return vocabulary_word2index, vocabulary_index2word
Developer: AmjadHisham, Project: text_classification, Lines: 30, Source: data_util_zhihu.py


Example 9: save_latent_features_of_tagsjson

def save_latent_features_of_tagsjson():
    model = word2vec.load('../lib/word2vec/vectors.bin')
    all_tags = []
    with open('tags.json', 'r') as f:
        data = json.load(f)

    i = 0
    while i < len(data['item']):
        all_tags = all_tags + data['item'][i]['tag_text'].replace('"', '').lower().split('|')
        all_tags = all_tags + data['item'][i]['tag_query'].replace('"', '').lower().split('|')
        i = i + 1
    i = 0
    while i < len(all_tags):
        if all_tags[i][0] == ' ':   # strip leading spaces one character at a time
            all_tags[i] = all_tags[i][1:]
            i = i - 1
        i = i + 1
    print(all_tags)
    latent_tags = []
    latent_model = []
    for i in all_tags:
        try:
            a = model[str(i)]
            latent_tags.append(str(i))
            latent_model.append(a)
        except Exception as e:
            print(i)
            print(e)
Developer: ruchir594, Project: allevents, Lines: 28, Source: w2v.py


Example 10: assign_pretrained_word_embedding

def assign_pretrained_word_embedding(sess, vocabulary_index2word, vocab_size, model, word2vec_model_path=None):
    print("using pre-trained word embedding. started. word2vec_model_path:", word2vec_model_path)
    # word2vecc=word2vec.load('word_embedding.txt') #load vocab-vector file. word2vecc['w91874']
    word2vec_model = word2vec.load(word2vec_model_path, kind='bin')
    word2vec_dict = {}
    for word, vector in zip(word2vec_model.vocab, word2vec_model.vectors):
        word2vec_dict[word] = vector
    word_embedding_2dlist = [[]] * vocab_size  # create an empty word_embedding list.
    word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size)  # assign zeros to the first word: 'PAD'
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)  # bound for the random initialization.
    count_exist = 0
    count_not_exist = 0
    for i in range(1, vocab_size):  # loop over each word
        word = vocabulary_index2word[i]  # get the word
        embedding = None
        try:
            embedding = word2vec_dict[word]  # try to get its vector: an array.
        except Exception:
            embedding = None
        if embedding is not None:  # this word has a pre-trained embedding
            word_embedding_2dlist[i] = embedding
            count_exist = count_exist + 1  # assign the array to this word.
        else:  # no embedding for this word
            word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size)
            count_not_exist = count_not_exist + 1  # initialize a random value for the word.
    word_embedding_final = np.array(word_embedding_2dlist)  # convert to a 2d array.
    word_embedding = tf.constant(word_embedding_final, dtype=tf.float32)  # convert to a tensor
    t_assign_embedding = tf.assign(model.Embedding, word_embedding)  # assign this value to the embedding variable of our model.
    sess.run(t_assign_embedding)
    print("words with an embedding:", count_exist, "; words without an embedding:", count_not_exist)
    print("using pre-trained word embedding. ended...")
Developer: brucexia6116, Project: text_classification, Lines: 31, Source: a8_train.py


Example 11: test_load_txt

def test_load_txt():
    model = word2vec.load(output_txt)
    vocab = model.vocab
    vectors = model.vectors

    assert vectors.shape[0] == vocab.shape[0]
    assert vectors.shape[0] > 3000
    assert vectors.shape[1] == 10
Developer: danielfrg, Project: word2vec, Lines: 8, Source: test_word2vec.py


Example 12: load

def load(modelpath):

    model = word2vec.load(modelpath)

    # vocab entries are already unicode strings under Python 3
    nvocab = [str(w) for w in model.vocab]
    index = {v: n for n, v in enumerate(nvocab)}
    l2norm = model.l2norm

    return (index, l2norm)
Developer: renning22, Project: cortana, Lines: 9, Source: w2v.py


Example 13: test_closest

def test_closest():
    model = word2vec.load(output_txt)
    indexes, metrics = model.closest(model["the"], n=30)
    assert indexes.shape == (30,)
    assert indexes.shape == metrics.shape

    py_response = model.generate_response(indexes, metrics).tolist()
    assert len(py_response) == 30
    assert len(py_response[0]) == 2
Developer: danielfrg, Project: word2vec, Lines: 9, Source: test_word2vec.py


Example 14: test_prediction

def test_prediction():
    model = word2vec.load(output_bin)
    indexes, metrics = model.cosine('the')
    assert indexes.shape == (10,)
    assert indexes.shape == metrics.shape

    py_response = model.generate_response(indexes, metrics).tolist()
    assert len(py_response) == 10
    assert len(py_response[0]) == 2
Developer: MoherX, Project: word2vec, Lines: 9, Source: test_word2vec.py


Example 15: test_analogy

def test_analogy():
    model = word2vec.load(output_txt)
    indexes, metrics = model.analogy(pos=["the", "the"], neg=["the"], n=20)
    assert indexes.shape == (20,)
    assert indexes.shape == metrics.shape

    py_response = model.generate_response(indexes, metrics).tolist()
    assert len(py_response) == 20
    assert len(py_response[0]) == 2
Developer: danielfrg, Project: word2vec, Lines: 9, Source: test_word2vec.py


Example 16: load_wv_model

def load_wv_model(word_vector_file, word_vector_type):
    if word_vector_type == WordVectorTypes.glove.name:
        #from glove import Glove
        glove_model = GloveWrapper.load(word_vector_file)
        wv_model = GloveWrapper(glove_model)
    else: 
        import word2vec
        w2v_model = word2vec.load(word_vector_file)
        wv_model = W2VWrapper(w2v_model)
    return wv_model
Developer: Lab41, Project: attalos, Lines: 10, Source: main.py


Example 17: extract

def extract(dim, data, trained):
    if not trained:
        word2vec.word2phrase(data, data + '-phrases', verbose=True)
        word2vec.word2vec(data + '-phrases', data + '.bin', size=dim, verbose=True)
    model = word2vec.load(data + '.bin')
    keys = model.vocab
    features = model.vectors
    dic = dict(zip(keys, features))
    print(len(dic))
    return dic
Developer: We-can-apply-GPU, Project: aMeLiDoSu-HW3, Lines: 10, Source: extract.py


Example 18: test_similar

def test_similar():
    model = word2vec.load(output_bin)
    indexes, metrics = model.similar("the")
    assert indexes.shape == (10,)
    assert indexes.shape == metrics.shape

    py_response = model.generate_response(indexes, metrics).tolist()
    print(py_response)
    assert len(py_response) == 10
    assert len(py_response[0]) == 2
Developer: danielfrg, Project: word2vec, Lines: 10, Source: test_word2vec.py


Example 19: __init__

def __init__(self):
    self.word2vec_model = None
    self.cosine_similarity_map = {}
    self.word_vectors_map = {}

    print('Loading word vectors into the python model ...')
    start_time = time.time()
    self.word2vec_model = wv.load(cap.absolute_path + './wordvectors/pubmed.bin')
    print('The execution time for the loading was', time.time() - start_time)
    print('word2vec_model.vocab', self.word2vec_model.vocab)
Developer: sgarg87, Project: big_mech_isi_gg, Lines: 10, Source: word_vectors.py


Example 20: getanology

def getanology(second, first, third):
    import word2vec
    # Load the word2vec binary file: dataset
    model = word2vec.load('/export/home/sysadmin/text8.bin')

    # We can do simple queries to retrieve words related to "word"

    indexes, metrics = model.analogy(pos=[first, third], neg=[second], n=10)

    #model.vocab[indexes]
    related_word = model.vocab[indexes[0]]

    return related_word
Developer: crbothe, Project: naoyadtk, Lines: 13, Source: nao_intelectual.py



Note: The word2vec.load examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with those authors; consult each project's License before redistributing or reusing the code. Please do not repost without permission.

