本文整理汇总了Python中word2vec.load函数的典型用法代码示例。如果您正苦于以下问题：Python load函数的具体用法？Python load怎么用？Python load使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_w2v_labels
def get_w2v_labels(y_original, dim=200):
    """Replace every label in ``y_original`` with a word2vec vector.

    Args:
        y_original: Array of labels. Each label is used as an index into the
            ``classes`` lookup (defined outside this function) to obtain the
            key that is looked up in the word2vec model.
        dim: Width of the vectors. 200 selects ``word2vec/vectors.bin``;
            100, 50, 25 and 10 select the matching ``text8-<dim>.bin`` file.

    Returns:
        An array of shape ``(y_original.shape[0], dim)`` holding one vector
        per label, in input order.

    Raises:
        NotImplementedError: If no model file is known for ``dim``.
    """
    y_new = np.zeros((y_original.shape[0], dim))

    # Pick the model file first so that a single load call serves all sizes.
    if dim == 200:
        model_path = root + 'word2vec/vectors.bin'
    elif dim in (100, 50, 25, 10):
        model_path = root + 'semantic-network/data/text8-%s.bin' % dim
    else:
        raise NotImplementedError
    model = word2vec.load(model_path)

    for row, label in enumerate(y_original):
        y_new[row, :] = model[classes[label]]
    return y_new
开发者ID:bjkomer,项目名称:semantic-network,代码行数:12,代码来源:network_utils.py
示例2: test
def test():
    """Print the similarity of two sample sentences.

    Two measures are computed with the helpers ``ssv`` and ``wo`` (defined
    outside this function) on the model stored in ``./latents.bin``; they are
    then blended as ``alpha * ssv + (1 - alpha) * wo`` with ``alpha = 0.8``.
    All results are written to standard output; nothing is returned.
    """
    # ------------ common between two measurements --------------------------- #
    t1 = "a quick brown dog jumps over the lazy fox"
    t2 = "jumps over the lazy fox is a quick brown dog"
    t1 = getWords(t1)
    t2 = getWords(t2)
    t1 = flex(t1)
    t2 = flex(t2)
    t = union(t1, t2)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(t)
    model = word2vec.load('./latents.bin')

    # -------------- semantic similarity between two sentences --------------- #
    similarity_ssv = ssv(t, t1, t2, model)
    # The doubled space reproduces what `print 'ssv ', value` emits on Python 2.
    print('ssv  %s' % (similarity_ssv,))

    # ----------------- word similarity between sentences -------------------- #
    similarity_wo = wo(t, t1, t2, model)
    print('wo  %s' % (similarity_wo,))

    alpha = 0.8
    print(alpha * similarity_ssv + (1 - alpha) * similarity_wo)
开发者ID:ruchir594,项目名称:yelpbot,代码行数:28,代码来源:ssvwo.py
示例3: embed
def embed(sentences):
    """Embed each sentence as a padded sequence of word2vec vectors.

    Every token is replaced by its vector from the GoogleNews model; tokens
    that are missing from the model become zero vectors. Shorter sequences are
    right-padded with zero vectors up to the length of the longest one.

    Args:
        sentences: Iterable of strings.

    Returns:
        ``np.array`` built from one list of 300-wide vectors per sentence,
        every list having the same length (assumes the model's own vectors
        are 300-wide, as the file name suggests -- TODO confirm).
    """
    # Local import: only this function needs ``os``, to expand the "~" below.
    import os

    embedding_dim = 300
    # open() does not expand "~", so the home directory is resolved explicitly.
    model = word2vec.load(os.path.expanduser(
        '~/word2vec_models/GoogleNews-vectors-negative300.bin'))

    # NOTE(review): ``sent_tokenize`` is defined outside this block; if it is
    # NLTK's, it yields sentences rather than words -- confirm this is intended.
    tokenized_sentences = [sent_tokenize(sentence) for sentence in sentences]
    max_len = max([len(tokens) for tokens in tokenized_sentences] or [0])

    embedded_sentences = []
    for tokens in tokenized_sentences:
        embedded_words = []
        for word in tokens:
            try:
                # Look up the token itself (the original looked up the
                # literal string 'word' for every token).
                vector = model[word]
            except KeyError:
                # Out-of-vocabulary token.
                vector = np.zeros(embedding_dim)
            embedded_words.append(vector)
        # padding
        for _ in range(max_len - len(embedded_words)):
            embedded_words.append(np.zeros(embedding_dim))
        embedded_sentences.append(embedded_words)
    return np.array(embedded_sentences)
开发者ID:RemedyHealthcare,项目名称:cnn-text-classification-tf,代码行数:33,代码来源:data_helpers.py
示例4: predict
def predict():
    """Score consecutive sentence pairs from ``MSRParaphraseCorpus/MSR_easy.txt``.

    Each line is lower-cased and passed through ``getWords`` and ``flex``
    (defined outside this function). The first token of a processed line is
    parsed as an integer id. Starting at the second line, two neighbouring
    lines whose ids differ by less than 200 are treated as a pair: their
    similarity ``0.8 * ssv + 0.2 * wo`` is printed and collected, and the scan
    advances by two lines. Otherwise it advances by one line.
    """
    model = word2vec.load('./latents.bin')
    predictions = []
    with open('MSRParaphraseCorpus/MSR_easy.txt') as f:
        block = [flex(getWords(line.lower())) for line in f]

    alpha = 0.8
    i = 1  # index 0 is skipped, as in the original
    while i + 1 < len(block):
        first_id = block[i][0]
        second_id = block[i + 1][0]
        if abs(int(first_id) - int(second_id)) < 200:
            t1 = block[i][1:]
            t2 = block[i + 1][1:]
            t = union(t1, t2)
            # -------------- semantic similarity between two sentences ------- #
            similarity_ssv = ssv(t, t1, t2, model)
            # ----------------- word similarity between sentences ------------ #
            similarity_wo = wo(t, t1, t2, model)
            similarity = alpha * similarity_ssv + (1 - alpha) * similarity_wo
            # Formatted so the output is the same on Python 2 and Python 3.
            print('%s %s %s' % (similarity, str(first_id), str(second_id)))
            predictions.append([similarity, str(first_id), str(second_id)])
            i = i + 2
        else:
            i = i + 1
    # NOTE(review): ``predictions`` is filled but neither returned nor stored
    # in the visible code -- confirm whether the caller expects it back.
开发者ID:ruchir594,项目名称:yelpbot,代码行数:29,代码来源:ssvwo.py
示例5: test_distance
def test_distance():
    """``distance`` on three words yields three records of three items each."""
    records = word2vec.load(output_txt).distance("the", "the", "the")
    assert len(records) == 3
    # There should be 3 items per record
    assert all(len(record) == 3 for record in records)
开发者ID:danielfrg,项目名称:word2vec,代码行数:7,代码来源:test_word2vec.py
示例6: get_char_embedding
def get_char_embedding():
    """Extract the character vectors and save them to ../data/char_embedding.npy.

    Loads ``../raw_data/char_embedding.txt``, reserves the first
    ``len(SPECIAL_SYMBOL)`` ids for the special symbols (given random vectors),
    shifts every loaded character id by that amount, and writes:

    * ``../data/char_embedding.npy`` -- special-symbol rows stacked on top of
      the loaded vectors;
    * ``../data/sr_char2id.pkl`` -- two consecutive pickles in one file: the
      id->char Series first, then the char->id Series.
    """
    print('getting the char_embedding.npy')
    wv = word2vec.load('../raw_data/char_embedding.txt')
    char_embedding = wv.vectors
    chars = wv.vocab
    n_special_sym = len(SPECIAL_SYMBOL)
    char_ids = range(n_special_sym, n_special_sym + len(chars))
    sr_id2char = pd.Series(chars, index=char_ids)
    sr_char2id = pd.Series(char_ids, index=chars)

    # Add the special symbols (original comment: <PAD>:0, <UNK>:1).
    # The width comes from the loaded vectors instead of a hard-coded 256, so
    # the vstack below cannot fail on a model with a different vector size.
    embedding_size = char_embedding.shape[1]
    vec_special_sym = np.random.randn(n_special_sym, embedding_size)
    for i in range(n_special_sym):
        sr_id2char[i] = SPECIAL_SYMBOL[i]
        sr_char2id[SPECIAL_SYMBOL[i]] = i
    char_embedding = np.vstack([vec_special_sym, char_embedding])

    # Save the character vectors.
    save_path = '../data/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    np.save(save_path + 'char_embedding.npy', char_embedding)

    # Save the char <-> id mappings.
    with open(save_path + 'sr_char2id.pkl', 'wb') as outp:
        pickle.dump(sr_id2char, outp)
        pickle.dump(sr_char2id, outp)
    print('Saving the char_embedding.npy to ../data/char_embedding.npy')
开发者ID:brucexia6116,项目名称:zhihu-text-classification,代码行数:28,代码来源:embed2ndarray.py
示例7: loadArg1
def loadArg1():
    """Turn the ``Arg1`` words of every training record into a 100x100 matrix.

    Reads ``/mnt/mint_share/train_pdtb.json`` one JSON document per line. For
    each record the first 100 entries of ``unit['Arg1']['Word']`` are replaced
    by their word2vec vectors; unknown words and unused trailing rows get the
    vector of the placeholder token ``'fillin'``. The resulting
    ``(17572, 1, 100, 100)`` float64 array is pickled to ``arg1_image_100``.
    """
    max_words = 100
    model = word2vec.load("/mnt/mint_share/text8.bin")
    # Looked up once; used for unknown words and for padding.
    fill_vector = model['fillin']
    # NOTE(review): np.empty leaves memory uninitialised, so rows beyond the
    # number of lines actually read keep arbitrary values, and a file with
    # more than 17572 lines raises IndexError -- confirm the expected count.
    data = np.empty((17572, 1, max_words, 100), dtype='float64')
    with codecs.open("/mnt/mint_share/train_pdtb.json", "rU", "utf-8") as f:
        for i, line in enumerate(f):
            words = json.loads(line)['Arg1']['Word']
            n_words = min(len(words), max_words)
            for j in range(n_words):
                try:
                    vector = model[words[j]]
                except KeyError:
                    # Word missing from the model vocabulary.
                    vector = fill_vector
                data[i, :, j, :] = vector
            # Pad the remaining rows; a no-op when the record has >= 100 words.
            data[i, :, n_words:, :] = fill_vector
    with open("arg1_image_100", "wb") as f1:
        cPickle.dump(data, f1, protocol=2)
开发者ID:sjtu-lyj,项目名称:nlp_keras_nn,代码行数:27,代码来源:trainmakeImage.py
示例8: create_voabulary
def create_voabulary(simple=None, word2vec_model_path='zhihu-word2vec-title-desc.bin-100', name_scope=''):
    """Build (or load from cache) the word<->index vocabulary mappings.

    Index 0 is always ``'PAD_ID'``. When ``name_scope`` contains
    ``'biLstmTextRelation'``, index 1 is ``'EOS'``. The words of the word2vec
    model follow, in model order.

    Args:
        simple: When not ``None``, the model path is overridden with
            ``'zhihu-word2vec.bin-100'``.
        word2vec_model_path: Path of the binary word2vec model to read.
        name_scope: Prefix of the cache file name inside
            ``cache_vocabulary_label_pik/``.

    Returns:
        Tuple ``(vocabulary_word2index, vocabulary_index2word)`` of dicts.
    """
    cache_path = 'cache_vocabulary_label_pik/' + name_scope + "_word_voabulary.pik"
    print("cache_path:", cache_path, "file_exists:", os.path.exists(cache_path))

    # If the cache file exists, read it directly.
    if os.path.exists(cache_path):
        # Pickle data is binary: text mode fails on Python 3.
        # The cache is assumed to be a file this function wrote earlier;
        # do not point it at untrusted data.
        with open(cache_path, 'rb') as data_f:
            vocabulary_word2index, vocabulary_index2word = pickle.load(data_f)
        return vocabulary_word2index, vocabulary_index2word

    vocabulary_word2index = {}
    vocabulary_index2word = {}
    if simple is not None:
        word2vec_model_path = 'zhihu-word2vec.bin-100'
    print("create vocabulary. word2vec_model_path:", word2vec_model_path)
    model = word2vec.load(word2vec_model_path, kind='bin')

    vocabulary_word2index['PAD_ID'] = 0
    vocabulary_index2word[0] = 'PAD_ID'
    special_index = 0
    if 'biLstmTextRelation' in name_scope:
        # Special token for the biLstmTextRelation model, placed between two
        # sentences.
        vocabulary_word2index['EOS'] = 1
        vocabulary_index2word[1] = 'EOS'
        special_index = 1
    for i, vocab in enumerate(model.vocab):
        vocabulary_word2index[vocab] = i + 1 + special_index
        vocabulary_index2word[i + 1 + special_index] = vocab

    # Write the vocabulary to the cache file (known to be absent at this
    # point), creating the cache directory first if needed.
    cache_dir = os.path.dirname(cache_path)
    if cache_dir and not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(cache_path, 'wb') as data_f:
        pickle.dump((vocabulary_word2index, vocabulary_index2word), data_f)
    return vocabulary_word2index, vocabulary_index2word
开发者ID:AmjadHisham,项目名称:text_classification,代码行数:30,代码来源:data_util_zhihu.py
示例9: save_latent_features_of_tagsjson
def save_latent_features_of_tagsjson():
    """Collect word2vec vectors for every tag listed in ``tags.json``.

    For each entry of ``data['item']`` the ``tag_text`` and ``tag_query``
    fields are stripped of double quotes, lower-cased and split on ``'|'``.
    Leading spaces are removed from every tag, the tag list is printed, and
    each tag is looked up in the model. Tags whose lookup fails are printed
    together with the error.
    """
    model = word2vec.load('../lib/word2vec/vectors.bin')
    with open('tags.json', 'r') as f:
        data = json.load(f)

    all_tags = []
    for item in data['item']:
        all_tags.extend(item['tag_text'].replace('"', '').lower().split('|'))
        all_tags.extend(item['tag_query'].replace('"', '').lower().split('|'))

    # Drop leading spaces. Unlike indexing ``tag[0]``, lstrip is safe for empty
    # tags (e.g. produced by "a||b") and for tags made only of spaces.
    all_tags = [tag.lstrip(' ') for tag in all_tags]
    print(all_tags)

    latent_tags = []
    latent_model = []
    for tag in all_tags:
        try:
            vector = model[str(tag)]
            latent_tags.append(str(tag))
            latent_model.append(vector)
        except Exception as e:
            # Best effort: report the tag that could not be embedded and go on.
            print(tag)
            print(e)
    # NOTE(review): ``latent_tags`` / ``latent_model`` are filled but neither
    # returned nor written anywhere in the visible code -- confirm intent.
开发者ID:ruchir594,项目名称:allevents,代码行数:28,代码来源:w2v.py
示例10: assign_pretrained_word_embedding
def assign_pretrained_word_embedding(sess, vocabulary_index2word, vocab_size, model, word2vec_model_path=None):
    """Initialise ``model.Embedding`` from a pre-trained word2vec model.

    Row 0 is a zero vector (the original comment names it the 'PAD' row). For
    indices ``1 .. vocab_size - 1`` the pre-trained vector of the word is used
    when the word2vec model has one; otherwise the row is drawn uniformly from
    ``[-bound, bound]`` with ``bound = sqrt(6) / sqrt(vocab_size)``.

    Args:
        sess: Session whose ``run`` executes the assign op.
        vocabulary_index2word: Mapping from row index to word.
        vocab_size: Number of rows of the embedding matrix.
        model: Object exposing the ``Embedding`` variable to overwrite.
        word2vec_model_path: Path of the binary word2vec model.
    """
    print("using pre-trained word emebedding.started.word2vec_model_path:", word2vec_model_path)
    word2vec_model = word2vec.load(word2vec_model_path, kind='bin')
    word2vec_dict = dict(zip(word2vec_model.vocab, word2vec_model.vectors))

    # One slot per vocabulary row; every slot is filled below.
    word_embedding_2dlist = [None] * vocab_size
    word_embedding_2dlist[0] = np.zeros(FLAGS.embed_size)  # first row: all zeros
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)  # bound for the random rows
    count_exist = 0
    count_not_exist = 0
    for i in range(1, vocab_size):
        word = vocabulary_index2word[i]
        # dict.get replaces a try/except Exception used only to detect a miss.
        embedding = word2vec_dict.get(word)
        if embedding is not None:
            word_embedding_2dlist[i] = embedding
            count_exist = count_exist + 1
        else:
            word_embedding_2dlist[i] = np.random.uniform(-bound, bound, FLAGS.embed_size)
            count_not_exist = count_not_exist + 1

    word_embedding_final = np.array(word_embedding_2dlist)  # to a 2-D array
    word_embedding = tf.constant(word_embedding_final, dtype=tf.float32)
    # Overwrite the model's embedding variable with the assembled matrix.
    t_assign_embedding = tf.assign(model.Embedding, word_embedding)
    sess.run(t_assign_embedding)
    print("word. exists embedding:", count_exist, " ;word not exist embedding:", count_not_exist)
    print("using pre-trained word emebedding.ended...")
开发者ID:brucexia6116,项目名称:text_classification,代码行数:31,代码来源:a8_train.py
示例11: test_load_txt
def test_load_txt():
    """A model loaded from ``output_txt`` has matching vocab and vector counts."""
    loaded = word2vec.load(output_txt)
    vocab_shape = loaded.vocab.shape
    vectors_shape = loaded.vectors.shape
    assert vectors_shape[0] == vocab_shape[0]
    assert vectors_shape[0] > 3000
    assert vectors_shape[1] == 10
开发者ID:danielfrg,项目名称:word2vec,代码行数:8,代码来源:test_word2vec.py
示例12: load
def load(modelpath):
    """Load a word2vec model and index its vocabulary.

    Args:
        modelpath: Path handed to ``word2vec.load``.

    Returns:
        Tuple ``(index, l2norm)``: ``index`` maps every vocabulary word (as
        text) to its position in the vocabulary, ``l2norm`` is the model's
        ``l2norm`` attribute.
    """
    model = word2vec.load(modelpath)
    # Decode byte strings as UTF-8 and keep text as-is. This matches
    # ``unicode(word, 'utf-8')`` on Python 2 and also runs on Python 3, where
    # the ``unicode`` builtin does not exist.
    nvocab = [
        word.decode('utf-8') if isinstance(word, bytes) else word
        for word in model.vocab
    ]
    index = {word: position for position, word in enumerate(nvocab)}
    l2norm = model.l2norm
    return (index, l2norm)
开发者ID:renning22,项目名称:cortana,代码行数:9,代码来源:w2v.py
示例13: test_closest
def test_closest():
    """``closest`` with ``n=30`` returns 30 aligned indexes and metrics."""
    expected = 30
    model = word2vec.load(output_txt)
    indexes, metrics = model.closest(model["the"], n=expected)
    assert indexes.shape == (expected,)
    assert indexes.shape == metrics.shape
    rows = model.generate_response(indexes, metrics).tolist()
    assert len(rows) == expected
    assert len(rows[0]) == 2
开发者ID:danielfrg,项目名称:word2vec,代码行数:9,代码来源:test_word2vec.py
示例14: test_prediction
def test_prediction():
    """``cosine`` on a known word returns 10 aligned indexes and metrics."""
    expected = 10
    model = word2vec.load(output_bin)
    indexes, metrics = model.cosine('the')
    assert indexes.shape == (expected,)
    assert indexes.shape == metrics.shape
    rows = model.generate_response(indexes, metrics).tolist()
    assert len(rows) == expected
    assert len(rows[0]) == 2
开发者ID:MoherX,项目名称:word2vec,代码行数:9,代码来源:test_word2vec.py
示例15: test_analogy
def test_analogy():
    """``analogy`` with ``n=20`` returns 20 aligned indexes and metrics."""
    expected = 20
    model = word2vec.load(output_txt)
    indexes, metrics = model.analogy(pos=["the", "the"], neg=["the"], n=expected)
    assert indexes.shape == (expected,)
    assert indexes.shape == metrics.shape
    rows = model.generate_response(indexes, metrics).tolist()
    assert len(rows) == expected
    assert len(rows[0]) == 2
开发者ID:danielfrg,项目名称:word2vec,代码行数:9,代码来源:test_word2vec.py
示例16: load_wv_model
def load_wv_model(word_vector_file, word_vector_type):
    """Load a word-vector file and wrap it in the matching wrapper class.

    Args:
        word_vector_file: Path of the vector file to load.
        word_vector_type: Compared against ``WordVectorTypes.glove.name``; any
            other value selects the word2vec loader.

    Returns:
        A ``GloveWrapper`` for glove vectors, otherwise a ``W2VWrapper``.
    """
    if word_vector_type == WordVectorTypes.glove.name:
        return GloveWrapper(GloveWrapper.load(word_vector_file))

    # Import kept local to the word2vec path, as in the original.
    import word2vec
    return W2VWrapper(word2vec.load(word_vector_file))
开发者ID:Lab41,项目名称:attalos,代码行数:10,代码来源:main.py
示例17: extract
def extract(dim, data, trained):
    """Return a word -> vector dict, training the model first when required.

    Args:
        dim: Vector size passed to ``word2vec.word2vec`` as ``size``.
        data: Path of the corpus; the phrase file is ``data + '-phrases'`` and
            the model file is ``data + '.bin'``.
        trained: When falsy, the phrase file and the model are (re)built
            before loading.

    Returns:
        Dict mapping each vocabulary entry to its vector. Its size is printed.
    """
    model_path = data + '.bin'
    if not trained:
        phrases_path = data + '-phrases'
        word2vec.word2phrase(data, phrases_path, verbose=True)
        word2vec.word2vec(phrases_path, model_path, size=dim, verbose=True)
    model = word2vec.load(model_path)
    dic = dict(zip(model.vocab, model.vectors))
    print(len(dic))
    return dic
开发者ID:We-can-apply-GPU,项目名称:aMeLiDoSu-HW3,代码行数:10,代码来源:extract.py
示例18: test_similar
def test_similar():
    """``similar`` on a known word returns 10 aligned indexes and metrics."""
    expected = 10
    model = word2vec.load(output_bin)
    indexes, metrics = model.similar("the")
    assert indexes.shape == (expected,)
    assert indexes.shape == metrics.shape
    rows = model.generate_response(indexes, metrics).tolist()
    print(rows)
    assert len(rows) == expected
    assert len(rows[0]) == 2
开发者ID:danielfrg,项目名称:word2vec,代码行数:10,代码来源:test_word2vec.py
示例19: __init__
def __init__(self):
    """Create empty caches and load the word vectors.

    The model is read from ``cap.absolute_path + './wordvectors/pubmed.bin'``.
    The loading time and the model vocabulary are printed.
    """
    self.word2vec_model = None
    self.cosine_similarity_map = {}
    self.word_vectors_map = {}
    #
    # Single-argument print(...) with %-formatting gives the same output on
    # Python 2 and Python 3.
    print('Loading word vectors into the python model ...')
    start_time = time.time()
    self.word2vec_model = wv.load(cap.absolute_path+'./wordvectors/pubmed.bin')
    # The doubled space reproduces what `print 'text ', value` emits on Python 2.
    print('The execution time for the loading was  %s' % (time.time() - start_time,))
    print('word2vec_model.vocab %s' % (self.word2vec_model.vocab,))
开发者ID:sgarg87,项目名称:big_mech_isi_gg,代码行数:10,代码来源:word_vectors.py
示例20: getanology
def getanology(second, first, third):
    """Return the top analogy match for ``first + third - second``.

    Args:
        second: Word passed as the negative term.
        first: First positive term.
        third: Second positive term.

    Returns:
        The vocabulary entry at the first index returned by ``model.analogy``.
    """
    import word2vec

    # Load the word2vec binary file: dataset
    model = word2vec.load('/export/home/sysadmin/text8.bin')
    # Ask for the 10 best candidates; only the best one is used.
    indexes, _ = model.analogy(pos=[first, third], neg=[second], n=10)
    return model.vocab[indexes[0]]
开发者ID:crbothe,项目名称:naoyadtk,代码行数:13,代码来源:nao_intelectual.py
注：本文中的word2vec.load函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论