This article collects typical usage examples of the np_sum function as used in Python code. If you are wondering what np_sum does, how to use it, or want worked examples, the hand-picked snippets below may help.
The following 20 code examples of np_sum are shown, ordered by popularity. Each was extracted from an open-source project.
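Note that numpy exposes no function literally named np_sum: in every project below, np_sum is simply numpy's sum imported under an alias, as gensim does in its own source (from numpy import sum as np_sum). A minimal illustration:

from numpy import array
from numpy import sum as np_sum

x = array([[1, 2], [3, 4]])
print(np_sum(x))           # 10 -- sums all elements
print(np_sum(x, axis=0))   # [4 6] -- column-wise sum, the pattern used throughout the examples below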
Example 1: train_batch_cbow
def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
    result = 0
    for sentence in sentences:
        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
                       model.wv.vocab[w].sample_int > model.random.rand() * 2**32]
        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]

            word2_subwords = []
            vocab_subwords_indices = []
            ngrams_subwords_indices = []

            for index in word2_indices:
                vocab_subwords_indices += [index]
                word2_subwords += model.wv.ngrams_word[model.wv.index2word[index]]

            for subword in word2_subwords:
                ngrams_subwords_indices.append(model.wv.ngrams[subword])

            l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0)  # 1 x vector_size
            l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0)  # 1 x vector_size

            l1 = np_sum([l1_vocab, l1_ngrams], axis=0)
            subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices]
            if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean:
                l1 /= (len(subwords_indices[0]) + len(subwords_indices[1]))

            # train on the sliding window for target word
            train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True)
        result += len(word_vocabs)
    return result
Author: jMonteroMunoz | Project: gensim | Lines: 33 | Source: fasttext.py
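For intuition, the projection step above just sums rows gathered from two lookup tables and divides by the total count when cbow_mean is set. A standalone sketch with made-up toy arrays standing in for model.wv.syn0_vocab and model.wv.syn0_ngrams:

import numpy as np
from numpy import sum as np_sum

# hypothetical toy lookup tables: 4 vocab rows, 6 n-gram rows, vector_size = 3
syn0_vocab = np.arange(12.0).reshape(4, 3)
syn0_ngrams = np.arange(18.0).reshape(6, 3)

vocab_idx, ngram_idx = [0, 2], [1, 4, 5]
l1 = np_sum(syn0_vocab[vocab_idx], axis=0) + np_sum(syn0_ngrams[ngram_idx], axis=0)
l1 /= (len(vocab_idx) + len(ngram_idx))  # the cbow_mean averaging step
print(l1)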
Example 2: mmr_geometricmedian_ker
# imports this snippet relies on (assumed module-level in the original source)
from numpy import mean, sqrt, ones, diag, outer, copy, where
from numpy import sum as np_sum, max as np_max

def mmr_geometricmedian_ker(K):
    m = K.shape[0]
    Ka = mean(K, axis=1)
    aKa = np_sum(Ka) / m
    niter = 1000
    xeps = sqrt(np_sum(Ka**2)) / 100
    xerr = 2 * xeps
    e1 = ones(m)
    for iiter in range(niter):
        ## d2u = sqrt((zeros(m) + aKa) + diag(K) - 2 * Ka)
        d2u_2 = aKa + diag(K) - 2 * Ka
        ineg = where(d2u_2 < 0)[0]
        d2u_2[ineg] = 0.0
        d2u = sqrt(d2u_2)
        inul = where(d2u < xeps)[0]
        d2u[inul] = xeps
        xdenom = np_sum(e1 / d2u)
        Kanext = np_sum(K / outer(d2u, e1), axis=0) / xdenom
        aKanext = np_sum(Ka / d2u) / xdenom
        if np_max(Kanext - Ka) < xerr:
            Ka = copy(Kanext)
            aKa = aKanext
            break
        Ka = copy(Kanext)
        aKa = aKanext
    return (Ka, aKa)
Author: Senka2112 | Project: squirrel_prediction_old | Lines: 31 | Source: mmr_normalization_new.py
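A quick, hypothetical way to exercise this routine is a linear-kernel Gram matrix over random points (the imports at the top of the snippet are assumed):

import numpy as np

X = np.random.rand(50, 3)
K = X @ X.T                    # linear-kernel Gram matrix
Ka, aKa = mmr_geometricmedian_ker(K)
print(Ka.shape, aKa)           # (50,) kernel row of the median, plus a scalar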
Example 3: train_sentence_dm
def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True):
    """
    Update distributed memory model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Doc2Vec.train()`.

    This is the non-optimized, Python version. If you have a C compiler, gensim
    will use the optimized version from doc2vec_inner instead.
    """
    lbl_indices = [lbl.index for lbl in lbls if lbl is not None]
    lbl_sum = np_sum(model.syn0[lbl_indices], axis=0)
    lbl_len = len(lbl_indices)
    neg_labels = []
    if model.negative:
        # precompute negative labels
        neg_labels = zeros(model.negative + 1)
        neg_labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(model.window)  # `b` in the original doc2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(model.syn0[word2_indices], axis=0) + lbl_sum  # 1 x layer1_size
        if word2_indices and model.cbow_mean:
            l1 /= (len(word2_indices) + lbl_len)
        neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, neg_labels, train_words, train_words)
        if train_lbls:
            model.syn0[lbl_indices] += neu1e

    return len([word for word in sentence if word is not None])
Author: AmitShah | Project: gensim | Lines: 35 | Source: doc2vec.py
Example 4: train_sentence_dm
def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True):
    """
    Update distributed memory model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Doc2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from doc2vec_inner instead.
    """
    lbl_indices = [lbl.index for lbl in lbls if lbl is not None]
    if len(lbl_indices) <= model.K:
        return 0
    docIndxPos = int(model.index2word[lbl_indices[0]][5:])
    topKTopics = argsort(model.w_ld[docIndxPos])[::-1][:4]
    selected_lbl_indices = [lbl_indices[0]]
    for i in range(2):
        selected_lbl_indices.append(lbl_indices[topKTopics[i] + 1])
    lbl_sum = np_sum(model.syn0[lbl_indices[0]], axis=0)
    ## lbl_len = len(lbl_indices)
    lbl_len = 1
    neg_labels = []
    if model.negative:
        # precompute negative labels
        neg_labels = zeros(model.negative + 1)
        neg_labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(model.window)  # `b` in the original doc2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(model.syn0[word2_indices], axis=0) + lbl_sum  # 1 x layer1_size
        if word2_indices and model.cbow_mean:
            l1 /= (len(word2_indices) + lbl_len)
        neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, neg_labels, train_words, train_words)
        if train_lbls:
            model.syn0[selected_lbl_indices[0]] += neu1e
            model.syn0[selected_lbl_indices[1:]] += (neu1e / model.noOfLabels)
        word2_indices.append(word.index)
        a_1 = np_sum(model.syn0[word2_indices], axis=0) / len(word2_indices)
        docIndxNeg = selectNegativeDocs(docIndxPos)
        myTrain(model, docIndxPos, docIndxNeg, a_1)

    return len([word for word in sentence if word is not None])
Author: avatarzhang | Project: NTM | Lines: 59 | Source: doc2vec_TopicVector_NonDet.py
Example 5: train_document_dm
def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None,
                      learn_doctags=True, learn_words=True, learn_hidden=True,
                      word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None):
    """
    Update distributed memory model ("PV-DM") by training on a single document.

    Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This
    method implements the DM model with a projection (input) layer that is
    either the sum or mean of the context vectors, depending on the model's
    `dm_mean` configuration field. See `train_dm_concat()` for the DM model
    with a concatenated input layer.

    The document is provided as `doc_words`, a list of word tokens which are looked up
    in the model's vocab dictionary, and `doctag_indexes`, which provide indexes
    into the doctag_vectors array.

    Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
    prevent learning-updates to those respective model weights, as if using the
    (partially-)frozen model to infer other compatible vectors.

    This is the non-optimized, Python version. If you have a C compiler, gensim
    will use the optimized version from doc2vec_inner instead.
    """
    if word_vectors is None:
        word_vectors = model.syn0
    if word_locks is None:
        word_locks = model.syn0_lockf
    if doctag_vectors is None:
        doctag_vectors = model.docvecs.doctag_syn0
    if doctag_locks is None:
        doctag_locks = model.docvecs.doctag_syn0_lockf

    word_vocabs = [model.vocab[w] for w in doc_words if w in model.vocab and
                   model.vocab[w].sample_int > model.random.randint(2**32)]
    doctag_sum = np_sum(doctag_vectors[doctag_indexes], axis=0)
    doctag_len = len(doctag_indexes)

    for pos, word in enumerate(word_vocabs):
        reduced_window = model.random.randint(model.window)  # `b` in the original doc2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
        word2_indexes = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(word_vectors[word2_indexes], axis=0) + doctag_sum  # 1 x layer1_size
        if word2_indexes and model.cbow_mean:
            l1 /= (len(word2_indexes) + doctag_len)
        neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha,
                                learn_vectors=False, learn_hidden=learn_hidden)
        if word2_indexes and not model.cbow_mean:
            neu1e /= (len(word2_indexes) + doctag_len)
        if learn_doctags:
            doctag_vectors[doctag_indexes] += neu1e * \
                np_repeat(doctag_locks[doctag_indexes], model.vector_size).reshape(-1, model.vector_size)
        if learn_words:
            word_vectors[word2_indexes] += neu1e * \
                np_repeat(word_locks[word2_indexes], model.vector_size).reshape(-1, model.vector_size)

    return len(word_vocabs)
Author: nonva | Project: gensim | Lines: 58 | Source: doc2vec.py
Example 6: compute
def compute(self, today, assets, out, data, decay_rate):
    weights = self.weights(len(data), decay_rate)
    mean = average(data, axis=0, weights=weights)
    variance = average((data - mean) ** 2, axis=0, weights=weights)
    squared_weight_sum = np_sum(weights) ** 2
    bias_correction = squared_weight_sum / (squared_weight_sum - np_sum(weights ** 2))
    out[:] = sqrt(variance * bias_correction)
Author: xiaojinyue | Project: zipline | Lines: 9 | Source: technical.py
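The bias correction above is the standard reliability-weights correction for a weighted variance. A standalone sketch with hypothetical exponential weights (the 0.94 decay and the 20 x 4 data matrix are made up):

import numpy as np
from numpy import average, sqrt, sum as np_sum

data = np.random.rand(20, 4)                      # 20 days x 4 assets
decay_rate, n = 0.94, 20
weights = decay_rate ** np.arange(n - 1, -1, -1)  # newest row weighted most

mean = average(data, axis=0, weights=weights)
variance = average((data - mean) ** 2, axis=0, weights=weights)
sw2 = np_sum(weights) ** 2
bias_correction = sw2 / (sw2 - np_sum(weights ** 2))
print(sqrt(variance * bias_correction))           # per-asset exponentially weighted std dev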
Example 7: train_batch_cbow
def train_batch_cbow(model, sentences, alpha, work=None, neu1=None):
    """Update CBOW model by training on a sequence of sentences.

    Called internally from :meth:`~gensim.models.fasttext.FastText.train`.

    Notes
    -----
    This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version
    from :mod:`gensim.models.fasttext_inner` instead.

    Parameters
    ----------
    model : :class:`~gensim.models.fasttext.FastText`
        Model instance.
    sentences : iterable of list of str
        Iterable of the sentences.
    alpha : float
        Learning rate.
    work : :class:`numpy.ndarray`, optional
        UNUSED.
    neu1 : :class:`numpy.ndarray`, optional
        UNUSED.

    Returns
    -------
    int
        Effective number of words trained.

    """
    result = 0
    for sentence in sentences:
        word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab and
                       model.wv.vocab[w].sample_int > model.random.rand() * 2 ** 32]
        for pos, word in enumerate(word_vocabs):
            reduced_window = model.random.randint(model.window)
            start = max(0, pos - model.window + reduced_window)
            window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
            word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]

            vocab_subwords_indices = []
            ngrams_subwords_indices = []

            for index in word2_indices:
                vocab_subwords_indices += [index]
                ngrams_subwords_indices.extend(model.wv.buckets_word[index])

            l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0)  # 1 x vector_size
            l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0)  # 1 x vector_size

            l1 = np_sum([l1_vocab, l1_ngrams], axis=0)
            subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices]
            if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean:
                l1 /= (len(subwords_indices[0]) + len(subwords_indices[1]))

            # train on the sliding window for target word
            train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True)
        result += len(word_vocabs)
    return result
Author: dpritsos | Project: DoGSWrapper | Lines: 57 | Source: fasttext.py
Example 8: mmr_geometricmedian
# imports this snippet relies on (assumed module-level in the original source)
from numpy import mean, sqrt, tile, ones, where, copy
from numpy import sum as np_sum, max as np_max

def mmr_geometricmedian(X):
    (m, n) = X.shape
    u = mean(X, axis=0)
    niter = 1000
    xeps = sqrt(np_sum(u**2)) / 1000
    xerr = 2 * xeps
    for i in range(niter):
        d2u = sqrt(np_sum((X - tile(u, (m, 1)))**2, axis=1))
        inul = where(d2u < xeps)[0]
        d2u[inul] = xeps
        unext = np_sum(X / tile(d2u.reshape((m, 1)), (1, n)), axis=0) / np_sum(ones(m) / d2u)
        if np_max(unext - u) < xerr:
            break
        u = copy(unext)
    return (unext, i, np_max(unext - u))
Author: ipa-nhg | Project: kukadu | Lines: 15 | Source: mmr_normalization_new.py
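This is essentially Weiszfeld's algorithm for the geometric (spatial) median, which is far less sensitive to outliers than the arithmetic mean. Hypothetical usage, with the snippet's numpy imports assumed:

import numpy as np

X = np.vstack([np.random.randn(100, 2), [[50.0, 50.0]]])  # one far outlier
u, n_iter, err = mmr_geometricmedian(X)
print(u, n_iter, err)  # u stays near the data mass, unlike X.mean(axis=0)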
Example 9: mmr_polypower_dn
def mmr_polypower_dn(ndim, maxdegree, ldegree):
    maxdegree = int(maxdegree)
    if len(ldegree) == 0:
        ldegree = [maxdegree] * ndim

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = 1

    istate = 1
    while istate == 1:
        for j in range(ndim):
            if xpower[j] < min(maxdegree - np_sum(xpower[j + 1:]), ldegree[j]):
                xpower[j] += 1
                xpolydir[tuple(xpower)] = 1
                break
            else:
                if j < ndim - 1:
                    xpower[j] = 0
                else:
                    istate = 0

    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    xpolypower = array(xpolylist)
    return xpolypower
Author: ipa-nhg | Project: kukadu | Lines: 28 | Source: mmr_polyfeature.py
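The odometer-style loop enumerates every exponent tuple whose total degree stays within maxdegree and whose per-dimension degree stays within ldegree. For instance, assuming zeros, array, and np_sum are imported from numpy as in the source module:

print(mmr_polypower_dn(2, 2, []))
# [[0 0]
#  [0 1]
#  [0 2]
#  [1 0]
#  [1 1]
#  [2 0]]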
Example 10: mmr_polyfeature_dn
def mmr_polyfeature_dn(xdata, maxdegree, ldegree):
    (m, ndim) = xdata.shape
    maxdegree = int(maxdegree)
    if len(ldegree) == 0:
        ldegree = [maxdegree for i in range(ndim)]

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = ones(m)

    istate = 1
    while istate == 1:
        for j in range(ndim):
            if xpower[j] < min(maxdegree - np_sum(xpower[j + 1:]), ldegree[j]):
                xterm = xpolydir[tuple(xpower)]
                xpower[j] += 1
                xpolydir[tuple(xpower)] = xterm * xdata[:, j]
                break
            else:
                if j < ndim - 1:
                    xpower[j] = 0
                else:
                    istate = 0

    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    nd = len(xpolylist)
    xpolydata = zeros((m, nd))
    for i in range(nd):
        xpow = xpolylist[i]
        xpolydata[:, i] = xpolydir[xpow]
    return xpolydata
Author: ipa-nhg | Project: kukadu | Lines: 35 | Source: mmr_polyfeature.py
Example 11: mmr_polyfeature_d
def mmr_polyfeature_d(xdata, ndegree):
    (m, ndim) = xdata.shape
    ndegree = int(ndegree)
    ## number of terms = \binom{ndegree+ndim}{ndim}
    nd = 1
    for i in range(ndim):
        nd *= (ndegree + i + 1) / (i + 1)
    nd = int(nd)

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = ones(m)

    for i in range(nd):
        for j in range(ndim):
            if xpower[j] < ndegree - np_sum(xpower[j + 1:]):
                xterm = xpolydir[tuple(xpower)]
                xpower[j] += 1
                xpolydir[tuple(xpower)] = xterm * xdata[:, j]
                break
            else:
                xpower[j] = 0

    xpolydata = zeros((m, nd))
    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    for i in range(nd):
        xpow = xpolylist[i]
        xpolydata[:, i] = xpolydir[xpow]
    return xpolydata
Author: ipa-nhg | Project: kukadu | Lines: 33 | Source: mmr_polyfeature.py
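mmr_polyfeature_d maps an m x ndim data matrix to an m x nd matrix holding every monomial of total degree at most ndegree, where nd = C(ndegree + ndim, ndim). A small hypothetical check (numpy names imported as in the source module):

import numpy as np

xdata = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
feats = mmr_polyfeature_d(xdata, 2)
print(feats.shape)  # (2, 6): columns 1, y, y^2, x, x*y, x^2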
Example 12: mmr_polypower_d
def mmr_polypower_d(ndim, ndegree):
    ndegree = int(ndegree)
    ## number of terms = \binom{ndegree+ndim}{ndim}
    nd = 1
    for i in range(ndim):
        nd *= (ndegree + i + 1) / (i + 1)
    nd = int(nd)

    xpolydir = {}
    xpower = zeros(ndim, dtype=int)
    xpolydir[tuple(xpower)] = 1

    for i in range(nd):
        for j in range(ndim):
            if xpower[j] < ndegree - np_sum(xpower[j + 1:]):
                xpower[j] += 1
                xpolydir[tuple(xpower)] = 1
                break
            else:
                xpower[j] = 0

    xpolylist = [xpow for xpow in xpolydir.keys()]
    xpolylist.sort()
    xpolypower = array(xpolylist)
    return xpolypower
Author: ipa-nhg | Project: kukadu | Lines: 27 | Source: mmr_polyfeature.py
Example 13: train_sentence_cbow
def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None):
    """
    Update CBOW model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Word2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2vec_inner instead.
    """
    labels = []
    if model.negative:
        # precompute negative labels
        labels = zeros(model.negative + 1)
        labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(model.window)  # `b` in the original word2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(model.syn0[word2_indices], axis=0)  # 1 x layer1_size
        if word2_indices and model.cbow_mean:
            l1 /= len(word2_indices)
        train_cbow_pair(model, word, word2_indices, l1, alpha, labels)

    return len([word for word in sentence if word is not None])
Author: Bolaka | Project: word2vec-visualization | Lines: 30 | Source: word2vec.py
Example 14: rmsle
def rmsle(actual, predicted):
    """ Root mean squared logarithmic error.
    """
    actual, predicted = _preformat_inputs(actual, predicted)
    count_of = predicted.shape[0]
    square_logarithm_difference = log((actual + 1) / (predicted + 1)) ** 2
    return sqrt((1 / count_of) * np_sum(square_logarithm_difference))
Author: Neocher | Project: neupy | Lines: 7 | Source: errors.py
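Since log((a + 1) / (p + 1)) equals log(a + 1) - log(p + 1), this matches the textbook RMSLE definition. A self-contained sketch that inlines neupy's _preformat_inputs conversion step:

import numpy as np
from numpy import log, sqrt, sum as np_sum

def rmsle_demo(actual, predicted):
    actual = np.asarray(actual, float)
    predicted = np.asarray(predicted, float)
    sq_log_diff = log((actual + 1) / (predicted + 1)) ** 2
    return sqrt(np_sum(sq_log_diff) / actual.shape[0])

print(rmsle_demo([3, 5, 2.5], [2.5, 5, 3]))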
Example 15: _reduce_constraints
def _reduce_constraints(A, b):
    """ Make the constraint non-singular.

    If the constraint has the form:
        dot(A, x) = b
    A may be singular. To avoid this problem, we extract the
    non-singular part of the equation thanks to the SVD:
        A = U*S*Vh  with  U.T*U = I and Vh.T*Vh = I
    If r is the rank of A, we have:
        Ar = S[:r,:r]*Vh[:r,:]
        br = U[:,:r].T*b
    Hence:
        Ar*x = br
    """
    try:
        u, s, vh = svd(A, full_matrices=False)
        r = np_sum(where(s > 1e-3, 1, 0))  # compute the rank of A
        ur, sr, vhr = u[:, :r], s[:r], vh[:r, :]
        Ar = dot(diag(sr), vhr)
        br = dot(ur.T, b)
    except LinAlgError:
        Ar = A.copy()
        br = b.copy()
    return Ar, br
Author: mickey79 | Project: LQPctrl | Lines: 25 | Source: solver.py
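A quick hypothetical check with a redundant constraint row (assuming svd and LinAlgError come from numpy.linalg, and where, diag, dot, np_sum from numpy, as in the source module):

import numpy as np

A = np.array([[1.0, 0.0],
              [2.0, 0.0],   # linearly dependent on row 0
              [0.0, 1.0]])
b = np.array([1.0, 2.0, 3.0])
Ar, br = _reduce_constraints(A, b)
print(Ar.shape)  # (2, 2): the redundant constraint row has been dropped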
Example 16: train_sentence_cbow
def train_sentence_cbow(model, sentence, context_vector, alpha, work=None, neu1=None):
    """
    Update CBOW model by training on a single sentence.

    The sentence is a list of string tokens, which are looked up in the model's
    vocab dictionary. Called internally from `word2mat.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2mat_inner instead.
    """
    word_vocabs = [model.vocab[w] for w in sentence if w in model.vocab and
                   model.vocab[w].sample_int > model.random.rand() * 2**32]
    for pos, word in enumerate(word_vocabs):
        reduced_window = model.random.randint(model.window)  # `b` in the original word2mat code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(model.syn0[word2_indices], axis=0)  # 1 x vector_size
        l1 = l1.reshape(model.topic_size, model.vector_size)
        l1 = l1.T.dot(context_vector)
        if word2_indices and model.cbow_mean:
            l1 /= len(word2_indices)
        train_cbow_pair(model, word, word2_indices, context_vector, l1, alpha)
    return len(word_vocabs)
Author: Yelrose | Project: WordMatrix | Lines: 26 | Source: word2mat_v3.py
Example 17: train_sentence_sg
def train_sentence_sg(model, sentence, context_vector, alpha, work=None, neu1=None):
    """
    Update skip-gram model by training on a single sentence.

    The sentence is a list of string tokens, which are looked up in the model's
    vocab dictionary. Called internally from `word2mat.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2mat_inner instead.
    """
    word_vocabs = [(model.vocab[w], t) for w, t in sentence if w in model.vocab and
                   model.vocab[w].sample_int > model.random.rand() * 2**32]
    for pos, item in enumerate(word_vocabs):
        word, topic = item
        reduced_window = model.random.randint(model.window)  # `b` in the original word2mat code
        topic_start = max(0, pos - model.topic_window)
        for i in xrange(model.topic_size):
            context_vector[i] = 0.
        for pos2, item2 in enumerate(word_vocabs[topic_start:(pos + model.topic_window + 1)], topic_start):
            word2, topic2 = item2
            context_vector[topic2] += 1
        context_vector = context_vector / np_sum(context_vector)

        # now go over all words from the (reduced) window, predicting each one in turn
        start = max(0, pos - model.window + reduced_window)
        for pos2, item2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start):
            word2, topic2 = item2
            # don't train on the `word` itself
            if pos2 != pos:
                train_sg_pair(model, model.index2word[word2.index], word.index, context_vector, alpha)
    return len(word_vocabs)
Author: Yelrose | Project: WordMatrix | Lines: 33 | Source: word2mat_v3.py
Example 18: coding_bases
def coding_bases(self, seq_id):
    """Calculate number of coding bases in sequence."""
    # check if sequence has any genes
    if seq_id not in self.genes:
        return 0
    return np_sum(self.coding_mask[seq_id])
Author: dparks1134 | Project: GenomeTk | Lines: 8 | Source: metadata_genes.py
Example 19: kullback_leibler
def kullback_leibler(actual, predicted):
    """ Kullback-Leibler error.
    """
    actual, predicted = _preformat_inputs(actual, predicted)
    count_of_inputs = actual.shape[0]
    return (1. / count_of_inputs) * np_sum(
        predicted * log(predicted / actual) +
        (1 - predicted) * log((1 - predicted) / (1 - actual))
    )
Author: Neocher | Project: neupy | Lines: 9 | Source: errors.py
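This treats each element as a Bernoulli probability and averages the elementwise binary KL divergence, so all inputs must lie strictly in (0, 1). A self-contained sketch that inlines the input conversion:

import numpy as np
from numpy import log, sum as np_sum

def binary_kl_demo(actual, predicted):
    actual = np.asarray(actual, float)
    predicted = np.asarray(predicted, float)
    return (1. / actual.shape[0]) * np_sum(
        predicted * log(predicted / actual) +
        (1 - predicted) * log((1 - predicted) / (1 - actual))
    )

print(binary_kl_demo([0.5, 0.5], [0.5, 0.5]))  # 0.0 when the distributions match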
Example 20: train_cat_vec_cbow_pp
def train_cat_vec_cbow_pp(model, sent_vec, cat_vec, sentence, alpha, work=None, neu1=None, sent_vec_grad=None, cat_vec_grad=None):
    """
    Update CBOW model by training on a single sentence.

    The sentence is a list of Vocab objects (or None, where the corresponding
    word is not in the vocabulary). Called internally from `Sent2Vec.train()`.

    This is the non-optimized, Python version. If you have cython installed, gensim
    will use the optimized version from word2vec_inner instead.
    """
    w2vmodel = model.w2v
    if model.negative:
        # precompute negative labels
        labels = zeros(model.negative + 1)
        labels[0] = 1.

    for pos, word in enumerate(sentence):
        if word is None:
            continue  # OOV word in the input sentence => skip
        reduced_window = random.randint(model.window)  # `b` in the original word2vec code
        start = max(0, pos - model.window + reduced_window)
        window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start)
        word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)]
        l1 = np_sum(w2vmodel.syn0[word2_indices], axis=0)  # 1 x layer1_size
        l1 += sent_vec + cat_vec
        if word2_indices and model.cbow_mean:
            l1 /= (len(word2_indices) + 1)  ## modified by jmarui
        neu1e = zeros(l1.shape)

        if model.hs:
            l2a = w2vmodel.syn1[word.point]  # 2d matrix, codelen x layer1_size
            fa = 1. / (1. + exp(-dot(l1, l2a.T)))  # propagate hidden -> output
            ga = (1. - word.code - fa) * alpha  # vector of error gradients multiplied by the learning rate
            if model.word_learn == 1:
                w2vmodel.syn1[word.point] += outer(ga, l1)  # learn hidden -> output
            neu1e += dot(ga, l2a)  # save error

        if model.negative:
            # use this word (label = 1) + `negative` other random words not from this sentence (label = 0)
            word_indices = [word.index]
            while len(word_indices) < model.negative + 1:
                w = w2vmodel.table[random.randint(w2vmodel.table.shape[0])]
                if w != word.index:
                    word_indices.append(w)
            l2b = w2vmodel.syn1neg[word_indices]  # 2d matrix, k+1 x layer1_size
            fb = 1. / (1. + exp(-dot(l1, l2b.T)))  # propagate hidden -> output
            gb = (labels - fb) * alpha  # vector of error gradients multiplied by the learning rate
            if model.word_learn == 1:
                w2vmodel.syn1neg[word_indices] += outer(gb, l1)  # learn hidden -> output
            neu1e += dot(gb, l2b)  # save error

        if model.word_learn == 1:
            w2vmodel.syn0[word2_indices] += neu1e  # learn input -> hidden, here for all words in the window separately
        sent_vec += neu1e  # learn input -> hidden for the sentence vector
        if model.cat_learn == 1:
            cat_vec += neu1e  # learn input -> hidden for the category vector

    return len([word for word in sentence if word is not None])
Author: nathan2718 | Project: category2vec | Lines: 55 | Source: cat2vec_pp.py
Note: The numpy.np_sum examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their authors; copyright remains with the original authors, and distribution and use must follow each project's License. Do not reproduce without permission.