本文整理汇总了Python中nn.math.make_onehot函数的典型用法代码示例。如果您正苦于以下问题:Python make_onehot函数的具体用法?Python make_onehot怎么用?Python make_onehot使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了make_onehot函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: backprop
def backprop(self, xs, ys, hs, y_hat):
    """Accumulate gradients for an error applied at the final time step.

    The output error ``y_hat - ys`` is pushed through ``U`` into the last
    hidden state and then propagated backwards through ``H`` for up to
    ``self.bptt - 1`` further steps.  Results are added to ``self.grads``
    (``b2``, ``U``, ``H``, ``b1``) and written into ``self.sgrads.L`` for
    the word index seen at every visited step.

    xs    -- sequence of input word indices
    ys    -- target vector, same length as ``y_hat``
    hs    -- hidden states indexed by time step
    y_hat -- predicted output vector
    """
    def as_column(vec):
        # View a 1-D vector as an (n, 1) column.
        return vec.reshape(len(vec), 1)

    last = len(xs) - 1
    h_final = hs[last]

    # Output layer.
    err = y_hat - ys
    self.grads.b2 += err
    h_last = as_column(h_final)
    err = err.reshape(len(ys), 1)
    self.grads.U += err.dot(h_last.T)

    # Error at the last hidden layer.  The h * (1 - h) factor is the
    # derivative of a logistic unit -- presumably hs holds sigmoid outputs.
    grad_h = self.params.U.T.dot(err) * h_last * (1 - h_last)
    h_before = as_column(hs[last - 1])
    self.grads.H += grad_h.dot(h_before.T)
    self.grads.b1 += grad_h.reshape((len(grad_h),))
    word = xs[last]
    onehot = make_onehot(word, self.vdim).reshape(self.vdim, 1)
    self.sgrads.L[word] = onehot.dot(grad_h.T)[word]

    # Unroll further back in time.
    for back in range(1, self.bptt):
        if last < back:
            # Ran past the start of the sequence.
            continue
        step = last - back
        h_step = as_column(hs[step])
        h_before = as_column(hs[step - 1])
        grad_h = self.params.H.T.dot(grad_h) * h_step * (1 - h_step)
        self.grads.H += grad_h.dot(h_before.T)
        self.grads.b1 += grad_h.reshape((len(grad_h),))
        word = xs[step]
        onehot = make_onehot(word, self.vdim).reshape(self.vdim, 1)
        self.sgrads.L[word] = onehot.dot(grad_h.T)[word]
开发者ID:laisun,项目名称:EntitySentiment,代码行数:27,代码来源:rnn_simple.py
示例2: forwardProp
def forwardProp(self, node, correct=None, guess=None):
    """Recursively forward-propagate a parse tree through the two-layer model.

    Sets ``hActs1``, ``hActs2``, ``probs`` and ``fprop`` on each node that
    is evaluated, and appends the node's true label / arg-max prediction
    to ``correct`` / ``guess``.

    node    -- current tree node (uses isLeaf, word, label, left, right)
    correct -- running list of true labels; a new list is created if omitted
    guess   -- running list of predicted labels; a new list is created if
               omitted

    Returns ``(cost, total)``: the summed cross-entropy cost and the number
    of nodes visited in this subtree.
    """
    # A literal [] default would be one shared list that silently grows
    # across every call that omits the argument.
    if correct is None:
        correct = []
    if guess is None:
        guess = []

    cost = total = 0.0

    if node.isLeaf:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.hActs2 = self.ReLU(self.W2.dot(node.hActs1) + self.b2)
        node.probs = softmax(self.Ws.dot(node.hActs2) + self.bs)
        # Multiplying by the one-hot label picks the true class probability.
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return cost, 1

    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)

    # Only combine the children once both have been forward-propagated.
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        node.hActs1 = self.ReLU(self.W1.dot(h) + self.b1)
        node.hActs2 = self.ReLU(self.W2.dot(node.hActs1) + self.b2)
        node.probs = softmax(self.Ws.dot(node.hActs2) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))

    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
开发者ID:alphadl,项目名称:cs224d,代码行数:32,代码来源:rnn2deep.py
示例3: forwardProp
def forwardProp(self, node, correct, guess):
    """Recursively forward-propagate a parse tree through the tensor model.

    Each evaluated node gets ``hActs1``, ``probs`` and ``fprop`` set; its
    true label and arg-max prediction are appended to ``correct`` and
    ``guess``.  Internal nodes add a bilinear term ``h . V[i] . h`` per
    output unit on top of the affine map of the stacked child activations.

    Returns ``(cost, total)``: summed cross-entropy cost and the number of
    nodes visited in this subtree.
    """
    n_labels = len(self.bs)

    if node.isLeaf == True:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        picked = node.probs * make_onehot(node.label, n_labels)
        leaf_cost = -np.log(np.sum(picked))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return leaf_cost, 1

    left_cost, left_count = self.forwardProp(node.left, correct, guess)
    right_cost, right_count = self.forwardProp(node.right, correct, guess)

    own_cost = 0.0
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        children = np.hstack([node.left.hActs1, node.right.hActs1])

        # One quadratic form per output unit.
        n_units = len(node.left.hActs1)
        bilinear = np.zeros(n_units)
        for unit in range(n_units):
            bilinear[unit] = children.dot(self.V[unit]).dot(children)

        node.hActs1 = self.ReLU(self.W.dot(children) + self.b + bilinear)
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        picked = node.probs * make_onehot(node.label, n_labels)
        own_cost = -np.log(np.sum(picked))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))

    subtree_cost = own_cost + left_cost + right_cost
    subtree_count = 0.0 + left_count + right_count
    return subtree_cost, subtree_count + 1
开发者ID:alphadl,项目名称:cs224d,代码行数:32,代码来源:rntn.py
示例4: backprop
def backprop(self, xs, ys, hs_f, hs_b, y_hat):
    """Accumulate gradients for the bidirectional model with a weighted loss.

    The class-weighted error ``self.params.weights * (y_hat - ys)`` is
    pushed through ``U`` and split into a forward half (first
    ``self.hdim`` rows) and a backward half (the rest).  Each half is then
    unrolled through ``H_f`` / ``H_b`` for up to ``self.bptt - 1`` further
    steps.  Dense gradients are added to ``self.grads``; embedding updates
    are written to ``self.sgrads.L`` (forward direction indexed by ``xs``,
    backward direction by the reversed sequence).

    xs         -- sequence of input word indices
    ys         -- target vector, same length as ``y_hat``
    hs_f, hs_b -- forward / backward hidden states indexed by time step
    y_hat      -- predicted output vector
    """
    def as_column(vec):
        # View a 1-D vector as an (n, 1) column.
        return vec.reshape(len(vec), 1)

    inverted_xs = list(reversed(xs))
    ns = len(xs)
    t = ns - 1  # last time step

    ht_f = as_column(hs_f[t])
    ht_b = as_column(hs_b[t])

    delta = self.params.weights * (y_hat - ys)
    self.grads.b2 += delta
    delta = delta.reshape(len(ys), 1)
    # Lay the two states out as [forward | backward] in a single row so the
    # column order of dJ/dU matches the [:hdim] / [hdim:] split used below.
    # (hstack of the two *columns* followed by a reshape would interleave
    # forward and backward entries instead.)
    self.grads.U += delta.dot(hstack([ht_f.T, ht_b.T]))

    # Back-project once, then split between the two directions.
    back = self.params.U.T.dot(delta)
    current_f = back[:self.hdim] * ht_f * (1 - ht_f)
    current_b = back[self.hdim:] * ht_b * (1 - ht_b)

    # Gradients at the last step.
    prev_ht_f = as_column(hs_f[t - 1])
    self.grads.H_f += current_f.dot(prev_ht_f.T)
    self.grads.b1_f += current_f.reshape((len(current_f),))
    prev_ht_b = as_column(hs_b[t - 1])
    self.grads.H_b += current_b.dot(prev_ht_b.T)
    self.grads.b1_b += current_b.reshape((len(current_b),))

    xt = make_onehot(xs[t], self.vdim).reshape(self.vdim, 1)
    self.sgrads.L[xs[t]] = xt.dot(current_f.T)[xs[t]]
    inv_xt = make_onehot(inverted_xs[t], self.vdim).reshape(self.vdim, 1)
    self.sgrads.L[inverted_xs[t]] = inv_xt.dot(current_b.T)[inverted_xs[t]]

    # Unroll further back in time.
    for i in range(1, self.bptt):
        if t < i:
            # Ran past the start of the sequence.
            continue
        step = t - i

        ht_f_i = as_column(hs_f[step])
        prev_ht_f_i = as_column(hs_f[step - 1])
        current_f = self.params.H_f.T.dot(current_f) * ht_f_i * (1 - ht_f_i)
        self.grads.H_f += current_f.dot(prev_ht_f_i.T)
        # Sized from current_f itself (the original used len(current_b)).
        self.grads.b1_f += current_f.reshape((len(current_f),))

        ht_b_i = as_column(hs_b[step])
        prev_ht_b_i = as_column(hs_b[step - 1])
        current_b = self.params.H_b.T.dot(current_b) * ht_b_i * (1 - ht_b_i)
        self.grads.H_b += current_b.dot(prev_ht_b_i.T)
        self.grads.b1_b += current_b.reshape((len(current_b),))

        prev_xt = make_onehot(xs[step], self.vdim).reshape(self.vdim, 1)
        self.sgrads.L[xs[step]] = prev_xt.dot(current_f.T)[xs[step]]
        prev_inv_xt = make_onehot(inverted_xs[step], self.vdim).reshape(self.vdim, 1)
        self.sgrads.L[inverted_xs[step]] = prev_inv_xt.dot(current_b.T)[inverted_xs[step]]
开发者ID:lu839684437,项目名称:EntitySentiment,代码行数:50,代码来源:brnn_weighted.py
示例5: backProp
def backProp(self, node, error=None):
    """Recursively back-propagate through the parse tree rooted at ``node``.

    Adds to ``self.dWs``, ``self.dbs``, ``self.dW``, ``self.db`` and
    ``self.dL[node.word]``, and clears ``node.fprop`` on the way.

    node  -- current tree node
    error -- error handed down from the parent node, or None at the root
    """
    node.fprop = False

    # Softmax layer.
    out_err = node.probs - make_onehot(node.label, len(self.bs))
    self.dWs += np.outer(out_err, node.hActs1)
    self.dbs += out_err

    # Error at this node's hidden layer, plus whatever the parent sent.
    hid_err = out_err.dot(self.Ws)
    if error is not None:
        hid_err += error

    if node.isLeaf == True:
        # Leaves only touch their word vector.
        self.dL[node.word] += hid_err
        return

    hid_err = hid_err * self.df(node.hActs1)
    children = np.hstack([node.left.hActs1, node.right.hActs1])
    self.dW += np.outer(hid_err, children)
    self.db += hid_err

    # The first wvecDim entries go to the left child, the rest to the right.
    down = hid_err.dot(self.W)
    self.backProp(node.left, down[:self.wvecDim])
    self.backProp(node.right, down[self.wvecDim:])
开发者ID:alphadl,项目名称:cs224d,代码行数:29,代码来源:rnn.py
示例6: backProp
def backProp(self, node, error=None):
    """Recursively back-propagate through the two-layer tree model.

    Adds to ``self.dWs``, ``self.dbs``, ``self.dW2``, ``self.db2``,
    ``self.dW1``, ``self.db1`` and ``self.dL[node.word]``, and clears
    ``node.fprop`` on the way.

    node  -- current tree node
    error -- error handed down from the parent node, or None at the root
    """
    node.fprop = False

    # Softmax layer.
    errorCur = node.probs - make_onehot(node.label, len(self.bs))
    self.dWs += np.outer(errorCur, node.hActs2)
    self.dbs += errorCur

    # Second hidden layer.
    errorCur = errorCur.dot(self.Ws) * self.df(node.hActs2)
    self.dW2 += np.outer(errorCur, node.hActs1)
    self.db2 += errorCur

    # Error at the first hidden layer, plus whatever the parent sent.
    errorCur = errorCur.dot(self.W2)
    if error is not None:
        errorCur += error

    if node.isLeaf:
        # Leaves only touch their word vector.
        self.dL[node.word] += errorCur
        return

    errorCur = errorCur * self.df(node.hActs1)
    children = np.hstack([node.left.hActs1, node.right.hActs1])
    # diag(e) . ones . diag(h) is just the outer product e h^T; np.outer
    # builds it directly without the two dense matrix products.
    self.dW1 += np.outer(errorCur, children)
    self.db1 += errorCur

    # The first wvecDim entries go to the left child, the rest to the right.
    errorCur = errorCur.dot(self.W1)
    self.backProp(node.left, errorCur[:self.wvecDim])
    self.backProp(node.right, errorCur[self.wvecDim:])
开发者ID:alphadl,项目名称:cs224d,代码行数:26,代码来源:rnn2deep.py
示例7: compute_loss
def compute_loss(self, windows, labels):
    """
    Compute the loss for a given dataset.

    windows = 2-D integer array, one row of word indices per example
              (same as for predict_proba)
    labels  = list of class labels, one for each row of windows

    Returns the summed cross-entropy over all rows plus an L2 penalty on
    W and U; the penalty is multiplied by the number of rows.
    """
    n_classes = self.params.b2.shape[0]
    batch = len(labels)

    # Look up the embeddings for every window position and lay them side
    # by side: one row per example.  A single concatenate avoids the
    # repeated copying of growing the array one position at a time.
    x = np.concatenate(
        [self.sparams.L[windows[:, pos]] for pos in range(windows.shape[1])],
        axis=1)

    # Forward pass with examples as columns.
    z = self.params.W.dot(x.T) + self.params.b1.reshape((self.params.b1.shape[0], 1))
    h = tanh(z)
    p = softmax(self.params.U.dot(h) + self.params.b2.reshape((n_classes, 1)))

    # One-hot targets, one row per example.
    labelArray = np.zeros((batch, n_classes))
    for i in range(batch):
        labelArray[i] = make_onehot(labels[i], n_classes)

    # Keep only the probability assigned to the true class of each example.
    p = np.sum(p * labelArray.T, axis=0)
    J = np.sum(-np.log(p))

    Jreg = batch * (self.lreg / 2.0) * (np.sum(self.params.W**2) + np.sum(self.params.U**2))
    J += Jreg
    return J
开发者ID:alphadl,项目名称:cs224d,代码行数:28,代码来源:nerwindow.py
示例8: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class

    Updates self.grads (U, b2, W, b1) and self.sgrads.L so that
    gradient_check and training see every parameter:

        self.grads.U += (gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    # Forward propagation: concatenate the window's word vectors.
    xf = []
    for idx in window:
        xf.extend(self.sparams.L[idx])
    tanhX = tanh(self.params.W.dot(xf) + self.params.b1)
    softmaxP = softmax(self.params.U.dot(tanhX) + self.params.b2)

    # Backpropagation.
    y = make_onehot(label, len(softmaxP))
    delta2 = softmaxP - y
    self.grads.U += outer(delta2, tanhX) + self.lreg * self.params.U
    self.grads.b2 += delta2

    delta1 = self.params.U.T.dot(delta2) * (1. - tanhX * tanhX)
    self.grads.W += outer(delta1, xf) + self.lreg * self.params.W
    self.grads.b1 += delta1

    # dJ/dL was never written before, leaving the embeddings without a
    # gradient.  Split dJ/dx back into one slice per window position.
    dL = self.params.W.T.dot(delta1).reshape(len(window), -1)
    for pos, idx in enumerate(window):
        self.sgrads.L[idx] = dL[pos]
开发者ID:gargvinit,项目名称:cs224d,代码行数:27,代码来源:nerwindow.py
示例9: compute_loss
def compute_loss(self, windows, labels):
    """
    Compute the loss for a given dataset.

    windows = same as for predict_proba; a single window (a flat sequence
              of indices) is also accepted
    labels  = list of class labels, for each row of windows

    Returns the cross-entropy summed over all windows plus one L2 penalty
    term on W and U.
    """
    # A bare window is promoted to a batch of one.
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]
        labels = [labels]

    J = 0
    for n, window in enumerate(windows):
        # Flatten the window's word vectors into one input vector.
        vectors = self.sparams.L[window]
        x = reshape(vectors, vectors.shape[0] * vectors.shape[1])

        hidden = tanh(self.params.W.dot(x) + self.params.b1)
        y_hat = softmax(self.params.U.dot(hidden) + self.params.b2)

        target = make_onehot(labels[n], len(y_hat))
        J -= sum(target * log(y_hat))

    penalty = sum(self.params.W**2.0) + sum(self.params.U**2.0)
    J += (self.lreg / 2.0) * penalty
    return J
开发者ID:mlong14,项目名称:CS224D-Project,代码行数:34,代码来源:nerwindow.py
示例10: b_prop
def b_prop(self, ys):
    """Back-propagate through the decoder, last time step first.

    Adds to ``self.grads['U']``, ``['b2']``, ``['b1']`` and ``['Wh']``
    using the ``self.yhats`` and ``self.hs`` columns stored by the forward
    pass.

    ys -- sequence of target indices, one per time step

    Returns the error propagated below the first time step.  For an empty
    ``ys`` this is a zero vector of length ``self.hdim`` (the original
    raised UnboundLocalError in that case).
    """
    Wh = self.params['Wh']
    U = self.params['U']

    # Error arriving from the step after the current one; nothing follows
    # the last step, so it starts at zero.
    delta_above = np.zeros(self.hdim)

    for t in reversed(range(len(ys))):
        delta_3 = self.yhats[:, t] - make_onehot(ys[t], self.outdim)
        self.grads['U'] += np.outer(delta_3, self.hs[:, t])
        self.grads['b2'] += delta_3

        dh = np.dot(U.T, delta_3) + delta_above
        # Gate by (h > 0) -- presumably the hidden units are ReLUs.
        delta_2 = dh * (self.hs[:, t] > 0)
        self.grads['b1'] += delta_2
        # NOTE(review): at t == 0 this reads hs[:, -1], i.e. the last
        # column -- confirm that column holds the initial hidden state.
        self.grads['Wh'] += np.outer(delta_2, self.hs[:, t - 1])

        delta_above = np.dot(Wh.T, delta_2)

    return delta_above
开发者ID:arthur-tsang,项目名称:EqnMaster,代码行数:25,代码来源:dec.py
示例11: backProp
def backProp(self, node, error=None):
    """Recursively back-propagate through the tensor tree model.

    Adds to ``self.dWs``, ``self.dbs``, ``self.dW``, ``self.db``,
    ``self.dV`` and ``self.dL[node.word]``, and clears ``node.fprop`` on
    the way.

    node  -- current tree node
    error -- error handed down from the parent node, or None at the root
    """
    node.fprop = False

    # Softmax layer.
    out_err = node.probs - make_onehot(node.label, len(self.bs))
    self.dWs += np.outer(out_err, node.hActs1)
    self.dbs += out_err

    # Error at this node's hidden layer, plus whatever the parent sent.
    hid_err = out_err.dot(self.Ws)
    if error is not None:
        hid_err += error

    if node.isLeaf == True:
        # Leaves only touch their word vector.
        self.dL[node.word] += hid_err
        return

    hid_err = hid_err * self.df(node.hActs1)
    children = np.hstack([node.left.hActs1, node.right.hActs1])
    self.dW += np.outer(hid_err, children)
    self.db += hid_err

    # Tensor slices: each V[k] gets the same outer product scaled by its
    # unit's error, and sends (V[k] + V[k]^T) . children back down.
    pair_outer = np.outer(children, children)
    from_tensor = np.zeros(len(children))
    for k in range(len(self.V)):
        self.dV[k] += hid_err[k] * pair_outer
        from_tensor += (self.V[k] + self.V[k].T).dot(children) * hid_err[k]

    # The first wvecDim entries go to the left child, the rest to the right.
    down = hid_err.dot(self.W) + from_tensor
    self.backProp(node.left, down[:self.wvecDim])
    self.backProp(node.right, down[self.wvecDim:])
开发者ID:alphadl,项目名称:cs224d,代码行数:28,代码来源:rntn.py
示例12: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class

    Updates self.grads (U, b2, W, b1) and self.sgrads.L:

        self.grads.U += (gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    ##
    # Forward propagation
    x = hstack(self.sparams.L[window, :])
    # NOTE(review): the pre-activation is doubled here, yet the derivative
    # below is 1 - h**2 with no factor of 2.  The two agree only if `tanh`
    # in this module is not numpy's tanh -- confirm against the loss code.
    h = tanh(2*(self.params.W.dot(x)+self.params.b1))
    p = softmax(self.params.U.dot(h)+self.params.b2)

    ##
    # Backpropagation
    # Size the target from the prediction instead of hard-coding 5 classes.
    y = make_onehot(label, len(p))
    delta = p - y

    self.grads.U += outer(delta, h) + self.lreg * self.params.U
    self.grads.b2 += delta

    gradh = dot(self.params.U.T, delta) * (1 - h**2)
    self.grads.W += outer(gradh, x) + self.lreg * self.params.W
    self.grads.b1 += gradh

    # Split dJ/dx into one row per window position.
    dL = self.params.W.T.dot(gradh).reshape(self.window_size, self.word_vec_size)
    for i in range(self.window_size):
        self.sgrads.L[window[i], :] = dL[i]
开发者ID:jirachikai,项目名称:my_cs224d,代码行数:34,代码来源:nerwindow.py
示例13: _acc_grads
def _acc_grads(self, xs, ys):
    """
    Accumulate gradients, given a pair of training sequences:

    xs = [<indices>] # input words
    ys = [<indices>] # output words (to predict)

    The sequence is first run forward in time, storing the hidden state
    h(t) and the predicted distribution for every step; the hidden state
    before step 0 is a vector of zeros.  Then, for each output word, the
    cross-entropy error is propagated back through at most self.bptt
    earlier steps (never past the start of the sequence).

    Gradients are added to self.grads.U and self.grads.H and written to
    self.sgrads.L[i] for the input word at each visited step.
    """
    ns = len(xs)
    H = self.params.H
    U = self.params.U

    # One spare row: hs[-1] is never written, so it stays all zeros and
    # serves as the initial hidden state.
    hs = zeros((ns + 1, self.hdim))
    ps = zeros((ns, self.vdim))

    ##
    # Forward propagation
    for t in range(ns):
        hs[t] = sigmoid(dot(H, hs[t - 1]) + self.sparams.L[xs[t]])
        ps[t] = softmax(dot(U, hs[t]))

    ##
    # Backward propagation through time
    for j in range(ns):
        err = ps[j] - make_onehot(ys[j], self.vdim)
        self.grads.U += outer(err, hs[j])
        delta = dot(U.T, err) * hs[j] * (1.0 - hs[j])

        # Visit step j and up to self.bptt steps before it, stopping at
        # the start of the sequence.
        oldest = max(j - self.bptt, 0)
        for t in range(j, oldest - 1, -1):
            h_prev = hs[t - 1]
            self.grads.H += outer(delta, h_prev)
            self.sgrads.L[xs[t]] = delta
            delta = dot(H.T, delta) * h_prev * (1.0 - h_prev)
开发者ID:ryu577,项目名称:base,代码行数:59,代码来源:msushkov_rnnlm.py
示例14: forwardProp
def forwardProp(self, node, correct=None, guess=None):
    """Recursively forward-propagate a tree through the maxout model.

    Sets ``hActs1``, ``hActs2``, ``idx``, ``probs`` and ``fprop`` on each
    node that is evaluated, and appends the node's true label / arg-max
    prediction to ``correct`` / ``guess``.  ``self.mask1`` and
    ``self.mask`` are multiplied into the first- and second-layer
    activations (presumably dropout masks).

    node    -- current tree node (uses isLeaf, word, label, left, right)
    correct -- running list of true labels; a new list is created if omitted
    guess   -- running list of predicted labels; a new list is created if
               omitted

    Returns ``(cost, total)``: the summed cross-entropy cost and the number
    of nodes visited in this subtree.
    """
    # A literal [] default would be one shared list that silently grows
    # across every call that omits the argument.
    if correct is None:
        correct = []
    if guess is None:
        guess = []

    def classify(n):
        # Maxout layer + softmax on top of n.hActs1; records the
        # prediction and returns this node's cross-entropy cost.
        masked = n.hActs1 * self.mask1
        pieces = np.zeros((self.maxoutK, self.middleDim))
        for k in range(self.maxoutK):
            pieces[k] = self.W2[k].dot(masked) + self.b2[k]
        (n.hActs2, n.idx) = self.maxout(pieces)
        n.probs = softmax(self.Ws.dot(n.hActs2 * self.mask) + self.bs)
        p = n.probs * make_onehot(n.label, len(self.bs))
        correct.append(n.label)
        guess.append(np.argmax(n.probs))
        return -np.log(np.sum(p))

    cost = total = 0.0

    if node.isLeaf:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        return classify(node), 1

    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)

    # Only combine the children once both have been forward-propagated.
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        node.hActs1 = self.ReLU(self.W1.dot(h) + self.b1)
        cost = classify(node)

    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
开发者ID:alphadl,项目名称:cs224d,代码行数:45,代码来源:rnn2deep_dropout_maxout.py
示例15: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class

    Updates self.grads (U, b2, W, b1) and self.sgrads.L:

        self.grads.U += (gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    L = self.sparams.L
    U = self.params.U
    W = self.params.W
    b1 = self.params.b1
    b2 = self.params.b2
    windowSize = self.windowSize
    wordVecLen = self.wordVecLen
    lambda_ = self.lreg

    ##
    # Forward propagation
    x = hstack(L[window, :])
    h = tanh(W.dot(x) + b1)
    y_hat = softmax(U.dot(h) + b2)

    ##
    # Backpropagation
    delta = y_hat - make_onehot(label, len(y_hat))
    # outer() builds the rank-one gradient directly from the two vectors.
    self.grads.U += outer(delta, h) + lambda_ * U
    self.grads.b2 += delta

    grad_h = U.T.dot(delta) * (1 - h ** 2)
    self.grads.W += outer(grad_h, x) + lambda_ * W
    self.grads.b1 += grad_h

    # Split dJ/dx into one row per window position.
    sgrad_L = W.T.dot(grad_h).reshape(windowSize, wordVecLen)
    for i in range(windowSize):
        self.sgrads.L[window[i], :] = sgrad_L[i, :]
开发者ID:NeighborhoodWang,项目名称:CS224D-problem-set2,代码行数:52,代码来源:nerwindow.py
示例16: forwardProp
def forwardProp(self, node, correct=None, guess=None):
    """Recursively forward-propagate a parse tree through the model.

    Sets ``hActs1``, ``probs`` and ``fprop`` on each node that is
    evaluated, and appends the node's true label / arg-max prediction to
    ``correct`` / ``guess`` (both feed the confusion matrix later).

    node    -- current node in the parse tree
    correct -- running list of truth labels; a new list is created if
               omitted
    guess   -- running list of model guesses; a new list is created if
               omitted

    Returns ``(cost, total)``: the running cross-entropy cost and the
    number of nodes seen, used for accuracy reporting.
    """
    # A literal [] default would be one shared list that silently grows
    # across every call that omits the argument.
    if correct is None:
        correct = []
    if guess is None:
        guess = []

    cost = total = 0.0

    if node.isLeaf:
        node.fprop = True
        node.hActs1 = self.L[:, node.word]
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        # Multiplying by the one-hot label picks the true class probability.
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))
        return cost, 1

    c1, t1 = self.forwardProp(node.left, correct, guess)
    c2, t2 = self.forwardProp(node.right, correct, guess)

    # Only combine the children once both have been forward-propagated.
    if node.left.fprop and node.right.fprop:
        node.fprop = True
        h = np.hstack([node.left.hActs1, node.right.hActs1])
        node.hActs1 = self.ReLU(self.W.dot(h) + self.b)
        node.probs = softmax(self.Ws.dot(node.hActs1) + self.bs)
        p = node.probs * make_onehot(node.label, len(self.bs))
        cost = -np.log(np.sum(p))
        correct.append(node.label)
        guess.append(np.argmax(node.probs))

    cost += c1
    cost += c2
    total += t1
    total += t2
    return cost, total + 1
开发者ID:alphadl,项目名称:cs224d,代码行数:38,代码来源:rnn.py
示例17: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # word indices (three in the handout;
                                     # any length matching W is accepted)
    label = {0,1,2,3,4} # single int, gives class

    Updates self.grads (U, b2, W, b1) and self.sgrads.L:

        self.grads.U += (gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    n_words = len(window)
    vec_len = self.sparams.L.shape[1]

    ##
    # Forward propagation
    words = [self.sparams.L[idx] for idx in window]
    x = reshape(words, vec_len * n_words)  # concatenated word vectors
    z2 = self.params.W.dot(x) + self.params.b1
    a2 = tanh(z2)
    z3 = self.params.U.dot(a2) + self.params.b2
    a3 = softmax(z3)

    ##
    # Backpropagation
    y = make_onehot(label, len(a3))
    delta3 = a3 - y
    dJdU = outer(delta3, a2)
    dJdb2 = delta3
    delta2 = multiply((1 - square(a2)), self.params.U.T.dot(delta3))
    dJdW = outer(delta2, x)
    dJdb1 = delta2

    # Regularization
    regdJdW = self.lreg * self.params.W
    regdJdU = self.lreg * self.params.U

    self.grads.U += (dJdU + regdJdU)
    self.grads.b2 += dJdb2
    self.grads.W += (dJdW + regdJdW)
    self.grads.b1 += dJdb1

    # Split dJ/dx into one row per window position.
    dJdL = reshape(self.params.W.T.dot(delta2), (n_words, vec_len))
    for pos, idx in enumerate(window):
        self.sgrads.L[idx] = dJdL[pos]
开发者ID:NoamGit,项目名称:cs224d-solutions,代码行数:50,代码来源:nerwindow.py
示例18: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class

    Updates self.grads (b2, U, W, b1) and self.sgrads.L:

        self.grads.U += (gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    W = self.params.W
    U = self.params.U
    _, hidden_dim = U.shape

    ##
    # Forward propagation
    x = hstack(self.sparams.L[window])
    pre_act = dot(W, x) + self.params.b1
    h = tanh(pre_act)
    y_hat = softmax(dot(U, h) + self.params.b2)

    ##
    # Backpropagation
    out_err = y_hat - make_onehot(label, len(y_hat))
    self.grads.b2 += out_err
    self.grads.U += outer(out_err, h) + self.lreg * U

    hid_err = dot(U.T, out_err) * d_tanh(pre_act)
    self.grads.W += outer(hid_err, x) + self.lreg * W
    self.grads.b1 += hid_err

    # dJ/dx as a column, cut into one word-vector-sized piece per position.
    dx = dot(W.T, hid_err.reshape((hidden_dim, 1)))
    width = self.word_vec_size
    for pos in range(self.windowsize):
        piece = dx[pos * width:(pos + 1) * width]
        self.sgrads.L[window[pos]] = piece.reshape((width,))
开发者ID:ryu577,项目名称:base,代码行数:49,代码来源:nerwindow_msushkov.py
示例19: _acc_grads
def _acc_grads(self, xs, ys, d):
    """Accumulate gradients for one (sequence, document) training pair.

    xs -- list of input word indices
    ys -- list of target word indices, one per step
    d  -- index of the document vector in self.sparams.D

    The document vector is added to every hidden pre-activation and,
    through G, to every output pre-activation.  Gradients are added to
    self.grads.U, self.grads.G and self.grads.H, and written to
    self.sgrads.L for each visited input word and to self.sgrads.D[d].
    Each step's error is propagated back through at most self.bptt - 1
    earlier steps.
    """
    ns = len(xs)

    # One spare row: hs[-1] is never written, so it stays all zeros and
    # serves as the initial hidden state.
    hs = zeros((ns + 1, self.hdim))
    # predicted probas
    ps = zeros((ns, self.vdim))

    ##
    # Forward propagation
    d_vec = self.sparams.D[d]
    # The document's contribution to the output layer is the same at every
    # step, so compute it once.
    doc_out = self.params.G.dot(d_vec.T).reshape(self.vdim,)
    for t in range(ns):
        z_t = self.params.H.dot(hs[t - 1]) + self.sparams.L[xs[t]] + d_vec
        hs[t] = sigmoid(z_t)
        ps[t] = softmax(self.params.U.dot(hs[t]) + doc_out)

    ##
    # Backward propagation through time
    # sigmoid'(z) = h * (1 - h); hs already holds sigmoid(z) for each step.
    sig_prime = hs[:ns] * (1. - hs[:ns])
    d_grad = zeros_like(self.sparams.D[0])

    for t in reversed(range(ns)):
        eps_t = ps[t] - make_onehot(ys[t], len(ps[t]))
        self.grads.U += outer(eps_t, hs[t])
        self.grads.G += outer(eps_t, d_vec)
        d_grad += self.params.G.T.dot(eps_t)

        # Only the most recent delta is ever needed, so keep a single
        # vector rather than a fresh (ns, hdim) matrix per step.
        delta = sig_prime[t] * self.params.U.T.dot(eps_t)
        self.sgrads.L[xs[t]] = delta.copy()
        d_grad += delta
        self.grads.H += outer(delta, hs[t - 1])

        # Steps t-1 down to t-bptt+1, never past the start of the sequence.
        for j in range(t - 1, max(t - self.bptt, -1), -1):
            delta = sig_prime[j] * self.params.H.T.dot(delta)
            self.sgrads.L[xs[j]] = delta.copy()
            d_grad += delta
            self.grads.H += outer(delta, hs[j - 1])

    self.sgrads.D[d] = d_grad.copy()
开发者ID:afgiel,项目名称:docvec,代码行数:49,代码来源:drnnlm.py
示例20: _acc_grads
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class

    Updates self.grads (U, b2, W, b1) and self.sgrads.L:

        self.grads.U += (gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    W = self.params.W
    U = self.params.U

    ##
    # Forward propagation
    x = hstack(self.sparams.L[window, :])
    pre_hidden = W.dot(x) + self.params.b1
    hidden = tanh(pre_hidden)
    y_hat = softmax(U.dot(hidden) + self.params.b2)

    ##
    # Backpropagation
    out_err = y_hat - make_onehot(label, len(y_hat))
    self.grads.U += outer(out_err, hidden) + self.lreg * U
    self.grads.b2 += out_err

    hid_err = U.T.dot(out_err) * d_tanh(pre_hidden)
    self.grads.W += outer(hid_err, x) + self.lreg * W
    self.grads.b1 += hid_err

    # dJ/dx, one row per window position.
    per_word = W.T.dot(hid_err).reshape(self.windowsize, -1)
    for pos in range(self.windowsize):
        self.sgrads.L[window[pos], :] = per_word[pos]
开发者ID:Scitator,项目名称:NLP-CS224d,代码行数:48,代码来源:nerwindow.py
注：本文中的nn.math.make_onehot函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论