本文整理汇总了Python中theano.tensor.or_函数的典型用法代码示例。如果您正苦于以下问题:Python or_函数的具体用法?Python or_怎么用?Python or_使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了or_函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_updates
def get_updates(self, loss, lr, max_norm=1, beta1=0.9, beta2=0.999,
                epsilon=1e-8, grads=None):
    """Build Adam update rules with global-norm clipping and NaN/Inf guards.

    :param loss: scalar Theano expression to minimize.
    :param lr: learning rate (scalar or symbolic).
    :param max_norm: threshold for global gradient-norm clipping.
    :param beta1, beta2, epsilon: Adam hyperparameters.
    :param grads: optional precomputed gradients; when ``None`` they are
        derived from ``loss`` w.r.t. ``self.trainables``.
    :return: ``(norm, grads, updates)`` -- the global gradient norm, the
        clipped/sanitized gradients, and update pairs for ``theano.function``.
    """
    # Gradients
    if grads is None:
        grads = tensor.grad(loss, self.trainables)
    # Clipping: one common multiplier keeps the global L2 norm <= max_norm.
    norm = tensor.sqrt(sum([tensor.sqr(g).sum() for g in grads]))
    m = theanotools.clipping_multiplier(norm, max_norm)
    grads = [m*g for g in grads]
    # Safeguard against numerical instability: zero all gradients when the
    # norm is NaN, Inf, negative, or absurdly large.
    new_cond = tensor.or_(tensor.or_(tensor.isnan(norm), tensor.isinf(norm)),
                          tensor.or_(norm < 0, norm > 1e10))
    grads = [tensor.switch(new_cond, np.float32(0), g) for g in grads]
    # Safeguard against numerical instability
    #cond = tensor.or_(norm < 0, tensor.or_(tensor.isnan(norm), tensor.isinf(norm)))
    #grads = [tensor.switch(cond, np.float32(0), g) for g in grads]
    # New values: Adam moment estimates; bias correction is folded into the
    # effective learning rate lr_t.
    t = self.time + 1
    lr_t = lr*tensor.sqrt(1. - beta2**t)/(1. - beta1**t)
    means_t = [beta1*m + (1. - beta1)*g for g, m in zip(grads, self.means)]
    vars_t = [beta2*v + (1. - beta2)*tensor.sqr(g) for g, v in zip(grads, self.vars)]
    steps = [lr_t*m_t/(tensor.sqrt(v_t) + epsilon)
             for m_t, v_t in zip(means_t, vars_t)]
    # Updates
    updates = [(x, x - step) for x, step in zip(self.trainables, steps)]
    updates += [(m, m_t) for m, m_t in zip(self.means, means_t)]
    updates += [(v, v_t) for v, v_t in zip(self.vars, vars_t)]
    updates += [(self.time, t)]
    return norm, grads, updates
开发者ID:frsong,项目名称:pyrl,代码行数:35,代码来源:sgd.py
示例2: __init__
def __init__(self, random_state=None, low=0.0, high=1.0):
    """Uniform distribution on [low, high) with symbolic pdf/nnlf/cdf/ppf.

    :param random_state: seed or RandomState forwarded to the base class.
    :param low: lower bound (inclusive).
    :param high: upper bound (exclusive).
    """
    super(Uniform, self).__init__(low=low, high=high,
                                  random_state=random_state,
                                  optimizer=None)
    # pdf: 1/(high-low) inside [low, high), 0 outside.
    self.pdf_ = T.switch(
        T.or_(T.lt(self.X, self.low), T.ge(self.X, self.high)),
        0.,
        1. / (self.high - self.low)).ravel()
    self.make_(self.pdf_, "pdf")
    # -log pdf: log(high-low) inside the support, +inf outside.
    self.nnlf_ = T.switch(
        T.or_(T.lt(self.X, self.low), T.ge(self.X, self.high)),
        np.inf,
        T.log(self.high - self.low)).ravel()
    self.make_(self.nnlf_, "nnlf")
    # cdf: 0 below low, linear ramp on [low, high), 1 above.
    self.cdf_ = T.switch(
        T.lt(self.X, self.low),
        0.,
        T.switch(
            T.lt(self.X, self.high),
            (self.X - self.low) / (self.high - self.low),
            1.)).ravel()
    self.make_(self.cdf_, "cdf")
    # ppf (inverse cdf): map a probability p back onto [low, high].
    self.ppf_ = self.p * (self.high - self.low) + self.low
    self.make_(self.ppf_, "ppf", args=[self.p])
开发者ID:ibab,项目名称:carl,代码行数:32,代码来源:uniform.py
示例3: get_train
def get_train(U_Ot, U_R, lenW, n_facts):
    """Compile the MemNN training step: margin-ranking losses over memory
    selection (one per supporting fact) plus a response-ranking loss.

    :param U_Ot: shared embedding matrix for the memory-selection scorer.
    :param U_R: shared embedding matrix for the response scorer.
    :param lenW: vocabulary size; feature vectors are 3*lenW + 3 long.
    :param n_facts: number of supporting facts (memory hops).
    :return: compiled ``theano.function`` performing one SGD step.

    NOTE(review): ``alpha`` (learning rate) and ``zeros`` are free
    variables resolved at module level -- confirm they exist there.
    """
    # Feature maps: embed a word into one of three lenW-wide slots
    # (two input slots, one candidate slot) padded with 3 extra
    # positions used by phi_t for ordering features.
    def phi_x1(x_t, L):
        return T.concatenate([L[x_t].reshape((-1,)), zeros((2*lenW,)), zeros((3,))], axis=0)
    def phi_x2(x_t, L):
        return T.concatenate([zeros((lenW,)), L[x_t].reshape((-1,)), zeros((lenW,)), zeros((3,))], axis=0)
    def phi_y(x_t, L):
        return T.concatenate([zeros((2*lenW,)), L[x_t].reshape((-1,)), zeros((3,))], axis=0)
    # Pairwise ordering indicators for (x_t, y_t, yp_t) in the 3 tail slots.
    def phi_t(x_t, y_t, yp_t, L):
        return T.concatenate([zeros(3*lenW,), T.stack(T.switch(T.lt(x_t,y_t), 1, 0), T.switch(T.lt(x_t,yp_t), 1, 0), T.switch(T.lt(y_t,yp_t), 1, 0))], axis=0)
    # Score of candidate memory y_t relative to alternative yp_t under U_Ot,
    # summed over the inputs xs (first input uses phi_x1, the rest phi_x2).
    def s_Ot(xs, y_t, yp_t, L):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_Ot.T),
                                 T.dot(U_Ot, (phi_y(y_t, L) - phi_y(yp_t, L) + phi_t(x_t, y_t, yp_t, L)))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()
    # Response score of vocabulary word y_t given inputs xs, under U_R.
    def sR(xs, y_t, L, V):
        result, updates = theano.scan(
            lambda x_t, t: T.dot(T.dot(T.switch(T.eq(t, 0), phi_x1(x_t, L).reshape((1,-1)), phi_x2(x_t, L).reshape((1,-1))), U_R.T),
                                 T.dot(U_R, phi_y(y_t, V))),
            sequences=[xs, T.arange(T.shape(xs)[0])])
        return result.sum()
    # Symbolic inputs.
    x_t = T.iscalar('x_t')
    m = [x_t] + [T.iscalar('m_o%d' % i) for i in xrange(n_facts)]   # selected memories
    f = [T.iscalar('f%d_t' % i) for i in xrange(n_facts)]           # true supporting facts
    r_t = T.iscalar('r_t')                                          # true response word
    gamma = T.scalar('gamma')                                       # hinge margin
    L = T.fmatrix('L')  # list of messages
    V = T.fmatrix('V')  # vocab
    r_args = T.stack(*m)
    # Per-hop hinge losses: wrong memories must score at least gamma below
    # the true fact; both orderings of the (true, wrong) pair are penalized.
    cost_arr = [0] * 2 * (len(m)-1)
    updates_arr = [0] * 2 * (len(m)-1)
    for i in xrange(len(m)-1):
        cost_arr[2*i], updates_arr[2*i] = theano.scan(
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma - s_Ot(T.stack(*m[:i+1]), f[i], t, L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
        cost_arr[2*i+1], updates_arr[2*i+1] = theano.scan(
            lambda f_bar, t: T.switch(T.or_(T.eq(t, f[i]), T.eq(t, T.shape(L)-1)), 0, T.largest(gamma + s_Ot(T.stack(*m[:i+1]), t, f[i], L), 0)),
            sequences=[L, T.arange(T.shape(L)[0])])
    # Response-ranking hinge loss over the whole vocabulary.
    cost1, u1 = theano.scan(
        lambda r_bar, t: T.switch(T.eq(r_t, t), 0, T.largest(gamma - sR(r_args, r_t, L, V) + sR(r_args, t, L, V), 0)),
        sequences=[V, T.arange(T.shape(V)[0])])
    cost = cost1.sum()
    for c in cost_arr:
        cost += c.sum()
    g_uo, g_ur = T.grad(cost, [U_Ot, U_R])
    # Plain SGD on both embedding matrices.
    train = theano.function(
        inputs=[r_t, gamma, L, V] + m + f,
        outputs=[cost],
        updates=[(U_Ot, U_Ot-alpha*g_uo), (U_R, U_R-alpha*g_ur)])
    return train
开发者ID:amiltonwong,项目名称:memnn,代码行数:56,代码来源:main.py
示例4: get_output_for
def get_output_for(self, input, deterministic=False, **kwargs):
    """Return a word-dropout *mask* (not the masked input), OR-ed with
    ``self.previous_mask`` so positions kept by either mask survive.

    In deterministic mode (or when ``self.p == 0``) the mask is all ones.
    """
    if deterministic or self.p == 0:
        return T.ones_like(self.retain, dtype=input.dtype)
    else:
        # Using theano constant to prevent upcasting
        # one = T.constant(1)
        # retain_prob = one - self.p
        # if self.rescale:
        #     input /= retain_prob
        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape
        # apply dropout, respecting shared axes: shared axes get size 1
        # so the same mask value is broadcast along them.
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + input.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=self.retain,
                                   dtype=input.dtype)
        # A position already kept by the previous mask is never dropped.
        mask = T.or_(mask, self.previous_mask)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        return mask
开发者ID:flyrae,项目名称:neural-dep-srl,代码行数:29,代码来源:WordDropout.py
示例5: compute_cost_log_in_parallel
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends, y_ends):
    """Batched CTC-style forward-pass cost computed in log space.

    :param original_rnn_outputs: per-timestep class probabilities.
    :param labels: padded label sequences (one row per batch element).
    :param func: log-space reduction (e.g. logsumexp or max) applied
        over the stacked transition options.
    :param x_ends: per-sequence input lengths.
    :param y_ends: per-sequence label lengths.
    :return: vector of per-sequence negative log costs.

    NOTE(review): ``shift_matrix`` / ``log_shift_matrix`` are module-level
    helpers -- assumed to shift label columns by the given offset.
    """
    # Skip-by-2 transitions are forbidden (log(0) = -inf in the mask) when
    # the target symbol is blank (0) or repeats the symbol two steps back.
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)), T.eq(labels, shift_matrix(labels, 2))))
    # All probability mass starts on the first label position.
    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:,0], 0)
    # Gather, per timestep, the probability of each sequence's own labels.
    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:,label]
    rnn_outputs, _ = theano.map(select_probabilities, [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1,0,2)))
    # One forward step: stay, advance by 1, or advance by 2 (where allowed).
    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )
        result = func(all_forward_probabilities, 0)
        return result
    forward_probabilities, _ = theano.scan(fn = forward_step, sequences = rnn_outputs, outputs_info = initial_state)
    forward_probabilities = forward_probabilities.dimshuffle((1,0,2))
    # Cost: mass in the last two label positions at the final input step.
    def compute_cost(forward_probabilities, x_end, y_end):
        return -func(forward_probabilities[x_end-1,y_end-2:y_end])
    return theano.map(compute_cost, [forward_probabilities, x_ends, y_ends])[0]
开发者ID:choko,项目名称:ctc,代码行数:29,代码来源:ctc.py
示例6: truncated_normal
def truncated_normal(size, avg, std, lbound, ubound, theano_rng, dtype):
    """Sample from Normal(avg, std) truncated to [lbound, ubound] using
    the inverse-CDF method, falling back to the nearer bound when the
    transform is numerically unstable.
    """
    def phi(x):
        # Normal CDF at x.
        erfarg = (x - avg) / (std * SQRT2)
        rval = 0.5 * (1. + T.erf(erfarg))
        return rval.astype(dtype)
    def phi_inv(phi_x):
        # Inverse normal CDF; erfinv input clipped away from +/-1 so the
        # result stays finite.
        erfinv_input = T.clip(2. * phi_x - 1., -1.+1e-6, 1.-1e-6)
        rval = avg + std * SQRT2 * T.erfinv(erfinv_input)
        return rval.astype(dtype)
    # center lower and upper bounds based on mean
    u = theano_rng.uniform(size=size, dtype=dtype)
    cdf_range = phi(ubound) - phi(lbound)
    sample = phi_inv(phi(lbound) + u * cdf_range)
    # if avg >> ubound, return ubound
    # if avg << lbound, return lbound
    # else return phi(lbound) + u * [phi(ubound) - phi(lbound)]
    rval = T.switch(
        T.or_(sample < lbound, sample > ubound),
        T.switch(avg >= ubound, ubound, lbound),
        sample)
    return rval
开发者ID:LeonBai,项目名称:lisa_emotiw-1,代码行数:27,代码来源:truncated.py
示例7: adamgc_
def adamgc_(cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    """Adam updates with global-norm gradient clipping and a parameter-decay
    fallback when the gradient norm is NaN/Inf.

    Note: b1/b2 here are *one minus* the usual Adam betas (the decay
    factors are 1-b1 and 1-b2).

    :return: ``(updates, norm)`` -- update pairs and the squared grad norm.
    """
    updates = []
    grads = T.grad(cost, params)
    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    # Common rescaling factor so the norm never exceeds max_magnitude.
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.0)
    i = shared(floatX(0.0))  # timestep counter
    i_t = i + 1.0
    # Bias-correction factors for the moment estimates.
    fix1 = 1.0 - (1.0 - b1) ** i_t
    fix2 = 1.0 - (1.0 - b2) ** i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        # Non-finite gradient: decay the parameter instead of updating it.
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.0)
        v = shared(p.get_value() * 0.0)
        m_t = (b1 * g) + ((1.0 - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1.0 - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        # e_t = shared(p.get_value() * 0.)
        # de_t = (srnd.normal(p.shape, std = 0.05, dtype=theano.config.floatX)*p_t - e_t)*0.05 #*p_t
        # p_t = p_t + de_t
        # updates.append((e_t, e_t + de_t))
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    # Counter update once per step (inside the loop it would duplicate the
    # (i, i_t) pair, which theano.function rejects).
    updates.append((i, i_t))
    return updates, norm
开发者ID:ronvohra,项目名称:Theano-Lights,代码行数:33,代码来源:toolbox.py
示例8: theano_metrics
def theano_metrics(y_pred, y_true, n_classes, void_labels):
    """
    Returns the intersection I and union U (to compute the jaccard I/U) and the accuracy.

    :param y_pred: tensor of predictions. shape (b*0*1, c) with c = n_classes
    :param y_true: groundtruth, shape (b,0,1) or (b,c,0,1) with c=1
    :param n_classes: int
    :param void_labels: list of indexes of void labels (may be empty)
    :return: return tensors I and U of size (n_classes), and scalar acc
    """
    # Put y_pred and y_true under the same shape
    y_true = T.flatten(y_true)
    y_pred = T.argmax(y_pred, axis=1)
    # We use not_void in case the prediction falls in the void class of the
    # groundtruth. Start from all ones so an empty void_labels list is
    # handled (the original first-iteration special case raised NameError
    # on an empty list because not_void was never bound).
    not_void = T.ones_like(y_true)
    for void_label in void_labels:
        not_void = not_void * T.neq(y_true, void_label)
    I = T.zeros(n_classes)
    U = T.zeros(n_classes)
    # Per-class intersection and union, the union restricted to non-void
    # ground-truth pixels.
    for i in range(n_classes):
        y_true_i = T.eq(y_true, i)
        y_pred_i = T.eq(y_pred, i)
        I = T.set_subtensor(I[i], T.sum(y_true_i * y_pred_i))
        U = T.set_subtensor(U[i], T.sum(T.or_(y_true_i, y_pred_i) * not_void))
    accuracy = T.sum(I) / T.sum(not_void)
    return I, U, accuracy
开发者ID:XiongDuan,项目名称:FC-DenseNet,代码行数:34,代码来源:metrics.py
示例9: exe
def exe(self, mainloop):
    """Normalize ``mainloop.grads`` by ``self.batch_size`` and clip them by
    the global gradient norm against ``self.scaler``; a non-finite norm
    replaces each gradient with a parameter decay term (0.1 * p).
    """
    grads = mainloop.grads
    # Dead code preserved from the original (a no-op string literal):
    """
    for p, g in grads.items():
        grads[p] = g / self.batch_size

    g_norm = 0.
    for g in grads.values():
        g_norm += (g**2).sum()
    """
    # Accumulate the squared global norm while normalizing by batch size.
    g_norm = 0.
    for p, g in grads.items():
        g /= self.batch_size
        grads[p] = g
        g_norm += (g**2).sum()
    not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))
    g_norm = T.sqrt(g_norm)
    # Multiplier < 1 only when the norm exceeds self.scaler.
    scaler = self.scaler / T.maximum(self.scaler, g_norm)
    for p, g in grads.items():
        grads[p] = T.switch(not_finite, 0.1 * p, g * scaler)
    mainloop.grads = grads
开发者ID:anirudh9119,项目名称:cle,代码行数:25,代码来源:ext.py
示例10: mcmc
def mcmc(ll, *frvs):
    """One leapfrog Hamiltonian Monte Carlo step over the free RVs.

    :param ll: negative log-likelihood carried from the previous step.
    :param frvs: current values of the free random variables.
    :return: ``([new_ll, positions...], {}, until(accept))`` -- the shape
        expected of a ``theano.scan`` inner function; the scan stops as
        soon as a proposal is accepted.

    NOTE(review): ``observations``, ``free_RVs``, ``free_RVs_prop``,
    ``full_log_likelihood``, ``epsilon`` and ``U`` are closed over from
    the enclosing scope -- confirm their definitions there.
    """
    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, frvs)]))
    loglik = -full_log_likelihood(full_observations)
    proposals = free_RVs_prop
    # Hamiltonian: kinetic energy of the momenta plus potential energy.
    H = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + loglik
    # -- this should be an inner loop
    # First half-step momentum update, then a full position step.
    g = []
    g.append(tensor.grad(loglik, frvs))
    proposals = [(p - epsilon*gg[0]/2.) for p, gg in zip(proposals, g)]
    rvsp = [(rvs + epsilon*rvp) for rvs,rvp in zip(frvs, proposals)]
    full_observations = dict(observations)
    full_observations.update(dict([(rv, s) for rv, s in zip(free_RVs, rvsp)]))
    new_loglik = -full_log_likelihood(full_observations)
    # Second half-step momentum update at the proposed position.
    gnew = []
    gnew.append(tensor.grad(new_loglik, rvsp))
    proposals = [(p - epsilon*gn[0]/2.) for p, gn in zip(proposals, gnew)]
    # --
    Hnew = tensor.add(*[tensor.sum(tensor.sqr(p)) for p in proposals])/2. + new_loglik
    dH = Hnew - H
    # Metropolis accept/reject on the energy difference (U ~ Uniform(0,1)).
    accept = tensor.or_(dH < 0., U < tensor.exp(-dH))
    return [tensor.switch(accept, -new_loglik, ll)] + \
           [tensor.switch(accept, p, f) for p, f in zip(rvsp, frvs)], \
           {}, theano.scan_module.until(accept)
开发者ID:helson73,项目名称:MonteTheano,代码行数:34,代码来源:sample.py
示例11: graves_rmsprop_updates
def graves_rmsprop_updates(self, params, grads, learning_rate=1e-4, alpha=0.9, epsilon=1e-4, chi=0.95):
    """
    Alex Graves' RMSProp [1]_.

    .. math ::
        n_{i} &= \\chi * n_{i-1} + (1 - \\chi) * grad^{2}\\\\
        g_{i} &= \\chi * g_{i-1} + (1 - \\chi) * grad\\\\
        \\Delta_{i} &= \\alpha * \\Delta_{i-1} - learning\\_rate * grad /
                       sqrt(n_{i} - g_{i}^{2} + \\epsilon)\\\\
        w_{i} &= w_{i-1} + \\Delta_{i}

    Non-finite gradient norms fall back to a parameter decay (0.1 * param).

    References
    ----------
    .. [1] Graves, Alex.
        "Generating Sequences With Recurrent Neural Networks", p.23
        arXiv:1308.0850
    """
    updates = []
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = T.switch(not_finite, 0.1 * param, grad)
        # Running second moment, running mean, and momentum memory.
        old_square = self.running_square_[n]
        old_avg = self.running_avg_[n]
        old_memory = self.memory_[n]
        new_square = chi * old_square + (1. - chi) * grad ** 2
        new_avg = chi * old_avg + (1. - chi) * grad
        # Normalize by the running *variance* estimate (n_i - g_i^2).
        new_memory = alpha * old_memory - learning_rate * grad / T.sqrt(new_square - \
            new_avg ** 2 + epsilon)
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((old_memory, new_memory))
        updates.append((param, param + new_memory))
    return updates
开发者ID:chiggum,项目名称:Neural-Turing-Machines,代码行数:32,代码来源:rmsprop_orig.py
示例12: exe
def exe(self, mainloop):
    """Normalize ``mainloop.grads`` by batch size and clip by the global
    gradient norm against ``self.scaler``; when ``self.check_nan`` is set,
    non-finite norms replace gradients with a parameter decay (0.1 * p).
    """
    grads = mainloop.grads
    # Accumulate the squared global norm while normalizing by batch size.
    g_norm = 0.
    for p, g in grads.items():
        g /= T.cast(self.batch_size, dtype=theano.config.floatX)
        grads[p] = g
        g_norm += (g**2).sum()
    if self.check_nan:
        not_finite = T.or_(T.isnan(g_norm), T.isinf(g_norm))
    g_norm = T.sqrt(g_norm)
    # Multiplier < 1 only when the norm exceeds self.scaler.
    scaler = self.scaler / T.maximum(self.scaler, g_norm)
    if self.check_nan:
        for p, g in grads.items():
            grads[p] = T.switch(not_finite, 0.1 * p, g * scaler)
    else:
        for p, g in grads.items():
            grads[p] = g * scaler
    mainloop.grads = grads
开发者ID:Beronx86,项目名称:cle,代码行数:28,代码来源:ext.py
示例13: tnormal_icdf
def tnormal_icdf(size, avg, std, lbound, ubound, theano_rng, dtype):
    """Sample a truncated normal on [lbound, ubound] by the inverse-CDF
    method, returning the appropriate bound when the CDF range collapses.

    Alternative Method:
        sample = -Phi_inv(Phi(-lbound)*(1-u) + Phi(-ubound)*u)
    """
    def Phi(x):
        # Normal CDF at x.
        erfarg = (x - avg) / (std * SQRT2)
        rval = 0.5 * (1. + T.erf(erfarg))
        return rval.astype(dtype)
    def Phi_inv(y, eps=3e-8):
        """ eps was calibrated for cublas.erfinv using float32 """
        temp = 2. * y - 1.
        erfinv_input = T.clip(temp, -1+eps, 1-eps)
        rval = avg + std * SQRT2 * T.erfinv(erfinv_input)
        return rval.astype(dtype)
    # center lower and upper bounds based on mean
    u = theano_rng.uniform(size=size, dtype=dtype)
    # Inverse CDF method. When method becomes numerically unstable, we simply
    # return the bounds based on whether avg < lbound, or ubound < avg.
    cdf_range = Phi(ubound) - Phi(lbound)
    sample = T.switch(
        T.or_(
            T.lt(cdf_range, 3e-8),
            T.gt(cdf_range, 1-3e-8)),
        T.switch(
            T.lt(avg, lbound),
            lbound,
            ubound),
        Phi_inv(Phi(lbound) + u * cdf_range))
    return sample
开发者ID:gdesjardins,项目名称:hossrbm,代码行数:35,代码来源:truncated.py
示例14: minimize
def minimize(self, loss, momentum, rescale):
    """Build RMSProp-with-momentum update pairs for ``loss``.

    :param loss: scalar expression forwarded to the base-class minimize.
    :param momentum: Nesterov-style momentum coefficient.
    :param rescale: global gradient-norm clipping threshold.
    :return: list of ``(shared, new_value)`` update pairs.
    """
    super(RMSPropOptimizer, self).minimize(loss)
    grads = self.gradparams
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1E-4
    updates = []
    params = self.params
    for n, (param, grad) in enumerate(zip(params, grads)):
        # Non-finite norm: decay the parameter; otherwise clip by norm.
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (scaling_num / scaling_den))
        old_square = self.running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * T.sqr(grad)
        old_avg = self.running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        # RMS denominator from the running variance, floored for stability.
        rms_grad = T.sqrt(new_square - new_avg ** 2)
        rms_grad = T.maximum(rms_grad, minimum_grad)
        memory = self.memory_[n]
        update = momentum * memory - self.lr * grad / rms_grad
        # Nesterov-style lookahead applied to the parameter itself.
        update2 = momentum * momentum * memory - (
            1 + momentum) * self.lr * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
开发者ID:tomokishii,项目名称:Qiita-posts,代码行数:34,代码来源:music_scale_classify_old.py
示例15: adamgc
def adamgc(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    """Adam updates with global-norm gradient clipping and a parameter-decay
    fallback for NaN/Inf gradient norms (cf. ``adamgc_``).

    Note: b1/b2 here are *one minus* the usual Adam betas.

    :return: ``(updates, norm)`` -- update pairs and the squared grad norm.
    """
    updates = []
    grads = T.grad(cost, params)
    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    # Common rescaling factor so the norm never exceeds max_magnitude.
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)
    i = shared(floatX(0.))  # timestep counter
    i_t = i + 1.
    # Bias-correction factors for the moment estimates.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        # Non-finite gradient: decay the parameter instead of updating it.
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    # Counter update once per step (inside the loop it would duplicate the
    # (i, i_t) pair, which theano.function rejects).
    updates.append((i, i_t))
    return updates, norm
开发者ID:Weichern,项目名称:Theano-Lights,代码行数:27,代码来源:toolbox.py
示例16: compute_updates
def compute_updates(self, training_cost, params):
    """Compute parameter updates for ``training_cost`` using the optimizer
    named by ``self.updater``, after clipping gradients by global norm
    (``self.cutoff``) and guarding against NaN/Inf norms.

    :raises Exception: for 'sgd' (not implemented) or an unknown updater.
    :return: list of update pairs for ``theano.function``.
    """
    updates = []
    grads = T.grad(training_cost, params)
    grads = OrderedDict(zip(params, grads))
    # Clip stuff
    c = numpy.float32(self.cutoff)
    clip_grads = []
    norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
    # Rescale only when the norm exceeds the cutoff c.
    normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
    notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
    # Non-finite norm: decay each parameter instead of applying its gradient.
    for p, g in grads.items():
        clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))
    grads = OrderedDict(clip_grads)
    # Dispatch to the configured update rule.
    if self.updater == 'adagrad':
        updates = Adagrad(grads, self.lr)
    elif self.updater == 'sgd':
        raise Exception("Sgd not implemented!")
    elif self.updater == 'adadelta':
        updates = Adadelta(grads)
    elif self.updater == 'rmsprop':
        updates = RMSProp(grads, self.lr)
    elif self.updater == 'adam':
        updates = Adam(grads)
    else:
        raise Exception("Updater not understood!")
    return updates
开发者ID:npow,项目名称:hed-dlg,代码行数:32,代码来源:dialog_encdec.py
示例17: updates
def updates(self, cost, params, learning_rate = 0.1, momentum= 0.95, rescale=5.):
    """RMSProp-with-momentum update pairs for ``cost`` w.r.t. ``params``,
    with global gradient-norm clipping at ``rescale`` and a parameter-decay
    fallback (0.1 * param) when the norm is NaN/Inf.

    :return: list of ``(shared, new_value)`` update pairs.
    """
    grads = T.grad(cost, params)
    grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), grads)))
    not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
    grad_norm = T.sqrt(grad_norm)
    scaling_num = rescale
    scaling_den = T.maximum(rescale, grad_norm)
    # Magic constants
    combination_coeff = 0.9
    minimum_grad = 1e-4
    updates = []
    for n, (param, grad) in enumerate(zip(params, grads)):
        grad = T.switch(not_finite, 0.1 * param,
                        grad * (scaling_num / scaling_den))
        old_square = self.running_square_[n]
        new_square = combination_coeff * old_square + (
            1. - combination_coeff) * T.sqr(grad)
        old_avg = self.running_avg_[n]
        new_avg = combination_coeff * old_avg + (
            1. - combination_coeff) * grad
        # RMS denominator from the running variance, floored for stability.
        rms_grad = T.sqrt(new_square - new_avg ** 2)
        rms_grad = T.maximum(rms_grad, minimum_grad)
        memory = self.memory_[n]
        update = momentum * memory - learning_rate * grad / rms_grad
        # Nesterov-style lookahead applied to the parameter itself.
        update2 = momentum * momentum * memory - (
            1 + momentum) * learning_rate * grad / rms_grad
        updates.append((old_square, new_square))
        updates.append((old_avg, new_avg))
        updates.append((memory, update))
        updates.append((param, param + update2))
    return updates
开发者ID:cauchyturing,项目名称:DeepMONA,代码行数:31,代码来源:update_func.py
示例18: get_gradients
def get_gradients(self, model, data, ** kwargs):
    """Return (gradients, updates) for the cost, optionally clipping the
    gradients by global norm (``self.max_magnitude``) with a decay fallback
    (0.1 * param) when the norm is NaN/Inf.
    """
    cost = self.expr(model=model, data=data, **kwargs)
    params = list(model.get_params())
    grads = T.grad(cost, params, disconnected_inputs='ignore')
    gradients = OrderedDict(izip(params, grads))
    if self.gradient_clipping:
        # Squared global norm accumulated over all gradients.
        norm_gs = 0.
        for grad in gradients.values():
            norm_gs += (grad ** 2).sum()
        not_finite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
        norm_gs = T.sqrt(norm_gs)
        # NOTE: norm_gs is reused here as the *scaling factor* (<= 1),
        # not the norm itself -- the name is misleading.
        norm_gs = T.switch(T.ge(norm_gs, self.max_magnitude),
                           self.max_magnitude / norm_gs,
                           1.)
        for param, grad in gradients.items():
            gradients[param] = T.switch(not_finite,
                                        .1 * param,
                                        grad * norm_gs)
    updates = OrderedDict()
    return gradients, updates
开发者ID:Sandy4321,项目名称:librnn,代码行数:28,代码来源:rnn.py
示例19: abs
def abs(x, axis=0):
    """Euclidean (L2) norm of ``x`` along ``axis``.

    Note: despite the name (which shadows the builtin ``abs``), this
    computes sqrt(sum(x**2, axis)) -- a norm reduction, not the
    element-wise absolute value.

    :param x: symbolic tensor, asserted to be 1-D or 2-D.
    :param axis: axis along which to reduce.
    :return: tensor with ``axis`` reduced away.
    """
    checked = assert_op(x, T.or_(T.eq(x.ndim, 2), T.eq(x.ndim, 1)))
    squared_total = T.sum(T.sqr(checked), axis)
    return T.sqrt(squared_total)
开发者ID:Azrael1,项目名称:Seq-Gen,代码行数:8,代码来源:treelstmupdated.py
示例20: find_right_bound
def find_right_bound(prev_func_output, step, maxstep):
    """One bracketing step for a line search: double ``step`` (capped at
    ``maxstep``) while ``f`` keeps decreasing; stop once ``f`` increases
    or the cap is reached.

    :return: ``([f(step), next_step], until(...))`` for ``theano.scan``.
    """
    func_output = f(step)
    output_decreased = T.gt(prev_func_output, func_output)
    output_increased = T.lt(prev_func_output, func_output)
    # Expand the bracket only while the function is still going downhill.
    step = ifelse(output_decreased, T.minimum(2.0 * step, maxstep), step)
    stoprule = theano.scan_module.until(T.or_(output_increased, step > maxstep))
    return [func_output, step], stoprule
开发者ID:itdxer,项目名称:neupy,代码行数:8,代码来源:golden_search.py
注:本文中的theano.tensor.or_函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论