
Python theano.grad Function Code Examples


This article collects typical usage examples of Python's theano.grad function. If you are unsure what grad does, how to call it, or what it looks like in real code, the curated examples below may help.



The following presents 20 code examples of the grad function, ordered by popularity by default.
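
Before the collected examples, here is a minimal sketch of the basic call pattern (not drawn from any of the projects below): theano.grad takes a scalar cost expression and the variable(s) to differentiate with respect to, and returns a symbolic gradient expression that can then be compiled with theano.function.

import theano
import theano.tensor as T

x = T.dvector('x')         # symbolic input vector
cost = (x ** 2).sum()      # scalar cost: sum of squares
g = theano.grad(cost, x)   # symbolic gradient of cost w.r.t. x, i.e. 2 * x

f = theano.function([x], g)
print(f([1.0, 2.0, 3.0]))  # expected: [ 2.  4.  6.]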

Example 1: test_dnn_conv_merge

def test_dnn_conv_merge():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    img = T.ftensor4()
    kern = T.ftensor4()
    out = T.ftensor4()

    b = 1
    c = 4
    f = 3
    ih = 5
    iw = 8
    kh = 2
    kw = 6
    img_val = numpy.random.random((b, c, ih, iw)).astype("float32")
    kern_val = numpy.random.random((f, c, kh, kw)).astype("float32")
    out_val = numpy.random.random((b, f, ih - kh + 1, iw - kw + 1)).astype("float32")

    conv = dnn.dnn_conv(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype="float32")

    if cuda.dnn.version() == -1:
        # Can't merge alpha with cudnn v1
        fr = conv + out
        wr = kern + gw
        ir = img + gi
    else:
        fr = lr * (conv + out)
        wr = kern + lr * gw
        ir = img + lr * gi

    f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
    assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op, dnn.GpuDnnConv)
    assert isinstance(f1.maker.fgraph.outputs[1].owner.inputs[0].owner.op, dnn.GpuDnnConvGradW)
    assert isinstance(f1.maker.fgraph.outputs[2].owner.inputs[0].owner.op, dnn.GpuDnnConvGradI)

    mode = mode_with_gpu
    mode = mode.excluding("local_dnn_conv_alpha_merge")
    mode = mode.excluding("local_dnn_convw_alpha_merge")
    mode = mode.excluding("local_dnn_convi_alpha_merge")
    mode = mode.excluding("local_dnn_conv_output_merge")
    mode = mode.excluding("local_dnn_convw_output_merge")
    mode = mode.excluding("local_dnn_convi_output_merge")

    f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)

    assert not isinstance(f2.maker.fgraph.outputs[0].owner.inputs[0].owner.op, dnn.GpuDnnConv)
    assert not isinstance(f2.maker.fgraph.outputs[1].owner.inputs[0].owner.op, dnn.GpuDnnConvGradW)
    assert not isinstance(f2.maker.fgraph.outputs[2].owner.inputs[0].owner.op, dnn.GpuDnnConvGradI)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)

    for v1, v2 in zip(out_f1, out_f2):
        utt.assert_allclose(v1, v2)
Developer: dapeng2018, Project: Theano, Lines: 60, Source: test_dnn.py


Example 2: test_grad_types

    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        cpu_input = tensor.ftensor4()
        cpu_filters = tensor.ftensor4()
        cpu_topgrad = tensor.ftensor4()
        gpu_input = gpu_ftensor4()
        gpu_filters = gpu_ftensor4()
        gpu_topgrad = gpu_ftensor4()

        out_shape = tensor.lvector()

        # Check the gradient of the forward conv2d
        for input, filters in itertools.product((cpu_input, gpu_input), (cpu_filters, gpu_filters)):
            output = conv.conv2d(input, filters)
            grad_input, grad_filters = theano.grad(output.sum(), wrt=(input, filters))
            assert grad_input.type == input.type, (grad_input, grad_input.type, input, input.type)
            assert grad_filters.type == filters.type, (grad_filters, grad_filters.type, filters, filters.type)

        # Check the gradient of gradweight
        for input, topgrad in itertools.product((cpu_input, gpu_input), (cpu_topgrad, gpu_topgrad)):
            grad_filters = conv.AbstractConv2d_gradWeights()(input, topgrad, out_shape)
            grad_input, grad_topgrad = theano.grad(grad_filters.sum(), wrt=(input, topgrad))

            assert grad_input.type == input.type, (grad_input, grad_input.type, input, input.type)
            assert grad_topgrad.type == topgrad.type, (grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)

        # Check the gradient of gradinputs
        for filters, topgrad in itertools.product((cpu_filters, gpu_filters), (cpu_topgrad, gpu_topgrad)):
            grad_input = conv.AbstractConv2d_gradInputs()(filters, topgrad, out_shape)
            grad_filters, grad_topgrad = theano.grad(grad_input.sum(), wrt=(filters, topgrad))

            assert grad_filters.type == filters.type, (grad_filters, grad_filters.type, filters, filters.type)
            assert grad_topgrad.type == topgrad.type, (grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)
Developer: emillynge, Project: Theano, Lines: 34, Source: test_abstractconv.py


Example 3: test_fill_grad

 def test_fill_grad(self):
     # Fix bug reported at
     # https://groups.google.com/d/topic/theano-users/nQshB8gUA6k/discussion
     x = TensorType(config.floatX, [0, 1, 0])('x')
     y = TensorType(config.floatX, [0, 1, 0])('y')
     e = tensor.second(x, y)
     theano.grad(e.sum(), y)
Developer: AI-Cdrone, Project: Theano, Lines: 7, Source: test_elemwise.py


Example 4: make_w_updates

    def make_w_updates(self, loss, params):
        w_updates = OrderedDict()
        
        params_tilde = [theano.shared(x.get_value()) for x in params] 
        loss_tilde = theano.clone(loss, replace=zip(params, params_tilde))

        grads = theano.grad(loss, params)
        grads_tilde = theano.grad(loss_tilde, params_tilde)

        it_num = theano.shared(np.cast['int16'](0))
        it = it_num + 1

        for param, grad, mu, param_tilde, grad_tilde in zip(params, grads, self.mu, params_tilde, grads_tilde):
#            new_param = param - self.learning_rate * (grad - grad_tilde + mu)

            new_param = param - (1. / self.L) * (grad - grad_tilde + mu)
            w_updates[param] = new_param
            w_updates[param_tilde] = ifelse(T.eq(it % self.m, 0), new_param, param_tilde)
            
            w_updates[self.counted_gradient] = self.counted_gradient + 2
        
        if self.adaptive:
            w_updates[self.L] = self.L / 2

        self.it_num = it_num
        
        w_updates[it_num] = it
        return w_updates
Developer: myt00seven, Project: svrg, Lines: 28, Source: SVRGOptimizer.py


Example 5: test_prod_no_zeros_in_input

    def test_prod_no_zeros_in_input(self):
        x = theano.tensor.dmatrix()
        x_val = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype='float32')
        pwz = Prod(axis=1, no_zeros_in_input=True)(x)
        fn = theano.function([x], pwz, mode=self.mode)

        assert numpy.allclose(fn(x_val), [6, 120, 504])

        pwz = Prod(no_zeros_in_input=True)(x)
        g = theano.grad(pwz, x)
        gg = theano.grad(g.sum(), x)
        fn = theano.function([x], g, mode=self.mode)
        assert numpy.allclose(fn(x_val),
                              [[362880., 181440., 120960.],
                               [90720., 72576., 60480.],
                               [51840., 45360., 40320.]])
        fn = theano.function([x], gg, mode=self.mode)
        assert numpy.allclose(fn(x_val),
                              [[663696., 422568., 301872.],
                               [233964., 190800., 161016.],
                               [139248., 122652., 109584.]])
        unittest_tools.verify_grad(Prod(axis=1, no_zeros_in_input=True),
                                   [x_val],
                                   mode=self.mode)
        unittest_tools.verify_grad(Prod(no_zeros_in_input=True), [x_val],
                                   mode=self.mode)

        def second_deriv(x):
            return theano.grad(Prod(no_zeros_in_input=True)(x), x)
        unittest_tools.verify_grad(second_deriv, [x_val],
                                   mode=self.mode)
Developer: AI-Cdrone, Project: Theano, Lines: 31, Source: test_elemwise.py


Example 6: update_opt

    def update_opt(
        self, loss, target, leq_constraint, inputs, extra_inputs=None, constraint_name="constraint", *args, **kwargs
    ):
        """
        :param loss: Symbolic expression for the loss function.
        :param target: A parameterized object to optimize over. It should implement methods of the
        :class:`rllab.core.paramerized.Parameterized` class.
        :param leq_constraint: A constraint provided as a tuple (f, epsilon), of the form f(*inputs) <= epsilon.
        :param inputs: A list of symbolic variables as inputs, which could be subsampled if needed. It is assumed
        that the first dimension of these inputs should correspond to the number of data points
        :param extra_inputs: A list of symbolic variables as extra inputs which should not be subsampled
        :return: No return value.
        """

        inputs = tuple(inputs)
        if extra_inputs is None:
            extra_inputs = tuple()
        else:
            extra_inputs = tuple(extra_inputs)

        constraint_term, constraint_value = leq_constraint

        params = target.get_params(trainable=True)
        grads = theano.grad(loss, wrt=params)
        flat_grad = ext.flatten_tensor_variables(grads)

        constraint_grads = theano.grad(constraint_term, wrt=params)
        xs = tuple([ext.new_tensor_like("%s x" % p.name, p) for p in params])
        Hx_plain_splits = TT.grad(TT.sum([TT.sum(g * x) for g, x in itertools.izip(constraint_grads, xs)]), wrt=params)
        Hx_plain = TT.concatenate([TT.flatten(s) for s in Hx_plain_splits])

        self._target = target
        self._max_constraint_val = constraint_value
        self._constraint_name = constraint_name

        if self._debug_nan:
            from theano.compile.nanguardmode import NanGuardMode

            mode = NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True)
        else:
            mode = None

        self._opt_fun = ext.lazydict(
            f_loss=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=loss, log_name="f_loss", mode=mode
            ),
            f_grad=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=flat_grad, log_name="f_grad", mode=mode
            ),
            f_Hx_plain=lambda: ext.compile_function(
                inputs=inputs + extra_inputs + xs, outputs=Hx_plain, log_name="f_Hx_plain", mode=mode
            ),
            f_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=constraint_term, log_name="constraint", mode=mode
            ),
            f_loss_constraint=lambda: ext.compile_function(
                inputs=inputs + extra_inputs, outputs=[loss, constraint_term], log_name="f_loss_constraint", mode=mode
            ),
        )
Developer: yenchenlin, Project: rllab, Lines: 59, Source: conjugate_gradient_optimizer.py


Example 7: conv_grad

def conv_grad(mode, bs, ch, nf, rImg1, rImg2, rFlt1, rFlt2, subsample, op):
    ishape = (bs, ch, rImg1, rImg2)
    kshape = (nf, ch, rFlt1, rFlt2)

    npy_img = theano._asarray(numpy.random.rand(*ishape), dtype='float32')
    npy_kern = theano._asarray(numpy.random.rand(*kshape), dtype='float32')

    i = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_img.shape])()
    k = cuda.CudaNdarrayType(
        broadcastable=[sh == 1 for sh in npy_kern.shape])()

    # TODO: also test custom pad values
    corr_op = op(mode, subsample)(i, k)
    # try to compile reference implementation without shape,
    # so we don't have to compile hundreds of versions
    conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
                                 border_mode=mode, subsample=subsample)
    try:
        conv_op_di = theano.grad(conv_op.sum(), i)
        conv_op_dk = theano.grad(conv_op.sum(), k)
    except Exception:
        # compile with shape information only when needed
        conv_op = tensor.nnet.conv2d(i, k[:, :, ::-1, ::-1],
                                     ishape, kshape, mode, subsample)
    conv_op_di = theano.grad(conv_op.sum(), i)
    conv_op_dk = theano.grad(conv_op.sum(), k)
    corr_op_di = theano.grad(corr_op.sum(), i)
    corr_op_dk = theano.grad(corr_op.sum(), k)
    outputs = [corr_op, conv_op,
               corr_op_di, conv_op_di,
               corr_op_dk, conv_op_dk]
    try:
        conv_op_dik = theano.grad(conv_op_di.sum(), k)
        conv_op_dki = theano.grad(conv_op_dk.sum(), i)
        corr_op_dik = theano.grad(corr_op_di.sum(), k)
        corr_op_dki = theano.grad(corr_op_dk.sum(), i)
        outputs.extend([corr_op_dik, conv_op_dik,
                        corr_op_dki, conv_op_dki])
    except Exception:
        # skip if the reference implementation can't do it
        pass

    f = theano.function([i, k], outputs, mode=theano_mode.excluding('conv_dnn', 'conv_gemm'))

    allvals = f(npy_img, npy_kern)

    for a, b, oa, ob, p in zip(allvals[::2], allvals[1::2],
                               outputs[::2], outputs[1::2],
                               ('top', 'dtop/dbottom', 'dtop/dweight',
                                'dtop/dbottom/dweight', 'dtop/dweight/dbottom')):
        assert oa.type.broadcastable[:2] == ob.type.broadcastable[:2]

        assert_allclose(a, b, rtol=1e-4)
Developer: aalmah, Project: Theano, Lines: 54, Source: test_conv_cuda_ndarray.py


Example 8: oneStep

        def oneStep(w):
            t = rng.choice(size=(1,), a=n)

            loss_part_tilde = objective(getpred(data[t], param), target[t])
            loss_part_tilde = loss_part_tilde.mean()
            g_tilde = theano.grad(loss_part_tilde, param)
        
            loss_part = objective(getpred(data[t], w), target[t])
            loss_part = loss_part.mean()
            g = theano.grad(loss_part, w)

            w = w - learning_rate * (g - g_tilde + mu)
            return w
Developer: myt00seven, Project: svrg, Lines: 13, Source: large_gpu_cifar10_ffn.py


Example 9: test_normal_logEI

def test_normal_logEI():
    #rng = np.random.RandomState(123)

    N = 2000
    thresh = np.linspace(-10, 50, N)
    #N = 100
    #thresh = np.linspace(37, 38, N)
    mean = thresh * 0
    var = thresh * 0 + 1

    s_t, s_m, s_v = theano.tensor.dvectors('tmv')

    fn = theano.function([s_t, s_m, s_v],
                         gpr_math.s_normal_logEI(s_t, s_m, s_v))

    if 0:
        #print zip(thresh, fn(thresh, mean, var))
        #print 
        a = theano.tensor.dvector()
        y = s_t ** 2 * a[2] + s_t * a[1] + a[0]
        cost = ((y - gpr_math.s_normal_logEI(s_t, s_m, s_v)) ** 2).sum()
        da = theano.grad(cost, a)
        foo = theano.function([a, s_t, s_m, s_v], [cost, da])
        res = scipy.optimize.minimize(foo, [0, -1, -1], jac=True,
                                      args=(thresh, mean, var),
                                      method='L-BFGS-B')
        print res.x

    from hyperopt.criteria import logEI_gaussian
    if 0:
        import matplotlib.pyplot as plt
        y = fn(thresh, mean, var)
        z = logEI_gaussian(mean, var, thresh)
        plt.plot(thresh, y)
        plt.plot(thresh, z)
        plt.show()

    # -- the gpr_math logEI uses a quadratic approximation for very
    #    hopeless points, which gives the right derivative, but the
    #    slightly wrong value
    assert np.allclose(logEI_gaussian(mean, var, thresh),
                       fn(thresh, mean, var),
                       atol=1e-3, rtol=1e-4)

    if 0:
        d_t = theano.grad(gpr_math.s_normal_logEI(s_t, s_m, s_v).sum(), s_t)
        d_fn = theano.function([s_t, s_m, s_v], d_t)

        import matplotlib.pyplot as plt
        plt.plot(thresh, d_fn(thresh, mean, var))
        plt.show()
Developer: gopal-m, Project: hyperopt-gpsmbo, Lines: 51, Source: test_gpr_math.py


Example 10: get_gradients

 def get_gradients(self):
     dot = theano.dot
     _dO = theano.grad(self.netS, self.outputs)
     _b2 = T.sum(_dO, axis=0)
     H = self.layers[-3]
     _dW2 = dot(H.T, _dO)
     _dH = dot(_dO, self.seg.params["W2"].T)
     I = self.layers[0]
     _dA = _dH * (H - H * H)
     _b1 = T.sum(_dA, axis=0)
     _dW1 = dot(I.T, _dA)
     _I = dot(_dA, self.seg.params["W1"].T)
     _C = theano.grad(T.sum(I * _I), self.seg.params["C"])
     return [_C, _dW1, _b1, _dW2, _b2]
Developer: sugaton, Project: theano_feedforwardNN, Lines: 14, Source: wordseg.py


Example 11: _collins_grad

 def _collins_grad(scores):
     trans_p = [self.params["A"]]
     net_p = [p for k, p in self.params.items() if k != "A"]
     net_S = [ns for ns, ts in scores]
     trans_S = [ts for ns, ts in scores]
     # transition score updates
     transg = [theano.grad(S, trans_p) for S in trans_S]
     trans_grad = [sum([transg[i][j] for i in range(len(transg))]) / self.batchsize for j in range(len(trans_p))]
     trans_upd = [(p, p + self.alfa[p].getupdate(g)) for p, g in zip(trans_p, trans_grad)]
     # network parameters update
     netsg = [theano.grad(S, net_p) for S in net_S]
     net_grad = [sum([netsg[i][j] for i in range(len(netsg))]) / self.batchsize for j in range(len(net_p))]
     # net_grad = [theano.grad(net_S[i], p) for p in net_p]
     net_upd = [(p, p + self.alfa[p].getupdate(g)) for p, g in zip(net_p, net_grad)]
     return trans_upd + net_upd
Developer: sugaton, Project: theano_feedforwardNN, Lines: 15, Source: wordseg.py


Example 12: get_or_compute_grads

def get_or_compute_grads(loss_or_grads, params, regularizers={}):
    """Helper function returning a list of gradients.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to return the gradients for
    regularizers : dict 
        'c' : clip_norm(g, c, n)
        'func' : l2 or l1
    Returns
    -------
    list of expressions
        If `loss_or_grads` is a list, it is assumed to be a list of
        gradients and returned as is, unless it does not match the length
        of `params`, in which case a `ValueError` is raised.
        Otherwise, `loss_or_grads` is assumed to be a cost expression and
        the function returns `theano.grad(loss_or_grads, params)`.
    """
    if isinstance(loss_or_grads, list):
        if not len(loss_or_grads) == len(params):
            raise ValueError("Got %d gradient expressions for %d parameters" %
                             (len(loss_or_grads), len(params)))
        return loss_or_grads
    else:
        c = regularizers.get('c', 0.0)
        regularizers_funcs = regularizers.get('func', [])
        if len(regularizers_funcs) == 0 and c == 0.0:
            return theano.grad(loss_or_grads, params)
        else:

            grads = theano.grad(loss_or_grads, params)
            # Max-Norm
            if c > 0:
                norm = T.sqrt(sum([T.sum(g**2) for g in grads]))
                grads = [clip_norm(g, c, norm) for g in grads]

            new_grads = []
            for p, g, r in zip(params, grads, regularizers_funcs):
                if r is None:
                    new_grads.append(g)
                else:
                    # L1 or L2 func
                    new_grads.append(r(g, p))

            return new_grads
Developer: abnering, Project: dynamic_cnn_for_nlp, Lines: 48, Source: sgd.py


Example 13: gen_updates_sgd

def gen_updates_sgd(loss, all_parameters, learning_rate):
  
    all_grads = [theano.grad(loss, param) for param in all_parameters]
    updates = []
    for param_i, grad_i in zip(all_parameters, all_grads):
        updates.append((param_i, param_i - learning_rate * grad_i))  # (shared variable, update expression) pair
    return updates
Developer: matsaragas, Project: learning, Lines: 7, Source: layers.py
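
A hedged usage sketch (none of the names below come from the excerpt above): assuming loss is a scalar Theano expression built from shared parameters params and symbolic inputs x and y, the (parameter, new value) pairs returned by gen_updates_sgd would typically be passed to theano.function via its updates argument.

# Hypothetical wiring; `x`, `y`, `loss`, and `params` are assumed, not from the excerpt.
updates = gen_updates_sgd(loss, params, learning_rate=0.01)
train_fn = theano.function([x, y], loss, updates=updates)  # each call applies the SGD updates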


Example 14: get_partial_diff

 def get_partial_diff(self, differentiable_var_name):
     diff_var = self.var_lookup[differentiable_var_name]
     grad = theano.function(self.variables,
                            theano.grad(self.output_expression,
                                        diff_var),
                            allow_input_downcast=True)
     return self.f, grad
Developer: grahamsdoman, Project: pysterior, Lines: 7, Source: energy.py


Example 15: forward_jacobian_log_det

 def forward_jacobian_log_det(self, x):
     y_sum = self.forward_map(x).sum()
     dy_dx = th.grad(y_sum, x)
     if self.fudge != 0.:
         return tt.log(dy_dx + self.fudge).sum()
     else:
         return tt.log(dy_dx).sum()
Developer: matt-graham, Project: differentiable-generator-networks, Lines: 7, Source: invertible_layers.py


Example 16: test_grad

    def test_grad(self):
        eps = 1e-7
        f, args, vals = self.get_args()
        output0 = f(*vals)

        # Go through and backpropagate all of the gradients from the outputs
        grad0 = []
        for i in range(len(output0) - 2):
            grad0.append([])
            for j in range(output0[i].size):
                ind = np.unravel_index(j, output0[i].shape)

                g = theano.function(
                    args, theano.grad(self.op(*args)[i][ind], args))
                grad0[-1].append(g(*vals))

        # Loop over each input and numerically compute the gradient
        for k in range(len(vals)):
            for l in range(vals[k].size):
                inner = np.unravel_index(l, vals[k].shape)
                vals[k][inner] += eps
                plus = f(*vals)
                vals[k][inner] -= 2*eps
                minus = f(*vals)
                vals[k][inner] += eps

                # Compare to the backpropagated gradients
                for i in range(len(output0) - 2):
                    for j in range(output0[i].size):
                        ind = np.unravel_index(j, output0[i].shape)
                        delta = 0.5 * (plus[i][ind] - minus[i][ind]) / eps
                        ref = grad0[i][j][k][inner]
                        assert np.abs(delta - ref) < 2*eps, \
                            "{0}".format((k, l, i, j, delta, ref, delta-ref))
Developer: dfm, Project: exoplanet, Lines: 34, Source: solve_test.py


Example 17: build_updates_with_micro

def build_updates_with_micro(loss, all_params, learning_rate,  beta1=0.1, beta2=0.001,
                    epsilon=1e-8):
    """ Adam update rule by Kingma and Ba, ICLR 2015. """
    all_grads = theano.grad(loss, all_params)
    updates, micro_updates = [], []
    # all_grads = nn.updates.total_norm_constraint(all_grads, 1)
    t = theano.shared(1) # timestep, for bias correction
    for param_i, grad_i in zip(all_params, all_grads):
        zeros = np.zeros(param_i.get_value(borrow=True).shape, dtype=theano.config.floatX)
        mparam_i = theano.shared(zeros) # 1st moment
        vparam_i = theano.shared(zeros.copy()) # 2nd moment
        sum_grad_i = theano.shared(zeros.copy())

        micro_updates.append((sum_grad_i, sum_grad_i+grad_i))

        grad = sum_grad_i / np.float32(mini_batch_size//batch_size)
        m = beta1 * grad + (1 - beta1) * mparam_i # new value for 1st moment estimate
        v = beta2 * T.sqr(grad) + (1 - beta2) * vparam_i # new value for 2nd moment estimate

        m_unbiased = m / (1 - (1 - beta1) ** t.astype(theano.config.floatX))
        v_unbiased = v / (1 - (1 - beta2) ** t.astype(theano.config.floatX))
        w = param_i - learning_rate * m_unbiased / (T.sqrt(v_unbiased) + epsilon) # new parameter values

        updates.append((mparam_i, m))
        updates.append((vparam_i, v))
        updates.append((param_i, w))
        updates.append((sum_grad_i, zeros.copy()))
    updates.append((learning_rate, learning_rate * (1-learning_rate_decay)))
    updates.append((t, t + 1))

    return updates, micro_updates
Developer: lpigou, Project: ijcv16, Lines: 31, Source: 3dlstm.py


Example 18: _training_updates

    def _training_updates(self, **kwargs):
        """Returns the update expression for updating the model parameters
        during training. The formula for updating an argument is
            
        .. math::

           \theta^{(k+1)} = \theta^{(k)} - learning\_rate * \frac{\partial cost}{\partial \theta}

        Expects a 'learning_rate' and 'cost' kwarg.
            
        :type learning_rate: theano.config.floatX
        :param learning_rate: The learning rate for parameter updates.
                                  
        :type cost: theano.tensor.TensorType
        :param cost: The cost function of which we are computing
                     the gradient.
                         
        :returns: A list of pairs (parameter, update_expression), to
                  be passed directly to ``theano.function`` as the
                  ``updates`` parameter.
        """
        utils.check_kwargs(kwargs, ['learning_rate', 'cost'])

        learning_rate = kwargs['learning_rate']
        bound_cost = kwargs['cost']

        updates = []
        for param in self.params:
            gradient = theano.grad(cost = bound_cost, wrt = param)
            updates.append((param, param - learning_rate * gradient))

        return updates
Developer: hajicj, Project: safire, Lines: 32, Source: base_supervised_model.py


Example 19: gradients_and_updates

    def gradients_and_updates(self, grad_normalize):
        """Compute gradients (t_gparams) using cost and trainable weights (t_params).
        """

        # ------ Compute gradient parameters
        self.t_gparams = OrderedDict({'g_' + k: theano.grad(cost=self.t_outputs['T_cost'], wrt=p)
                                      for k, p in self.t_params.iteritems()})

        # ------ Compute norm and stack it like a vector (to analyze outside)
        # self.out_debug = self.t_gparams['g_T_B']
        self.out_gnorm = T.stack([T.sqrt(T.sum(gp ** 2)) for gp in self.t_gparams.values()])

        # ------ Normalize gradients
        self.g_norm = {}
        if grad_normalize.has_key('max_norm'):      # maximum gradient norm limited
            mn = grad_normalize['max_norm']
            for k in self.t_gparams.keys():
                self.g_norm[k] = T.sqrt(T.sum(self.t_gparams[k] ** 2))
                self.t_gparams[k] = ifel(T.gt(self.g_norm[k], mn),
                                         mn * self.t_gparams[k] / (self.g_norm[k] + 1e-6),
                                         self.t_gparams[k])

        # ------ Update parameters (SGD!)
        self.update_params = []
        for k in self.t_params.keys():
            self.update_params.append([self.t_params[k],
                                       self.t_params[k] - self.t_inputs['T_lr'] * self.t_gparams['g_' + k]])
Developer: makarandtapaswi, Project: MovieQA_CVPR2016, Lines: 27, Source: memN2N_text.py


Example 20: _get_rmsprop_updates

	def _get_rmsprop_updates(self, loss, params, lr, grad_momentum
							, sqr_momentum, min_grad):
		# Modified from the Lasagne package:
		# 	https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py

		grads = theano.grad(loss, params)
		scale_factor = 1.0
		if self.max_norm > 0:
			scale_factor = self._clip_gradient_norm(grads, self.max_norm)
		updates = OrderedDict()

		# Using theano constant to prevent upcasting of float32
		one = T.constant(1)
		for param, grad in zip(params, grads):
			value = param.get_value(borrow=True)
			accu_sqr = theano.shared(np.zeros(value.shape, dtype=value.dtype),
				broadcastable=param.broadcastable)
			accu_sqr_new = sqr_momentum * accu_sqr + \
							(one - sqr_momentum) * grad ** 2

			accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
				broadcastable=param.broadcastable)
			accu_new = grad_momentum * accu + (one - grad_momentum) * grad

			updates[accu] = accu_new
			updates[accu_sqr] = accu_sqr_new
			updates[param] = param - (lr * grad * scale_factor /
				T.sqrt(accu_sqr_new - accu_new ** 2 + min_grad))
		return updates
Developer: dbchiem, Project: dqnet, Lines: 29, Source: network.py



Note: the theano.grad examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and redistribution or use should follow the corresponding project's license. Do not reproduce this article without permission.

