• 设为首页
  • 点击收藏
  • 手机版
  • 关注官方公众号

Python array_ops.stop_gradient函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 tensorflow.python.ops.array_ops.stop_gradient 函数的典型用法代码示例。如果您正苦于以下问题:Python stop_gradient 函数的具体用法?Python stop_gradient 怎么用?Python stop_gradient 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


示例1: _logits_cumulative

  def _logits_cumulative(self, inputs, stop_gradient):
    """Evaluate logits of the cumulative densities.

    Args:
      inputs: The values at which to evaluate the cumulative densities, expected
        to be a `Tensor` of shape `(channels, 1, batch)`.
      stop_gradient: Boolean. Whether to add `array_ops.stop_gradient` calls so
        that the gradient of the output with respect to the density model
        parameters is disconnected (the gradient with respect to `inputs` is
        left untouched).

    Returns:
      A `Tensor` of the same shape as `inputs`, containing the logits of the
      cumulative densities evaluated at the given inputs.
    """
    logits = inputs

    # One (matrix, bias) pair per layer; every layer that has an entry in
    # `self._factors` additionally gets a gated tanh non-linearity.
    for i in range(len(self.filters) + 1):
      matrix = self._matrices[i]
      if stop_gradient:
        matrix = array_ops.stop_gradient(matrix)
      logits = math_ops.matmul(matrix, logits)

      bias = self._biases[i]
      if stop_gradient:
        bias = array_ops.stop_gradient(bias)
      logits += bias

      if i < len(self._factors):
        factor = self._factors[i]
        if stop_gradient:
          factor = array_ops.stop_gradient(factor)
        logits += factor * math_ops.tanh(logits)

    return logits

示例2: _create_value

  def _create_value(self):
    """Create and return the value Tensor selected by `self._value_type`.

    Returns:
      The mean or a sample of `self._dist`. The result is wrapped in
      `array_ops.stop_gradient` when the value type requests it, or when the
      distribution is not continuous and fully reparameterized.

    Raises:
      TypeError: if `self._value_type` is neither a `MeanValue` nor a
        `SampleValue`.
    """

    if isinstance(self._value_type, MeanValue):
      value_tensor = self._dist.mean()
    elif isinstance(self._value_type, SampleValue):
      value_tensor = self._dist.sample(self._value_type.shape)
    else:
      # Format the message instead of passing the value type as a second
      # exception argument, so the error text actually contains it.
      raise TypeError("Unrecognized Distribution Value Type: %s" %
                      (self._value_type,))

    if self._value_type.stop_gradient:
      # stop_gradient is being enforced by the value type
      return array_ops.stop_gradient(value_tensor)

    if isinstance(self._value_type, MeanValue):
      return value_tensor  # Using pathwise-derivative for this one.
    # NOTE(review): the operand compared against FULLY_REPARAMETERIZED was lost
    # from the extracted text; `reparameterization_type` is restored here --
    # confirm against the upstream source.
    if self._dist.is_continuous and (
        self._dist.reparameterization_type
        is distribution.FULLY_REPARAMETERIZED):
      return value_tensor  # Using pathwise-derivative for this one.
    else:
      # Will have to perform some variant of score function
      # estimation.  Call stop_gradient on the sampler just in case we
      # may accidentally leak some gradient from it.
      return array_ops.stop_gradient(value_tensor)

示例3: _run_test

 def _run_test(self, x_, use_deferred_shape=False, **kwargs):
   """Checks `du.fill_triangular` values and gradients against a reference.

   Args:
     x_: Array-like input; converted with `np.asarray`.
     use_deferred_shape: If True, the placeholder is built without a static
       shape, and the result is expected to have an unknown static shape.
     **kwargs: Forwarded to both `du.fill_triangular` and the reference
       implementation `self._fill_triangular`.
   """
   x_ = np.asarray(x_)
   with self.cached_session() as sess:
     static_shape = None if use_deferred_shape else x_.shape
     x_pl = array_ops.placeholder_with_default(x_, shape=static_shape)
     # Add `zeros_like(x)` such that x's value and gradient are identical. We
     # do this so we can ensure each gradient value is mapped to the right
     # gradient location.  (Not doing this means the gradient wrt `x` is simple
     # `ones_like(x)`.)
     # Note:
     #   zeros_like_x_pl == zeros_like(x_pl)
     #   gradient(zeros_like_x_pl, x_pl) == x_pl - 1
     zeros_like_x_pl = (x_pl * array_ops.stop_gradient(x_pl - 1.)
                        - array_ops.stop_gradient(x_pl * (x_pl - 1.)))
     x = x_pl + zeros_like_x_pl
     actual = du.fill_triangular(x, **kwargs)
     grad_actual = gradients_impl.gradients(actual, x_pl)[0]
     [actual_, grad_actual_] = sess.run([actual, grad_actual],
                                        feed_dict={x_pl: x_})
   expected = self._fill_triangular(x_, **kwargs)
   if use_deferred_shape:
     self.assertEqual(None, actual.shape)
   else:
     # Only a statically shaped input can yield a comparable static shape.
     self.assertAllEqual(expected.shape, actual.shape)
   self.assertAllClose(expected, actual_, rtol=1e-8, atol=1e-9)
   self.assertAllClose(x_, grad_actual_, rtol=1e-8, atol=1e-9)

示例4: compute_spectral_norm

def compute_spectral_norm(w_tensor, power_iteration_rounds=1, name=None):
  """Estimates the largest singular value in the weight tensor.

  Args:
    w_tensor: The weight matrix whose spectral norm should be computed.
    power_iteration_rounds: The number of iterations of the power method to
      perform. A higher number yields a better approximation. Must be at
      least 1.
    name: An optional scope name.

  Returns:
    The largest singular value (the spectral norm) of w.

  Raises:
    ValueError: if `power_iteration_rounds` is smaller than 1.
  """
  # Local import: this block is the only place that needs an initializer.
  from tensorflow.python.ops import init_ops  # pylint: disable=g-import-not-at-top

  if power_iteration_rounds < 1:
    # Without at least one round `v` would never be defined below.
    raise ValueError(
        'power_iteration_rounds must be at least 1, got %r' %
        (power_iteration_rounds,))

  with variable_scope.variable_scope(name, 'spectral_norm'):
    # The paper says to flatten convnet kernel weights from
    # (C_out, C_in, KH, KW) to (C_out, C_in * KH * KW). But TensorFlow's Conv2D
    # kernel weight shape is (KH, KW, C_in, C_out), so it should be reshaped to
    # (KH * KW * C_in, C_out), and similarly for other layers that put output
    # channels as last dimension.
    # n.b. this means that w here is equivalent to w.T in the paper.
    w = array_ops.reshape(w_tensor, (-1, w_tensor.get_shape()[-1]))

    # Persisted approximation of first left singular vector of matrix `w`.
    # NOTE(review): every argument except `shape` was lost from the extracted
    # text; name, dtype, initializer and `trainable=False` are reconstructed --
    # confirm against the upstream source.
    u_var = variable_scope.get_variable(
        'spectral_norm_u',
        shape=(w.shape[0], 1),
        dtype=w.dtype,
        initializer=init_ops.random_normal_initializer(),
        trainable=False)
    u = u_var

    # Use power iteration method to approximate spectral norm.
    for _ in range(power_iteration_rounds):
      # `v` approximates the first right singular vector of matrix `w`.
      v = nn.l2_normalize(math_ops.matmul(array_ops.transpose(w), u))
      u = nn.l2_normalize(math_ops.matmul(w, v))

    # Update persisted approximation.
    with ops.control_dependencies([u_var.assign(u, name='update_u')]):
      u = array_ops.identity(u)

    # The singular vectors are treated as constants, so gradients reach the
    # result only through `w`.
    u = array_ops.stop_gradient(u)
    v = array_ops.stop_gradient(v)

    # Largest singular value of `w`.
    spectral_norm = math_ops.matmul(
        math_ops.matmul(array_ops.transpose(u), w), v)
    spectral_norm.shape.assert_is_compatible_with([1, 1])

    return spectral_norm[0][0]

示例5: _MakeGraph

    def _MakeGraph(rng, stop_gradients=()):
      """Builds a small chain of dependent tensors `a`, `b`, `c`, `d`.

      Each tensor is a random affine function of its predecessors. A tensor
      whose name appears in `stop_gradients` is wrapped in
      `array_ops.stop_gradient` before later tensors consume it.

      Returns:
        A dict mapping the names "a".."d" to the tensors.
      """
      def _FunctionOf(xs, k=3):
        # Same draw order as a generator-based sum: one random matrix per
        # input, then the random offset.
        total = 0
        for x in xs:
          total = total + math_ops.matmul(rng.rand(k, k), x)
        return ops.convert_to_tensor(total + rng.rand(k, k))

      def _MaybeStop(label, tensor):
        if label in stop_gradients:
          return array_ops.stop_gradient(tensor)
        return tensor

      a = _MaybeStop("a", _FunctionOf([]))
      b = _MaybeStop("b", _FunctionOf([a]))
      c = _MaybeStop("c", _FunctionOf([a, b]))
      d = _MaybeStop("d", _FunctionOf([b, c]))
      return {"a": a, "b": b, "c": c, "d": d}

示例6: loop_function

  def loop_function(prev, i, log_beam_probs, beam_path, beam_symbols):
    """Beam-search step: picks the best symbols from `prev` and embeds them.

    Args:
      prev: Output of the previous decoder step; projected with
        `output_projection` when one is configured in the enclosing scope.
      i: Integer step number.
      log_beam_probs: List of per-step best log-probabilities; the last entry
        is added to the current scores when `i > 1`, and this step's best
        scores are appended.
      beam_path: List to which this step's parent-hypothesis indices are
        appended.
      beam_symbols: List to which this step's chosen symbols are appended.

    Returns:
      The embeddings of the chosen symbols, reshaped to
      `[beam_size, embedding_size]`; gradients are stopped unless
      `update_embedding` is set in the enclosing scope.
    """
    if output_projection is not None:
      prev = nn_ops.xw_plus_b(
          prev, output_projection[0], output_projection[1])

    # log_softmax is the numerically stable form of log(softmax(x)).
    probs = tf.nn.log_softmax(prev)

    if i > 1:
      # Accumulate the scores of the hypotheses being extended, then flatten
      # so top_k ranks all (hypothesis, symbol) pairs together.
      probs = tf.reshape(probs + log_beam_probs[-1],
                         [-1, beam_size * num_symbols])

    best_probs, indices = tf.nn.top_k(probs, beam_size)
    indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
    best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

    symbols = indices % num_symbols  # Which word in vocabulary.
    beam_parent = indices // num_symbols  # Which hypothesis it came from.

    # NOTE(review): the three appends below were missing from the extracted
    # text. They are restored because `log_beam_probs[-1]` is read above and
    # nothing else in this function ever fills these lists -- confirm against
    # the upstream source.
    beam_symbols.append(symbols)
    beam_path.append(beam_parent)
    log_beam_probs.append(best_probs)

    # Note that gradients will not propagate through the second parameter of
    # embedding_lookup.
    emb_prev = embedding_ops.embedding_lookup(embedding, symbols)
    emb_prev = tf.reshape(emb_prev, [beam_size, embedding_size])
    if not update_embedding:
      emb_prev = array_ops.stop_gradient(emb_prev)
    return emb_prev

示例7: _statistics

def _statistics(x, axes):
  """Calculate the mean and mean square of `x`.

  Modified from the implementation of `tf.nn.moments`.

  Args:
    x: A `Tensor`.
    axes: Array of ints.  Axes along which to compute mean and
      mean square.

  Returns:
    Two `Tensor` objects: `mean` and `square mean`.
  """
  # The dynamic range of fp16 is too limited to support the collection of
  # sufficient statistics. As a workaround we simply perform the operations
  # on 32-bit floats before converting the mean and variance back to fp16
  y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x

  # Compute true mean while keeping the dims for proper broadcasting.
  shift = array_ops.stop_gradient(math_ops.reduce_mean(y, axes, keepdims=True))

  # Averaging the shifted values and adding the shift back yields the same
  # mean while keeping the gradient path through `y - shift`.
  shifted_mean = math_ops.reduce_mean(y - shift, axes, keepdims=True)
  mean = shifted_mean + shift
  mean_squared = math_ops.reduce_mean(math_ops.square(y), axes, keepdims=True)

  mean = array_ops.squeeze(mean, axes)
  mean_squared = array_ops.squeeze(mean_squared, axes)
  if x.dtype == dtypes.float16:
    return (math_ops.cast(mean, dtypes.float16),
            math_ops.cast(mean_squared, dtypes.float16))
  else:
    return (mean, mean_squared)

示例8: score_function

def score_function(stochastic_tensor, value, loss, baseline=None,
                   name="ScoreFunction"):
  """Score function estimator.

  Computes the integrand of the score function with a baseline:
  `p.log_prob(value) * (loss - baseline)`.

  It will add a `stop_gradient` to the advantage `(loss - baseline)`.

  Args:
    stochastic_tensor: `StochasticTensor` p(x).
    value: `Tensor` x. Samples from p(x).
    loss: `Tensor`.
    baseline: `Tensor` broadcastable to `loss`.
    name: name to prepend ops with.

  Returns:
    `Tensor` `p.log_prob(x) * (loss - b)`. Taking the gradient yields the score
    function estimator.
  """
  # NOTE(review): the tail of the signature was lost from the extracted text;
  # the `name` default is reconstructed -- confirm against the upstream source.
  with ops.name_scope(name, values=[value, loss, baseline]):
    value = ops.convert_to_tensor(value)
    loss = ops.convert_to_tensor(loss)
    if baseline is not None:
      baseline = ops.convert_to_tensor(baseline)
      advantage = loss - baseline
    else:
      advantage = loss

    # The advantage is a constant weight: gradients must flow only through
    # the log-probability term.
    advantage = array_ops.stop_gradient(advantage)
    return stochastic_tensor.distribution.log_prob(value) * advantage

示例9: surrogate_loss

def surrogate_loss(sample_losses,
                   stochastic_tensors=None,
                   name="SurrogateLoss"):
  """Surrogate loss for stochastic graphs.

  This function will call `loss_fn` on each `StochasticTensor`
  upstream of `sample_losses`, passing the losses that it influenced.

  Note that currently `surrogate_loss` does not work with `StochasticTensor`s
  instantiated in `while_loop`s or other control structures.

  Args:
    sample_losses: a list or tuple of final losses. Each loss should be per
      example in the batch (and possibly per sample); that is, it should have
      dimensionality of 1 or greater. All losses should have the same shape.
    stochastic_tensors: a list of `StochasticTensor`s to add loss terms for.
      If None, defaults to all `StochasticTensor`s in the graph upstream of
      the `Tensor`s in `sample_losses`.
    name: the name with which to prepend created ops.

  Returns:
    `Tensor` loss, which is the sum of `sample_losses` and the
    `loss_fn`s returned by the `StochasticTensor`s.

  Raises:
    TypeError: if `sample_losses` is not a list or tuple, or if its elements
      are not `Tensor`s.
    ValueError: if any loss in `sample_losses` does not have dimensionality 1
      or greater.
  """
  # NOTE(review): the signature tail, the `fixed_losses` append, the warning
  # call and the `loss_terms` append were lost from the extracted text and are
  # reconstructed here -- confirm against the upstream source.
  from tensorflow.python.platform import tf_logging as logging  # pylint: disable=g-import-not-at-top

  with ops.op_scope(sample_losses, name):
    fixed_losses = []
    if not isinstance(sample_losses, (list, tuple)):
      raise TypeError("sample_losses must be a list or tuple")
    for loss in sample_losses:
      if not isinstance(loss, ops.Tensor):
        raise TypeError("loss is not a Tensor: %s" % loss)
      ndims = loss.get_shape().ndims
      if not (ndims is not None and ndims >= 1):
        raise ValueError("loss must have dimensionality 1 or greater: %s" %
                         loss)
      # The surrogate terms must treat the sampled losses as constants.
      fixed_losses.append(array_ops.stop_gradient(loss))

    stoch_dependencies_map = _stochastic_dependencies_map(
        fixed_losses, stochastic_tensors=stochastic_tensors)
    if not stoch_dependencies_map:
      logging.warn(
          "No collection of Stochastic Tensors found for current graph.")
      return math_ops.add_n(sample_losses)

    # Iterate through all of the stochastic dependencies, adding
    # surrogate terms where necessary.
    sample_losses = [ops.convert_to_tensor(loss) for loss in sample_losses]
    loss_terms = sample_losses
    for (stoch_node, dependent_losses) in stoch_dependencies_map.items():
      loss_term = stoch_node.loss(list(dependent_losses))
      if loss_term is not None:
        loss_terms.append(loss_term)

    return math_ops.add_n(loss_terms)

示例10: resample_at_rate

def resample_at_rate(inputs, rates, scope=None, seed=None, back_prop=False):
  """Given `inputs` tensors, stochastically resamples each at a given rate.

  For example, if the inputs are `[[a1, a2], [b1, b2]]` and the rates
  tensor contains `[3, 1]`, then the return value may look like `[[a1,
  a2, a1, a1], [b1, b2, b1, b1]]`. However, many other outputs are
  possible, since this is stochastic -- averaged over many repeated
  calls, each set of inputs should appear in the output `rate` times
  the number of invocations.

  Args:
    inputs: A list of tensors, each of which has a shape of `[batch_size, ...]`
    rates: A tensor of shape `[batch_size]` containing the resampling rates
       for each input.
    scope: Scope for the op.
    seed: Random seed to use.
    back_prop: Whether to allow back-propagation through this op.

  Returns:
    Selections from the input tensors.
  """
  with ops.name_scope(scope, default_name='resample_at_rate',
                      values=list(inputs) + [rates]):
    rates = ops.convert_to_tensor(rates, name='rates')
    # random_poisson does not support rates of size 0 (b/36076216)
    sample_counts = math_ops.cast(control_flow_ops.cond(
        array_ops.shape(rates)[0] > 0,
        lambda: random_ops.random_poisson(rates, (), rates.dtype, seed=seed),
        lambda: array_ops.zeros(shape=[0], dtype=rates.dtype)), dtypes.int32)
    # Every input is gathered with the same indices, so all inputs stay
    # aligned with one another.
    sample_indices = _repeat_range(sample_counts)
    if not back_prop:
      sample_indices = array_ops.stop_gradient(sample_indices)
    return [array_ops.gather(x, sample_indices) for x in inputs]

示例11: _tree_train_op_fn

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss.

      When `dnn_to_tree_distillation_param` is set in the enclosing scope, a
      weighted distillation loss between the gradient-stopped DNN logits and
      the tree logits is added to `loss` before training.
      """
      if dnn_to_tree_distillation_param:
        loss_weight, loss_fn = dnn_to_tree_distillation_param
        weight_tensor = head_lib._weight_tensor(  # pylint: disable=protected-access
            features, head.weight_column_name)
        # The DNN acts as a fixed teacher: no gradient flows back into it.
        dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

        if loss_fn is None:
          # we create the loss_fn similar to the head loss_fn for
          # multi_class_head used previously as the default one.
          n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
          # NOTE(review): the call's argument was lost from the extracted text;
          # `n_classes` is restored as the only value computed for it --
          # confirm against the upstream source.
          loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
              n_classes)

        dnn_to_tree_distillation_loss = loss_weight * loss_fn(
            dnn_logits_fixed, tree_logits, weight_tensor)
        loss += dnn_to_tree_distillation_loss

      update_op = gbdt_model.train(loss, predictions_dict, labels)
      # Increment the global step only after the model update has run.
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

示例12: _logspace_mean

def _logspace_mean(log_values):
  """Evaluate `Log[E[values]]` in a stable manner.

  Args:
    log_values:  `Tensor` holding `Log[values]`.

  Returns:
    `Tensor` of same `dtype` as `log_values`, reduced across dim 0.
  """
  # center = Max[Log[values]],  with stop-gradient
  # The center hopefully keep the exponentiated term small.  It is cancelled
  # from the final result, so putting stop gradient on it will not change the
  # final result.  We put stop gradient on to eliminate unnecessary computation.
  center = array_ops.stop_gradient(_sample_max(log_values))

  # centered_values = exp{Log[values] - center}
  centered_values = math_ops.exp(log_values - center)

  # log_mean_of_values = Log[ E[centered_values] ] + center
  #                    = Log[ E[exp{log_values - center}] ] + center
  #                    = Log[E[values]] - center + center
  #                    = Log[E[values]]
  log_mean_of_values = math_ops.log(_sample_mean(centered_values)) + center

  return log_mean_of_values

示例13: _AvgPoolGradGrad

def _AvgPoolGradGrad(op, grad):
  """Returns the gradients for the two inputs of an average-pool gradient op.

  Args:
    op: The op being differentiated; its first input is passed through
      `array_ops.stop_gradient`.
    grad: The incoming gradient, which is average-pooled with the op's own
      pooling attributes.

  Returns:
    A 2-tuple: the gradient-stopped first input and the pooled `grad`.
  """
  # NOTE(review): the arguments of `_avg_pool` were lost from the extracted
  # text and are reconstructed from the op's attributes -- confirm against the
  # upstream source.
  return (array_ops.stop_gradient(op.inputs[0]), gen_nn_ops._avg_pool(  # pylint: disable=protected-access
      grad,
      op.get_attr("ksize"),
      op.get_attr("strides"),
      op.get_attr("padding"),
      data_format=op.get_attr("data_format")))

示例14: additional_score_function_losses

def additional_score_function_losses(sample_losses, name=None):
  """Collects the score-function loss terms of upstream stochastic nodes.

  Args:
    sample_losses: a list or tuple of `Tensor` losses, each a scalar or a
      vector (rank at most 1).
    name: optional name for the op scope.

  Returns:
    A list with one identity-wrapped `Tensor` for every stochastic node whose
    `score_function` returned a value; an empty list when no stochastic
    dependencies were found.

  Raises:
    TypeError: if `sample_losses` is not a list or tuple, or if its elements
      are not `Tensor`s.
    ValueError: if any loss has unknown rank or rank greater than 1.
  """
  # NOTE(review): the `fixed_losses` append, the warning call and the body of
  # the final `with` were lost from the extracted text and are reconstructed
  # here -- confirm against the upstream source.
  from tensorflow.python.platform import tf_logging as logging  # pylint: disable=g-import-not-at-top

  with ops.op_scope(sample_losses, name, "SampleLosses"):
    fixed_losses = []
    if not isinstance(sample_losses, (list, tuple)):
      raise TypeError("sample_losses must be a list or tuple")
    for loss in sample_losses:
      if not isinstance(loss, ops.Tensor):
        raise TypeError("loss is not a Tensor: %s" % loss)
      ndims = loss.get_shape().ndims
      if not (ndims is not None and ndims <= 1):
        raise ValueError(
            "loss must be a scalar or batch-length vector loss: %s" % loss)
      # Score-function terms must treat the sampled losses as constants.
      fixed_losses.append(array_ops.stop_gradient(loss))

    stoch_dependencies_map = _stochastic_dependencies_map(fixed_losses)
    if not stoch_dependencies_map:
      logging.warn(
          "No collection of Stochastic Tensors found for current graph.")
      return []

    score_function_losses = []

    # Iterate through all of the stochastic dependencies, adding
    # surrogate terms where necessary.
    for (stoch_node, dependent_losses) in stoch_dependencies_map.items():
      score_function = stoch_node.score_function(list(dependent_losses))
      if score_function is not None:
        with ops.name_scope("ScoreFunction_%s" % stoch_node.name):
          score_function_losses.append(array_ops.identity(score_function))

    return score_function_losses

示例15: extract_argmax_and_embed

 def extract_argmax_and_embed(prev, _):
   """Loop function: embeds the arg-max symbol of the previous output.

   `prev` is first projected with `output_projection` when one is configured
   in the enclosing scope. The arg-max along axis 1 is gradient-stopped and
   then looked up in `embedding`.
   """
   projected = prev
   if output_projection is not None:
     weights, biases = output_projection[0], output_projection[1]
     projected = nn_ops.xw_plus_b(projected, weights, biases)
   best_symbol = math_ops.argmax(projected, 1)
   return embedding_ops.embedding_lookup(
       embedding, array_ops.stop_gradient(best_symbol))

示例16: softmax_cross_entropy

def softmax_cross_entropy(
    onehot_labels, logits, weights=1.0, label_smoothing=0, scope=None,
    loss_collection=ops.GraphKeys.LOSSES,
    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
  """Creates a cross-entropy loss using tf.nn.softmax_cross_entropy_with_logits.

  `weights` acts as a coefficient for the loss. If a scalar is provided,
  then the loss is simply scaled by the given value. If `weights` is a
  tensor of shape `[batch_size]`, then the loss weights apply to each
  corresponding sample.

  If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
      new_onehot_labels = onehot_labels * (1 - label_smoothing)
                          + label_smoothing / num_classes

  Args:
    onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels.
    logits: `[batch_size, num_classes]` logits outputs of the network .
    weights: Optional `Tensor` whose rank is either 0, or rank 1 and is
      broadcastable to the loss which is a `Tensor` of shape `[batch_size]`.
    label_smoothing: If greater than 0 then smooth the labels.
    scope: the scope for the operations performed in computing the loss.
    loss_collection: collection to which the loss will be added.
    reduction: Type of reduction to apply to loss.

  Returns:
    Weighted loss `Tensor` of the same type as `logits`. If `reduction` is
    `NONE`, this has shape `[batch_size]`; otherwise, it is scalar.

  Raises:
    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
      or if the shape of `weights` is invalid or if `weights` is None.  Also if
      `onehot_labels` or `logits` is None.
  """
  # NOTE(review): the defaults of `loss_collection` and `reduction`, and the
  # shape compatibility check below, were lost from the extracted text and are
  # reconstructed here -- confirm against the upstream source.
  if onehot_labels is None:
    raise ValueError("onehot_labels must not be None.")
  if logits is None:
    raise ValueError("logits must not be None.")
  with ops.name_scope(scope, "softmax_cross_entropy_loss",
                      (logits, onehot_labels, weights)) as scope:
    logits = ops.convert_to_tensor(logits)
    onehot_labels = math_ops.cast(onehot_labels, logits.dtype)
    # Enforces the documented ValueError on mismatched shapes.
    logits.get_shape().assert_is_compatible_with(onehot_labels.get_shape())

    if label_smoothing > 0:
      num_classes = math_ops.cast(
          array_ops.shape(onehot_labels)[1], logits.dtype)
      smooth_positives = 1.0 - label_smoothing
      smooth_negatives = label_smoothing / num_classes
      onehot_labels = onehot_labels * smooth_positives + smooth_negatives

    # The labels are targets: no gradient may flow back into them.
    onehot_labels = array_ops.stop_gradient(
        onehot_labels, name="labels_stop_gradient")
    losses = nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=logits, name="xentropy")

    return compute_weighted_loss(
        losses, weights, scope, loss_collection, reduction=reduction)

示例17: evaluate

 def evaluate(self):
   """Evaluate the loss function on the targets.

   Returns:
     The result of `self._evaluate` on the gradient-stopped targets.

   Raises:
     Exception: if `self.targets` is None.
   """
   if self.targets is not None:
     # We treat the targets as "constant".  It's only the inputs that get
     # "back-propped" through.
     return self._evaluate(array_ops.stop_gradient(self.targets))
   else:
     raise Exception("Cannot evaluate losses with unspecified targets.")

示例18: _rev_layer_backward

def _rev_layer_backward(ys, grad_ys, f, g, f_vars, f_side_input, g_vars,
                        g_side_input):
  """Backprop for 1 layer.

  Reconstructs the layer inputs `(x1, x2)` from its outputs `ys`, then
  computes the gradients with respect to those inputs, the variables of `f`
  and `g`, and their side inputs.

  Args:
    ys: pair `(y1, y2)` of layer outputs.
    grad_ys: pair `(grad_y1, grad_y2)` of gradients with respect to `ys`.
    f: callable applied to `x2` (plus `f_side_input` when non-empty).
    g: callable applied to `y1` (plus `g_side_input` when non-empty).
    f_vars: list of variables of `f`.
    f_side_input: list of side-input tensors for `f`.
    g_vars: list of variables of `g`.
    g_side_input: list of side-input tensors for `g`.

  Returns:
    A nested tuple `((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
    (grad_g_vars, grad_g_side))`.
  """
  y1, y2 = ys
  grad_y1, grad_y2 = grad_ys

  # Reconstruct intermediates and inputs (x1, x2)
  # stop_gradients required on fn inputs to prevent infinite recursion into this
  # grad function on the calls to gradients.
  y1_stop = array_ops.stop_gradient(y1)
  g_side_input = [array_ops.stop_gradient(t) for t in g_side_input]
  gy1 = g(y1_stop, g_side_input) if g_side_input else g(y1_stop)

  x2 = y2 - gy1
  x2_stop = array_ops.stop_gradient(x2)
  f_side_input = [array_ops.stop_gradient(t) for t in f_side_input]
  fx2 = f(x2_stop, f_side_input) if f_side_input else f(x2_stop)

  x1 = y1 - fx2

  # Compute gradients wrt to inputs
  # dL/dy2 * dG(y1)/y1
  grad_gy1_y2 = gradients_impl.gradients(gy1, y1_stop, grad_y2)[0]
  grad_x1 = grad_y1 + grad_gy1_y2
  grad_x2 = (
      gradients_impl.gradients(fx2, x2_stop, grad_y1)[0] + grad_y2 +
      gradients_impl.gradients(fx2, x2_stop, grad_gy1_y2)[0])

  # Compute gradients wrt to vars and side inputs in f and g
  grads1 = gradients_impl.gradients(gy1, g_vars + g_side_input, grad_y2)
  grad_g_vars, grad_g_side = grads1[:len(g_vars)], grads1[len(g_vars):]
  grads2 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_y1)
  grad_f_y1, grad_f_side1 = grads2[:len(f_vars)], grads2[len(f_vars):]
  grads3 = gradients_impl.gradients(fx2, f_vars + f_side_input, grad_gy1_y2)
  grad_f_y2, grad_f_side2 = grads3[:len(f_vars)], grads3[len(f_vars):]
  grad_f_vars = _acc_grads(grad_f_y1, grad_f_y2)

  grad_f_side = _acc_grads(grad_f_side1, grad_f_side2)

  # Put returns in a tuple to ensure a constant memory budget (i.e. don't want
  # the subsequent layer to start computing and consuming memory based on a
  # subset of these values).
  outputs = ((x1, x2), (grad_x1, grad_x2), (grad_f_vars, grad_f_side),
             (grad_g_vars, grad_g_side))
  tupled = control_flow_ops.tuple(nest.flatten(outputs))
  return nest.pack_sequence_as(outputs, tupled)

示例19: _create_value

  def _create_value(self):
    """Create and return the value Tensor selected by `self._value_type`.

    Returns:
      The mean, a sample, or a sample whose first two dimensions are merged
      (`SampleAndReshapeValue`). The result is wrapped in
      `array_ops.stop_gradient` when the value type requests it, or when the
      distribution is not a reparameterized continuous distribution.

    Raises:
      TypeError: if `self._value_type` is not one of the handled value types.
    """
    # NOTE(review): three `else:` lines and the `set_shape` call were lost
    # from the extracted text and are reconstructed here -- confirm against
    # the upstream source.

    if isinstance(self._value_type, MeanValue):
      value_tensor = self._dist.mean()
    elif isinstance(self._value_type, SampleValue):
      value_tensor = self._dist.sample(self._value_type.n)
    elif isinstance(self._value_type, SampleAndReshapeValue):
      if self._value_type.n == 1:
        value_tensor = array_ops.squeeze(self._dist.sample(1), [0])
      else:
        samples = self._dist.sample(self._value_type.n)
        samples_shape = array_ops.shape(samples)
        samples_static_shape = samples.get_shape()
        # Merge the sample dimension into the batch dimension.
        new_batch_size = samples_shape[0] * samples_shape[1]
        value_tensor = array_ops.reshape(
            samples, array_ops.concat(0, ([new_batch_size], samples_shape[2:])))
        if samples_static_shape.ndims is not None:
          # Update the static shape for shape inference purposes
          shape_list = samples_static_shape.as_list()
          new_shape = tensor_shape.vector(
              shape_list[0] * shape_list[1]
              if shape_list[0] is not None and shape_list[1] is not None
              else None)
          new_shape = new_shape.concatenate(samples_static_shape[2:])
          value_tensor.set_shape(new_shape)
    else:
      # Format the message instead of passing the value type as a second
      # exception argument, so the error text actually contains it.
      raise TypeError(
          "Unrecognized Distribution Value Type: %s" % (self._value_type,))

    stop_gradient = self._value_type.stop_gradient

    if stop_gradient:
      # stop_gradient is being enforced by the value type
      return array_ops.stop_gradient(value_tensor)

    if isinstance(self._value_type, MeanValue):
      return value_tensor  # Using pathwise-derivative for this one.
    if (isinstance(self._dist, distributions.ContinuousDistribution)
        and self._dist.is_reparameterized):
      return value_tensor  # Using pathwise-derivative for this one.
    else:
      # Will have to perform some variant of score function
      # estimation.  Call stop_gradient on the sampler just in case we
      # may accidentally leak some gradient from it.
      return array_ops.stop_gradient(value_tensor)

示例20: rnn_decoder

def rnn_decoder(decoder_inputs, initial_state, cell, loop_function=None,
                scope=None):
  """RNN decoder for the sequence-to-sequence model.

  Args:
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    initial_state: 2D Tensor with shape [batch_size x cell.state_size].
    cell: RNNCell defining the cell function and size.
    loop_function: if not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x cell.output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x cell.input_size].
    scope: VariableScope for the created subgraph; defaults to "rnn_decoder".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x cell.output_size] containing generated outputs.
    states: A list holding `initial_state` followed by the state produced at
      each time-step. Each item is a 2D Tensor of shape
      [batch_size x cell.state_size].
      (Note that in some cases, like basic RNN cell or GRU cell, outputs and
       states can be the same. They are different for LSTM cells though.)
  """
  # NOTE(review): the signature tail, the variable-reuse call and the two
  # list appends were lost from the extracted text and are reconstructed
  # here -- confirm against the upstream source.
  with vs.variable_scope(scope or "rnn_decoder"):
    states = [initial_state]
    outputs = []
    prev = None
    for i, inp in enumerate(decoder_inputs):
      if loop_function is not None and prev is not None:
        with vs.variable_scope("loop_function", reuse=True):
          # We do not propagate gradients over the loop function.
          inp = array_ops.stop_gradient(loop_function(prev, i))
      if i > 0:
        # Every step after the first shares the cell's variables.
        vs.get_variable_scope().reuse_variables()
      output, new_state = cell(inp, states[-1])
      outputs.append(output)
      states.append(new_state)
      if loop_function is not None:
        prev = array_ops.stop_gradient(output)
  return outputs, states









Python array_ops.strided_slice函数代码示例发布时间:2022-05-27
Python array_ops.stack函数代码示例发布时间:2022-05-27





在线客服(服务时间 9:00~18:00)


Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap