Python math_ops.tanh函数代码示例

本文整理汇总了Python中tensorflow.python.ops.math_ops.tanh函数的典型用法代码示例。


示例1: body

    def body(i, prev_c, prev_h, actions, log_probs):
      """Run one decoding step: attend, compute logits and pick the next action.

      Args:
        i: Loop counter. Step 0 feeds the tiled go-embedding; later steps feed
          the embedding of the action stored at step `i - 1`.
        prev_c: Previous cell state handed to `lstm`.
        prev_h: Previous hidden state handed to `lstm`.
        actions: Accumulator supporting `read`/`write` (presumably a
          TensorArray -- confirm against the enclosing loop).
        log_probs: Running sum of the per-step sparse softmax cross-entropy.

      Returns:
        The tuple `(i + 1, next_c, next_h, actions, log_probs)` consumed by the
        next iteration.

      Raises:
        NotImplementedError: If `mode` is not "sample", "greedy" or "target".
      """
      # pylint: disable=g-long-lambda
      signal = control_flow_ops.cond(
          math_ops.equal(i, 0),
          lambda: array_ops.tile(device_go_embedding,
                                 [self.hparams.num_children, 1]),
          lambda: embedding_ops.embedding_lookup(device_embeddings,
                                                 actions.read(i - 1))
      )
      if self.hparams.keep_prob is not None:
        signal = nn_ops.dropout(signal, self.hparams.keep_prob)
      next_c, next_h = lstm(signal, prev_c, prev_h, w_lstm, forget_bias)

      # Additive attention over `attn_mem`, driven by the new hidden state.
      query = math_ops.matmul(next_h, attn_w_2)
      query = array_ops.reshape(
          query, [self.hparams.num_children, 1, self.hparams.hidden_size])
      query = math_ops.tanh(query + attn_mem)
      query = array_ops.reshape(query, [
          self.hparams.num_children * self.num_groups, self.hparams.hidden_size
      ])
      query = math_ops.matmul(query, attn_v)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups])
      query = nn_ops.softmax(query)
      query = array_ops.reshape(query,
                                [self.hparams.num_children, self.num_groups, 1])
      query = math_ops.reduce_sum(attn_mem * query, axis=1)
      query = array_ops.concat([next_h, query], axis=1)

      logits = math_ops.matmul(query, device_softmax)
      logits /= self.hparams.temperature
      if self.hparams.tanh_constant > 0:
        # Squash the logits into [-tanh_constant, tanh_constant].
        logits = math_ops.tanh(logits) * self.hparams.tanh_constant
      if self.hparams.logits_std_noise > 0:
        num_in_logits = math_ops.cast(
            array_ops.size(logits), dtype=dtypes.float32)
        avg_norm = math_ops.divide(
            linalg_ops.norm(logits), math_ops.sqrt(num_in_logits))
        # The noise must have the same shape as the logits it is added to.
        logits_noise = random_ops.random_normal(
            array_ops.shape(logits),
            stddev=self.hparams.logits_std_noise * avg_norm)
        # Noise is only injected until `stop_noise_step` has been passed.
        logits = control_flow_ops.cond(
            self.global_step > self.hparams.stop_noise_step, lambda: logits,
            lambda: logits + logits_noise)

      if mode == "sample":
        next_y = random_ops.multinomial(logits, 1, seed=self.hparams.seed)
      elif mode == "greedy":
        next_y = math_ops.argmax(logits, 1)
      elif mode == "target":
        next_y = array_ops.slice(y, [0, i], [-1, 1])
      else:
        raise NotImplementedError
      next_y = math_ops.to_int32(next_y)
      next_y = array_ops.reshape(next_y, [self.hparams.num_children])
      actions = actions.write(i, next_y)
      log_probs += nn_ops.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=next_y)
      return i + 1, next_c, next_h, actions, log_probs

示例2: LSTMCell

 def LSTMCell(cls, x, mprev, cprev, weights):
   """Compute one LSTM step from a single fused weight matrix.

   Args:
     cls: First positional parameter of the method; not read by the body.
     x: Current input, concatenated with `mprev` along axis 1.
     mprev: Previous output (m) state.
     cprev: Previous cell (c) state.
     weights: Matrix multiplied with the concatenated `[x, mprev]`; the
       product is split into four equal parts along axis 1.

   Returns:
     The tuple `(new_m, new_c)`.
   """
   joined = array_ops.concat([x, mprev], 1)
   fused = math_ops.matmul(joined, weights)
   i_i, i_g, f_g, o_g = array_ops.split(
       value=fused, num_or_size_splits=4, axis=1)
   # Gated carry-over of the old cell state plus the gated candidate value.
   retained = math_ops.sigmoid(f_g) * cprev
   candidate = math_ops.sigmoid(i_g) * math_ops.tanh(i_i)
   # The cell state is clipped to [-50, 50] before it is used or returned.
   new_c = clip_ops.clip_by_value(retained + candidate, -50.0, 50.0)
   new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c)
   return new_m, new_c

示例3: _bahdanau_score

def _bahdanau_score(processed_query, keys, normalize):
  """Implements Bahdanau-style (additive) scoring function.

  This attention has two forms.  The first is Bahdanau attention,
  as described in:

  Dzmitry Bahdanau, Kyunghyun Cho, Yoshua Bengio.
  "Neural Machine Translation by Jointly Learning to Align and Translate."
  ICLR 2015. https://arxiv.org/abs/1409.0473

  The second is the normalized form.  This form is inspired by the
  weight normalization article:

  Tim Salimans, Diederik P. Kingma.
  "Weight Normalization: A Simple Reparameterization to Accelerate
   Training of Deep Neural Networks."

  To enable the second form, set `normalize=True`.

  Args:
    processed_query: Tensor, shape `[batch_size, num_units]` to compare to keys.
    keys: Processed memory, shape `[batch_size, max_time, num_units]`.
    normalize: Whether to normalize the score function.

  Returns:
    A `[batch_size, max_time]` tensor of unnormalized score values.
  """
  dtype = processed_query.dtype
  # Get the number of hidden units from the trailing dimension of keys
  num_units = keys.shape[2].value or array_ops.shape(keys)[2]
  # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
  processed_query = array_ops.expand_dims(processed_query, 1)
  v = variable_scope.get_variable(
      "attention_v", [num_units], dtype=dtype)
  if normalize:
    # Scalar used in weight normalization
    g = variable_scope.get_variable(
        "attention_g", dtype=dtype,
        initializer=math.sqrt((1. / num_units)))
    # Bias added prior to the nonlinearity
    # NOTE(review): the zeros initializer is restored from the upstream
    # TensorFlow implementation and needs `init_ops` in scope -- confirm.
    b = variable_scope.get_variable(
        "attention_b", [num_units], dtype=dtype,
        initializer=init_ops.zeros_initializer())
    # normed_v = g * v / ||v||
    normed_v = g * v * math_ops.rsqrt(
        math_ops.reduce_sum(math_ops.square(v)))
    return math_ops.reduce_sum(
        normed_v * math_ops.tanh(keys + processed_query + b), [2])
  else:
    return math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query), [2])

示例4: attention

    def attention(decoder_state, coverage=None):
      """Calculate the context vector and attention distribution from the decoder state.

      Args:
        decoder_state: state of the decoder
        coverage: Optional. Previous timestep's coverage vector, shape (batch_size, attn_len, 1, 1).

      Returns:
        context_vector: weighted sum of encoder_states
        attn_dist: attention distribution
        coverage: new coverage vector. shape (batch_size, attn_len, 1, 1)
      """
      with variable_scope.variable_scope("Attention"):
        # Pass the decoder state through a linear layer (this is W_s s_t + b_attn in the paper)
        decoder_features = linear(decoder_state, attention_vec_size, True) # shape (batch_size, attention_vec_size)
        decoder_features = tf.expand_dims(tf.expand_dims(decoder_features, 1), 1) # reshape to (batch_size, 1, 1, attention_vec_size)

        def masked_attention(e):
          """Take softmax of e then apply enc_padding_mask and re-normalize"""
          attn_dist = nn_ops.softmax(e) # take softmax. shape (batch_size, attn_length)
          attn_dist *= enc_padding_mask # apply mask
          masked_sums = tf.reduce_sum(attn_dist, axis=1) # shape (batch_size)
          return attn_dist / tf.reshape(masked_sums, [-1, 1]) # re-normalize

        if use_coverage and coverage is not None: # non-first step of coverage
          # Multiply coverage vector by w_c to get coverage_features.
          coverage_features = nn_ops.conv2d(coverage, w_c, [1, 1, 1, 1], "SAME") # c has shape (batch_size, attn_length, 1, attention_vec_size)

          # Calculate v^T tanh(W_h h_i + W_s s_t + w_c c_i^t + b_attn)
          e = math_ops.reduce_sum(v * math_ops.tanh(encoder_features + decoder_features + coverage_features), [2, 3])  # shape (batch_size,attn_length)

          # Calculate attention distribution
          attn_dist = masked_attention(e)

          # Update coverage vector
          coverage += array_ops.reshape(attn_dist, [batch_size, -1, 1, 1])
        else:
          # Without a usable coverage vector the score omits the coverage term.
          # Calculate v^T tanh(W_h h_i + W_s s_t + b_attn)
          e = math_ops.reduce_sum(v * math_ops.tanh(encoder_features + decoder_features), [2, 3]) # calculate e

          # Calculate attention distribution
          attn_dist = masked_attention(e)

          if use_coverage: # first step of training
            coverage = tf.expand_dims(tf.expand_dims(attn_dist,2),2) # initialize coverage

        # Calculate the context vector from attn_dist and encoder_states
        context_vector = math_ops.reduce_sum(array_ops.reshape(attn_dist, [batch_size, -1, 1, 1]) * encoder_states, [1, 2]) # shape (batch_size, attn_size).
        context_vector = array_ops.reshape(context_vector, [-1, attn_size])

      return context_vector, attn_dist, coverage

示例5: __call__

  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM).

    Args:
      inputs: Input for this step, fed to `linear` together with the previous
        hidden half of `state`.
      state: Packed state that is split into two equal parts (cell, hidden)
        along axis 1.
      scope: Optional variable scope; defaults to the class name.

    Returns:
      The tuple `(new_h, packed_state)`, where `packed_state` concatenates the
      new cell and hidden values along axis 1.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
      # The calls below pass the axis first (`split(1, n, value)`,
      # `concat(1, values)`), as in the original code.
      cell_prev, hidden_prev = array_ops.split(1, 2, state)
      # One fused linear map produces all four gate pre-activations at once.
      gates = linear([inputs, hidden_prev], 4 * self._num_units, True)

      in_gate, candidate, forget_gate, out_gate = array_ops.split(1, 4, gates)

      kept = cell_prev * sigmoid(forget_gate + self._forget_bias)
      added = sigmoid(in_gate) * tanh(candidate)
      new_c = kept + added
      new_h = tanh(new_c) * sigmoid(out_gate)
      packed_state = array_ops.concat(1, [new_c, new_h])

      return new_h, packed_state

示例6: attention

 def attention(query):
   """Put attention masks on hidden using hidden_features and query.

   Args:
     query: A tensor, or a nested sequence of tensors that is flattened and
       concatenated along axis 1 before use.

   Returns:
     A list with one attention read per head, each reshaped to
     `[-1, attn_size]`.
   """
   ds = []  # One attention read is collected per head.
   if nest.is_sequence(query):
     flat_parts = nest.flatten(query)
     for part in flat_parts:
       rank = part.get_shape().ndims
       # Only a known (truthy) rank is checked, and it has to be 2.
       assert not rank or rank == 2
     query = array_ops.concat(1, flat_parts)
   for head in xrange(num_heads):
     with variable_scope.variable_scope("Attention_%d" % head):
       projected = linear(query, attention_vec_size, True)
       projected = array_ops.reshape(projected, [-1, 1, 1, attention_vec_size])
       # Scores are v^T * tanh(...), reduced over the last two axes.
       scores = math_ops.reduce_sum(
           v[head] * math_ops.tanh(hidden_features[head] + projected), [2, 3])
       # The source mask, when given, scales the scores before the softmax.
       if src_mask is not None:
         scores = scores * src_mask
       weights = nn_ops.softmax(scores)
       # Attention-weighted sum of `hidden`.
       weighted = array_ops.reshape(weights, [-1, attn_length, 1, 1]) * hidden
       context = math_ops.reduce_sum(weighted, [1, 2])
       ds.append(array_ops.reshape(context, [-1, attn_size]))
   return ds

示例7: embed

  def embed(self, func, embedding_classes, embedding_size, inputs, dtype=None, scope=None,
            keep_prob=1.0, initializer=None):
    """Embed every input step and return the gated annotations with a zero state.

    Args:
      func: Factory called as `func(self._cell, embedding_classes,
        embedding_size, initializer=initializer)`; its result is called once
        per input step.
      embedding_classes: Forwarded to `func`.
      embedding_size: Forwarded to `func`; also the size of both bias vectors.
      inputs: Sequence of per-step inputs; `inputs[0]` supplies the batch size.
      dtype: Forwarded to `self._cell.zero_state`.
      scope: Optional variable scope; defaults to "Embedder".
      keep_prob: Dropout is applied to each embedding when this is below 1.
      initializer: Forwarded to `func`.

    Returns:
      The tuple `(outputs, state)`: `outputs` holds one weighted annotation
      per input step and `state` is the cell's zero state.
    """
    embedder_cell = func(self._cell, embedding_classes, embedding_size, initializer=initializer)

    # Like rnn(..) in rnn.py, but we call only the Embedder, not the RNN cell
    outputs = []
    with vs.variable_scope(scope or "Embedder") as varscope:
      if varscope.caching_device is None:
        varscope.set_caching_device(lambda op: op.device)

      for time, input_ in enumerate(inputs):
        # Every step after the first reuses the variables created at step 0.
        if time > 0:
          vs.get_variable_scope().reuse_variables()
        embedding = embedder_cell(input_, scope)
        if keep_prob < 1:
          embedding = tf.nn.dropout(embedding, keep_prob)

        # annotation = C~_t = tanh ( E(x_t) + b_c)
        b_c = tf.get_variable("annotation_b", [embedding_size])
        annotation = tanh(tf.nn.bias_add(embedding, b_c))

        # weighted annotation = i_t * C~_t
        # i = sigmoid ( E(x_t) + b_i)
        b_i = tf.get_variable("input_b", [embedding_size])
        i = sigmoid(tf.nn.bias_add(embedding, b_i))
        w_annotation = i * annotation
        # Collect the result; without this the method always returned [].
        outputs.append(w_annotation)

      # return empty state, will be initialized by decoder
      batch_size = array_ops.shape(inputs[0])[0]
      state = self._cell.zero_state(batch_size, dtype)
      return (outputs, state)

示例8: attention

 def attention(query, use_attention=False):
   """Put attention masks on hidden using hidden_features and query.

   Args:
     query: Tensor projected with `rnn_cell._linear` for every head.
     use_attention: When exactly `False`, mean-pooling weights built from
       `sequence_length` are used instead of the softmax over the scores.

   Returns:
     The tuple `(attn_weights, ds)`: the per-head weights and the per-head
     weighted reads, the latter reshaped to `[-1, attn_size]`.
   """
   attn_weights = []
   ds = []  # Results of attention reads will be stored here.
   for i in xrange(num_heads):
     with variable_scope.variable_scope("Attention_%d" % i):
       y = rnn_cell._linear(query, attention_vec_size, True)
       y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
       # Attention mask is a softmax of v^T * tanh(...).
       s = math_ops.reduce_sum(
           v[i] * math_ops.tanh(hidden_features[i] + y), [2, 3])
       if use_attention is False: # apply mean pooling
         weights = tf.tile(sequence_length, tf.stack([attn_length]))
         weights = array_ops.reshape(weights, tf.shape(s))
         a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(weights)
         # a = array_ops.ones(tf.shape(s), dtype=dtype) / math_ops.to_float(tf.shape(s)[1])
       else:
         a = nn_ops.softmax(s)
       # Record the weights; without this the returned list was always empty.
       attn_weights.append(a)
       # Now calculate the attention-weighted vector d.
       d = math_ops.reduce_sum(
           array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
           [1, 2])
       ds.append(array_ops.reshape(d, [-1, attn_size]))
   return attn_weights, ds

示例9: call

  def call(self, inputs, state):
    """Run one LSTM step whose gate matrix is applied as two stacked linear maps.

    Args:
      inputs: Input for this step; its leading dimension is stored in
        `self._batch_size`.
      state: Pair `(c_prev, m_prev)` holding the previous cell and output
        values.

    Returns:
      The tuple `(m, new_state)`, where `new_state` is an `LSTMStateTuple` of
      the new cell value and `m`.
    """
    c_prev, m_prev = state
    self._batch_size = inputs.shape[0].value or array_ops.shape(inputs)[0]
    outer_scope = vs.get_variable_scope()
    with vs.variable_scope(outer_scope, initializer=self._initializer):
      joined = array_ops.concat([inputs, m_prev], axis=1)
      # The linear layers are built lazily on first use and cached on `self`.
      with vs.variable_scope("first_gemm"):
        if self._linear1 is None:
          # no bias for bottleneck
          self._linear1 = _Linear(joined, self._fact_size, False)
        bottleneck = self._linear1(joined)
      with vs.variable_scope("second_gemm"):
        if self._linear2 is None:
          self._linear2 = _Linear(bottleneck, 4*self._num_units, True)
        gates = self._linear2(bottleneck)
      in_gate, candidate, forget_gate, out_gate = array_ops.split(gates, 4, 1)

      kept = math_ops.sigmoid(forget_gate + self._forget_bias) * c_prev
      added = math_ops.sigmoid(in_gate) * math_ops.tanh(candidate)
      c = kept + added
      m = math_ops.sigmoid(out_gate) * self._activation(c)

    # Optional output projection, applied outside the initializer scope.
    if self._num_proj is not None:
      with vs.variable_scope("projection"):
        if self._linear3 is None:
          self._linear3 = _Linear(m, self._num_proj, False)
        m = self._linear3(m)

    return m, rnn_cell_impl.LSTMStateTuple(c, m)

示例10: _logits_cumulative

  def _logits_cumulative(self, inputs, stop_gradient):
    """Evaluate logits of the cumulative densities.

    Args:
      inputs: The values at which to evaluate the cumulative densities, expected
        to be a `Tensor` of shape `(channels, 1, batch)`.
      stop_gradient: Boolean. Whether to add `array_ops.stop_gradient` calls so
        that the gradient of the output with respect to the density model
        parameters is disconnected (the gradient with respect to `inputs` is
        left untouched).

    Returns:
      A `Tensor` of the same shape as `inputs`, containing the logits of the
      cumulative densities evaluated at the given inputs.
    """
    logits = inputs

    # One matrix/bias pair per layer; there is one more layer than filters.
    for i in range(len(self.filters) + 1):
      matrix = self._matrices[i]
      if stop_gradient:
        matrix = array_ops.stop_gradient(matrix)
      logits = math_ops.matmul(matrix, logits)

      bias = self._biases[i]
      if stop_gradient:
        bias = array_ops.stop_gradient(bias)
      logits += bias

      # The tanh term is only added for layers that have a factor.
      if i < len(self._factors):
        factor = self._factors[i]
        if stop_gradient:
          factor = array_ops.stop_gradient(factor)
        logits += factor * math_ops.tanh(logits)

    return logits

示例11: testOptimizerInit

  def testOptimizerInit(self):
    """Build a 1x1 fully connected layer, register it, and form a tanh/softmax loss.

    NOTE(review): despite the test name, no optimizer is constructed in the
    visible body -- confirm whether trailing statements are missing.
    """
    with ops.Graph().as_default():
      layer_collection = lc.LayerCollection()

      inputs = array_ops.ones((2, 1)) * 2
      initial_w = np.ones((1, 1), dtype=np.float32) * 3.
      weights = variable_scope.get_variable(
          'w', initializer=array_ops.constant(initial_w))
      bias = variable_scope.get_variable(
          'b', initializer=init_ops.zeros_initializer(), shape=(1, 1))
      pre_activation = math_ops.matmul(inputs, weights) + bias

      layer_collection.register_fully_connected(
          (weights, bias), inputs, pre_activation)

      logits = math_ops.tanh(pre_activation)
      targets = array_ops.constant([[0.], [1.]])
      per_example = nn.softmax_cross_entropy_with_logits(
          logits=logits, labels=targets)
      output = math_ops.reduce_mean(per_example)



示例12: __call__

  def __call__(self, inputs, state, scope=None):
    """Advance the cell one step using running numerator/denominator sums.

    Args:
      inputs: Input for this step.
      state: Four-tuple `(h, n, d, a_max)` holding the previous output, the
        running numerator, the running denominator and the running maximum
        of `a`.
      scope: Optional variable scope; defaults to "rwa_cell".

    Returns:
      The tuple `(h_new, new_state)`, where `new_state` is an `RWACellTuple`
      of the updated four values.
    """
    with _checked_scope(self, scope or "rwa_cell", reuse=self._reuse):
      h, n_prev, d_prev, a_max = state

      with vs.variable_scope("u"):
        u = _linear(inputs, self._num_units, True)

      with vs.variable_scope("g"):
        g = _linear([inputs, h], self._num_units, True)

      with vs.variable_scope("a"):
        # No bias here: factored out of numerator and denominator it cancels.
        a = _linear([inputs, h], self._num_units, False)

      z = tf.multiply(u, tanh(g))

      # Rescale both sums by the running maximum so the exponentials stay
      # numerically stable.
      running_max = tf.maximum(a_max, a)
      decay_old = tf.exp(a_max - running_max)
      weight_new = tf.exp(a - running_max)

      n_new = tf.multiply(n_prev, decay_old) + tf.multiply(z, weight_new)
      d_new = tf.multiply(d_prev, decay_old) + weight_new
      h_new = self._activation(tf.div(n_new, d_new))

      new_state = RWACellTuple(h_new, n_new, d_new, running_max)

    return h_new, new_state

示例13: downscale

 def downscale(self, inp):
   """Project `inp` to `self.size` features through a linear layer and tanh.

   Args:
     inp: Rank-3 tensor; its first two axes are swapped before and after the
       projection, and rows are flattened to width `2 * self.size`.

   Returns:
     The tanh-activated projection, back in the axis order of `inp`.
   """
   with vs.variable_scope("Downscale"):
     swapped = tf.transpose(inp, perm=[1, 0, 2])
     flat = tf.reshape(swapped, [-1, 2 * self.size])
     projected = rnn_cell.linear(flat, self.size, True, 1.0)
     restored = tf.reshape(projected, [self.batch_size, -1, self.size])
     out = tanh(tf.transpose(restored, perm=[1, 0, 2]))
   return out

示例14: __init__

 def __init__(self, num_units, encoder_output, scope=None):
   """Store the encoder output and precompute its tanh-projected features.

   Args:
     num_units: Cell size; also the width the encoder output is flattened to
       and projected into.
     encoder_output: Tensor kept as `self.hs`.
     scope: Optional variable scope; defaults to the class name.
   """
   self.hs = encoder_output
   with vs.variable_scope(scope or type(self).__name__):
     with vs.variable_scope("Attn1"):
       flat_hs = tf.reshape(self.hs, [-1, num_units])
       projected = rnn_cell.linear(flat_hs, num_units, True, 1.0)
       # Restore the shape of the encoder output after the projection.
       self.phi_hs = tf.reshape(tanh(projected), tf.shape(self.hs))
   super(GRUCellAttn, self).__init__(num_units)

示例15: attention

 def attention(query):
     """Point on hidden using hidden_features and query.

     Returns the raw scores v^T * tanh(...) summed over axes 2 and 3; no
     softmax is applied in this function.
     """
     with vs.variable_scope("Attention"):
         projected = rnn_cell.linear(query, attention_vec_size, True)
         projected = array_ops.reshape(
             projected, [-1, 1, 1, attention_vec_size])
         scores = math_ops.reduce_sum(
             v * math_ops.tanh(hidden_features + projected), [2, 3])
     return scores

示例16: _GenerateOrderedInputs

 def _GenerateOrderedInputs(self, size, n):
   """Build a chain of inputs in which each element depends on the previous one.

   Starts from the result of `self._GenerateUnorderedInputs(size, 1)` and
   appends `n - 1` further tensors. Each new tensor is `tanh(1.0 + x)` of a
   value dequeued from a capacity-1 FIFO queue, created under a control
   dependency on enqueueing the previous element.

   Args:
     size: Forwarded to `_GenerateUnorderedInputs`.
     n: Total number of inputs to produce.

   Returns:
     The list of inputs.
   """
   inputs = self._GenerateUnorderedInputs(size, 1)
   seed = inputs[0]
   queue = data_flow_ops.FIFOQueue(
       capacity=1, dtypes=[seed.dtype], shapes=[seed.get_shape()])
   extra_steps = n - 1
   for _ in xrange(extra_steps):
     enqueue_op = queue.enqueue(inputs[-1])
     # The dequeue is created under a dependency on the enqueue.
     with ops.control_dependencies([enqueue_op]):
       chained = math_ops.tanh(1.0 + queue.dequeue())
       inputs.append(chained)
   return inputs

示例17: testIsSequence

 def testIsSequence(self):
   """Check that `nest.is_sequence` accepts nested lists/tuples only.

   Sets, tensors, op results and NumPy arrays must all be rejected.
   """
   self.assertTrue(nest.is_sequence([1, 3, [4, 5]]))
   self.assertTrue(nest.is_sequence(((7, 8), (5, 6))))
   self.assertFalse(nest.is_sequence(set([1, 2])))
   ones = array_ops.ones([2, 3])
   # `ones` was created but never checked: a tensor and the result of an op
   # applied to it are not sequences either.
   self.assertFalse(nest.is_sequence(ones))
   self.assertFalse(nest.is_sequence(math_ops.tanh(ones)))
   self.assertFalse(nest.is_sequence(np.ones((4, 5))))

示例18: __call__

  def __call__(self, query, previous_alignments):
    """Score the query based on the keys and values.

    Args:
      query: Tensor of dtype matching `self.values` and shape
        `[batch_size, query_depth]`.
      previous_alignments: Tensor of dtype matching `self.values` and shape
        `[batch_size, alignments_size]`
        (`alignments_size` is memory's `max_time`).

    Returns:
      alignments: Tensor of dtype matching `self.values` and shape
        `[batch_size, alignments_size]` (`alignments_size` is memory's
        `max_time`).
    """
    with variable_scope.variable_scope(None, "bahdanau_attention", [query]):
      processed_query = self.query_layer(query) if self.query_layer else query
      dtype = processed_query.dtype
      # Reshape from [batch_size, ...] to [batch_size, 1, ...] for broadcasting.
      processed_query = array_ops.expand_dims(processed_query, 1)
      keys = self._keys
      v = variable_scope.get_variable(
          "attention_v", [self._num_units], dtype=dtype)
      if self._normalize:
        # Scalar used in weight normalization
        g = variable_scope.get_variable(
            "attention_g", dtype=dtype,
            initializer=math.sqrt((1. / self._num_units)))
        # Bias added prior to the nonlinearity
        # NOTE(review): the zeros initializer is restored from the upstream
        # TensorFlow implementation and needs `init_ops` in scope -- confirm.
        b = variable_scope.get_variable(
            "attention_b", [self._num_units], dtype=dtype,
            initializer=init_ops.zeros_initializer())
        # normed_v = g * v / ||v||
        normed_v = g * v * math_ops.rsqrt(
            math_ops.reduce_sum(math_ops.square(v)))
        score = math_ops.reduce_sum(
            normed_v * math_ops.tanh(keys + processed_query + b), [2])
      else:
        score = math_ops.reduce_sum(v * math_ops.tanh(keys + processed_query),
                                    [2])

    alignments = self._probability_fn(score, previous_alignments)
    return alignments

示例19: testGradientThroughNewStep

  def testGradientThroughNewStep(self):
    """Check the gradient of `tanh(x) * z` w.r.t. `x` when `z` is made in a new step.

    `x` and `y = tanh(x)` are created in the outer imperative scope; the
    product and its gradient are built inside `mode.new_step()`. The expected
    gradient is `z * (1 - y**2)`.
    """
    with imperative_mode.ImperativeMode(self._target) as mode:
      x = constant_op.constant(np.random.rand(3))
      y = math_ops.tanh(x)

      with mode.new_step():
        z = constant_op.constant(np.random.rand(3))
        product = math_ops.multiply(y, z)
        grads = gradients_impl.gradients(product, x)
        actual = grads[0].value
        expected = z.value * (1.0 - y.value ** 2)
        self.assertAllClose(actual, expected)

示例20: _lstm_cell

def _lstm_cell(prev_c, prev_h, x):
  """Create an LSTM cell.

  Args:
    prev_c: Previous cell state.
    prev_h: Previous hidden state (embedding), concatenated with `x`.
    x: Input.

  Returns:
    The tuple `(next_c, next_h)`.
  """
  # A single bias from `_bias([4])` is added to every gate pre-activation.
  bias = _bias([4])
  w = _weight([8, 16])
  stacked = array_ops.concat([x, prev_h], axis=1)
  fused = math_ops.matmul(stacked, w)
  # Split order: input gate, forget gate, output gate, cell candidate.
  pre_i, pre_f, pre_o, pre_c = array_ops.split(fused, 4, axis=1)
  in_gate = math_ops.sigmoid(nn.bias_add(pre_i, bias))
  forget_gate = math_ops.sigmoid(nn.bias_add(pre_f, bias))
  out_gate = math_ops.sigmoid(nn.bias_add(pre_o, bias))
  candidate = math_ops.tanh(nn.bias_add(pre_c, bias))
  next_c = forget_gate * prev_c + in_gate * candidate
  next_h = out_gate * math_ops.tanh(next_c)
  return next_c, next_h









