• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python rnn_cell.linear函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中tensorflow.python.ops.rnn_cell.linear函数的典型用法代码示例。如果您正苦于以下问题:Python linear函数的具体用法?Python linear怎么用?Python linear使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了linear函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __call__

 def __call__(self, inputs, state, scope=None):
   gru_out, gru_state = super(GRUCellAttn, self).__call__(inputs, state, scope)
   with vs.variable_scope(scope or type(self).__name__):
     with vs.variable_scope("Attn2"):
       gamma_h = tanh(rnn_cell.linear(gru_out, self._num_units, True, 1.0))
     weights = tf.reduce_sum(self.phi_hs * gamma_h, reduction_indices=2, keep_dims=True)
     weights = tf.exp(weights - tf.reduce_max(weights, reduction_indices=0, keep_dims=True))
     weights = weights / (1e-6 + tf.reduce_sum(weights, reduction_indices=0, keep_dims=True))
     context = tf.reduce_sum(self.hs * weights, reduction_indices=0)
     with vs.variable_scope("AttnConcat"):
       out = tf.nn.relu(rnn_cell.linear([context, gru_out], self._num_units, True, 1.0))
     self.attn_map = tf.squeeze(tf.slice(weights, [0, 0, 0], [-1, -1, 1]))
     return (out, out) 
开发者ID:hrishikeshvganu,项目名称:nlc,代码行数:13,代码来源:nlc_model.py


示例2: __call__

	def __call__(self, inputs, state, episodic_gate, scope=None):
		"""Gated recurrent unit (GRU) with nunits cells."""
		
		with vs.variable_scope("MGRUCell"):  # "GRUCell"
			with vs.variable_scope("Gates"):	# Reset gate and update gate.
				# We start with bias of 1.0 to not reset and not update.
				r = rnn_cell.linear([inputs, state], self._num_units, True, 1.0, scope=scope)
				r = sigmoid(r)
			with vs.variable_scope("Candidate"):
				c = tanh(rnn_cell.linear([inputs, r * state], self._num_units, True))
			
			new_h = tf.mul(episodic_gate, c) + tf.mul((1 - episodic_gate), state)
		return new_h, new_h
开发者ID:sufengniu,项目名称:DMN-tensorflow,代码行数:13,代码来源:cell.py


示例3: downscale

 def downscale(self, inp):
   with vs.variable_scope("Downscale"):
     inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
     out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
     out3d = tf.reshape(out2d, [self.batch_size, -1, self.size])
     out3d = tf.transpose(out3d, perm=[1, 0, 2])
     out = tanh(out3d)
   return out
开发者ID:nipengmath,项目名称:nlc,代码行数:8,代码来源:nlc_model.py


示例4: __init__

 def __init__(self, num_units, encoder_output, scope=None):
   self.hs = encoder_output
   with vs.variable_scope(scope or type(self).__name__):
     with vs.variable_scope("Attn1"):
       hs2d = tf.reshape(self.hs, [-1, num_units])
       phi_hs2d = tanh(rnn_cell.linear(hs2d, num_units, True, 1.0))
       self.phi_hs = tf.reshape(phi_hs2d, tf.shape(self.hs))
   super(GRUCellAttn, self).__init__(num_units)
开发者ID:hrishikeshvganu,项目名称:nlc,代码行数:8,代码来源:nlc_model.py


示例5: attention

 def attention(query):
     """Point on hidden using hidden_features and query."""
     with vs.variable_scope("Attention"):
         y = rnn_cell.linear(query, attention_vec_size, True)
         y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
         # Attention mask is a softmax of v^T * tanh(...).
         s = math_ops.reduce_sum(
             v * math_ops.tanh(hidden_features + y), [2, 3])
         return s
开发者ID:heshizhu,项目名称:TensorFlow-Pointer-Networks,代码行数:9,代码来源:pointer.py


示例6: testLinear

  def testLinear(self):
    with self.test_session() as sess:
      with tf.variable_scope("root", initializer=tf.constant_initializer(1.0)):
        x = tf.zeros([1, 2])
        l = linear([x], 2, False)
        sess.run([tf.initialize_all_variables()])
        res = sess.run([l], {x.name: np.array([[1., 2.]])})
        self.assertAllClose(res[0], [[3.0, 3.0]])

        # Checks prevent you from accidentally creating a shared function.
        with self.assertRaises(ValueError):
          l1 = linear([x], 2, False)

        # But you can create a new one in a new scope and share the variables.
        with tf.variable_scope("l1") as new_scope:
          l1 = linear([x], 2, False)
        with tf.variable_scope(new_scope, reuse=True):
          linear([l1], 2, False)
        self.assertEqual(len(tf.trainable_variables()), 2)
开发者ID:0-T-0,项目名称:tensorflow,代码行数:19,代码来源:rnn_cell_test.py


示例7: build_encoder

    def build_encoder(self):
        """Inference Network. q(h|X)"""
        with tf.variable_scope("encoder"):
            self.l1_lin = linear(tf.expand_dims(self.x, 0), self.embed_dim, bias=True, scope="l1")
            self.l1 = tf.nn.relu(self.l1_lin)

            self.l2_lin = linear(self.l1, self.embed_dim, bias=True, scope="l2")
            self.l2 = tf.nn.relu(self.l2_lin)

            self.mu = linear(self.l2, self.h_dim, bias=True, scope="mu")
            self.log_sigma_sq = linear(self.l2, self.h_dim, bias=True, scope="log_sigma_sq")

            self.eps = tf.random_normal((1, self.h_dim), 0, 1, dtype=tf.float32)
            self.sigma = tf.sqrt(tf.exp(self.log_sigma_sq))

            self.h = tf.add(self.mu, tf.mul(self.sigma, self.eps))

            _ = tf.histogram_summary("mu", self.mu)
            _ = tf.histogram_summary("sigma", self.sigma)
            _ = tf.histogram_summary("h", self.h)
            _ = tf.histogram_summary("mu + sigma", self.mu + self.sigma)
开发者ID:tonydeep,项目名称:variational-text-tensorflow,代码行数:21,代码来源:nvdm.py


示例8: attention

 def attention(query): 
   """Put attention masks on hidden using hidden_features and query."""
   with vs.variable_scope("Attention"):
     # Attention mask is a softmax of h_in^T*decoder_hidden.
     dec_hid = array_ops.tile(query, [1, attn_length]) # replicate query for element-wise multiplication
     dec_hid = array_ops.reshape(dec_hid, [-1, attn_length, attention_vec_size])
     attn_weight = nn_ops.softmax(math_ops.reduce_sum(attention_states*dec_hid, [2])) # attn weights for every hidden states in encoder
     # Now calculate the attention-weighted vector (context vector) cc.
     cc = math_ops.reduce_sum(array_ops.reshape(attn_weight, [-1, attn_length, 1, 1])*hidden, [1,2])
     # attented hidden state
     with vs.variable_scope("AttnW1"):
       term1 = rnn_cell.linear(query, attn_size, False)
     with vs.variable_scope("AttnW2"):
       term2 = rnn_cell.linear(cc, attn_size, False)
     # environment representation
     if env: # 2D Tensor of shape [batch_size, env_size]
       with vs.variable_scope("Environment"):
         term3 = rnn_cell.linear(math_ops.to_float(env), attn_size, False)
       h_attn = math_ops.tanh(term1 + term2 + term3)
     else:
       h_attn = math_ops.tanh(term1 + term2)
   return h_attn, attn_weight
开发者ID:LittleYUYU,项目名称:RobotNavigateNLP,代码行数:22,代码来源:seq2seq_ops.py


示例9: attention

 def attention(query):
     """Put attention masks on hidden using hidden_features and query."""
     ds = []  # Results of attention reads will be stored here.
     for a in xrange(num_heads):
         with variable_scope.variable_scope("Attention_%d" % a):
             y = rnn_cell.linear(query, attention_vec_size, True)
             y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
             # Attention mask is a softmax of v^T * tanh(...).
             s = math_ops.reduce_sum(v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
             a = nn_ops.softmax(s)
             # Now calculate the attention-weighted vector d.
             d = math_ops.reduce_sum(array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden, [1, 2])
             ds.append(array_ops.reshape(d, [-1, attn_size]))
     return ds
开发者ID:sherrym,项目名称:tensorflow,代码行数:14,代码来源:seq2seq.py


示例10: setup_label_loss

    def setup_label_loss(self):
        with vs.variable_scope("LabelLogistic"):
            doshape = tf.shape(self.decoder_output)
            T, batch_size = doshape[0], doshape[1]

            # [batch_size, cell.state_size]
            # decoder_output: [batch_size, time_step, cell.state_size]
            last_state = self.decoder_output[:, -1, :]

            # projecting to label space
            # [batch_size, label_size]
            logits = rnn_cell.linear(last_state, self.label_size, True, 1.0)
            self.losses = tf.nn.softmax_cross_entropy_with_logits(logits, self.label_placeholder)
            self.predictions = logits
开发者ID:windweller,项目名称:Trident,代码行数:14,代码来源:story_model.py


示例11: setup_loss

  def setup_loss(self):
    with vs.variable_scope("Logistic"):
      do2d = tf.reshape(self.decoder_output, [-1, self.size])
      logits2d = rnn_cell.linear(do2d, self.vocab_size, True, 1.0)
      outputs2d = tf.nn.softmax(logits2d)
      self.outputs = tf.reshape(outputs2d, [-1, self.batch_size, self.vocab_size])

      targets_no_GO = tf.slice(self.target_tokens, [1, 0], [-1, -1])
      masks_no_GO = tf.slice(self.target_mask, [1, 0], [-1, -1])
      # easier to pad target/mask than to split decoder input since tensorflow does not support negative indexing
      labels1d = tf.reshape(tf.pad(targets_no_GO, [[0, 1], [0, 0]]), [-1])
      mask1d = tf.reshape(tf.pad(masks_no_GO, [[0, 1], [0, 0]]), [-1])
      losses1d = tf.nn.sparse_softmax_cross_entropy_with_logits(logits2d, labels1d) * tf.to_float(mask1d)
      losses2d = tf.reshape(losses1d, [-1, self.batch_size])
      self.losses = tf.reduce_sum(losses2d) / self.batch_size
开发者ID:wanjinchang,项目名称:nlc,代码行数:15,代码来源:nlc_model.py


示例12: downscale

  def downscale(self, inp, mask):
    with vs.variable_scope("Downscale"):
      inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
      out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
      out3d = tf.reshape(out2d, [self.batch_size, -1, self.size])
      out3d = tf.transpose(out3d, perm=[1, 0, 2])
      out = tanh(out3d)

      mask = tf.transpose(mask)
      mask = tf.reshape(mask, [-1, 2])
      mask = tf.cast(mask, tf.bool)
      mask = tf.reduce_any(mask, reduction_indices=1)
      mask = tf.to_int32(mask)
      mask = tf.reshape(mask, [self.batch_size, -1])
      mask = tf.transpose(mask)
    return out, mask
开发者ID:wanjinchang,项目名称:nlc,代码行数:16,代码来源:nlc_model.py


示例13: basic_rnn_cell

def basic_rnn_cell(inputs, state, num_units, scope=None):
    if state is None:
        if inputs is not None:
            batch_size = inputs.get_shape()[0]
            dtype = inputs.dtype
        else:
            batch_size = 0
            dtype = tf.float32
        init_output = tf.zeros(tf.pack([batch_size, num_units]), dtype=dtype)
        init_state = tf.zeros(tf.pack([batch_size, num_units]), dtype=dtype)
        init_output.set_shape([batch_size, num_units])
        init_state.set_shape([batch_size, num_units])
        return init_output, init_state
    else:
        with tf.variable_op_scope([inputs, state], scope, "BasicRNNCell"):
            output = tf.tanh(linear([inputs, state], num_units, True))
        return output, output
开发者ID:RuhiSharma,项目名称:tensorflow,代码行数:17,代码来源:rnn_cell_test.py


示例14: downscale

  def downscale(self, inp, mask):
    with vs.variable_scope("Downscale"):
      inshape = tf.shape(inp)
      T, batch_size, dim = inshape[0], inshape[1], inshape[2]
      inp2d = tf.reshape(tf.transpose(inp, perm=[1, 0, 2]), [-1, 2 * self.size])
      out2d = rnn_cell.linear(inp2d, self.size, True, 1.0)
      out3d = tf.reshape(out2d, tf.pack((batch_size, tf.to_int32(T/2), dim)))
      out3d = tf.transpose(out3d, perm=[1, 0, 2])
      out3d.set_shape([None, None, self.size])
      out = tanh(out3d)

      mask = tf.transpose(mask)
      mask = tf.reshape(mask, [-1, 2])
      mask = tf.cast(mask, tf.bool)
      mask = tf.reduce_any(mask, reduction_indices=1)
      mask = tf.to_int32(mask)
      mask = tf.reshape(mask, tf.pack([batch_size, -1]))
      mask = tf.transpose(mask)
    return out, mask
开发者ID:hrishikeshvganu,项目名称:nlc,代码行数:19,代码来源:nlc_model.py


示例15: dnn

def dnn(tensor_in, hidden_units, activation=nn.relu, dropout=None):
  """Creates fully connected deep neural network subgraph.

  Args:
    tensor_in: tensor or placeholder for input features.
    hidden_units: list of counts of hidden units in each layer.
    activation: activation function between layers. Can be None.
    dropout: if not None, will add a dropout layer with given probability.

  Returns:
    A tensor which would be a deep neural network.
  """
  with vs.variable_scope('dnn'):
    for i, n_units in enumerate(hidden_units):
      with vs.variable_scope('layer%d' % i):
        tensor_in = rnn_cell.linear(tensor_in, n_units, True)
        if activation is not None:
          tensor_in = activation(tensor_in)
        if dropout is not None:
          tensor_in = dropout_ops.dropout(tensor_in, prob=(1.0 - dropout))
    return tensor_in
开发者ID:01bui,项目名称:tensorflow,代码行数:21,代码来源:dnn_ops.py


示例16: pointer_decoder

def pointer_decoder(decoder_inputs, initial_state, attention_states, cell,
                    feed_prev=True, dtype=dtypes.float32, scope=None):
    """RNN decoder with pointer net for the sequence-to-sequence model.
    Args:
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      initial_state: 2D Tensor [batch_size x cell.state_size].
      attention_states: 3D Tensor [batch_size x attn_length x attn_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      dtype: The dtype to use for the RNN initial state (default: tf.float32).
      scope: VariableScope for the created subgraph; default: "pointer_decoder".
    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
        [batch_size x output_size]. These represent the generated outputs.
        Output i is computed from input i (which is either i-th decoder_inputs.
        First, we run the cell
        on a combination of the input and previous attention masks:
          cell_output, new_state = cell(linear(input, prev_attn), prev_state).
        Then, we calculate new attention masks:
          new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
        and then we calculate the output:
          output = linear(cell_output, new_attn).
      states: The state of each decoder cell in each time-step. This is a list
        with length len(decoder_inputs) -- one item for each time-step.
        Each item is a 2D Tensor of shape [batch_size x cell.state_size].
    """
    if not decoder_inputs:
        raise ValueError("Must provide at least 1 input to attention decoder.")
    if not attention_states.get_shape()[1:2].is_fully_defined():
        raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                         % attention_states.get_shape())

    with vs.variable_scope(scope or "point_decoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
        input_size = decoder_inputs[0].get_shape()[1].value
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

        # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
        hidden = array_ops.reshape(
            attention_states, [-1, attn_length, 1, attn_size])

        attention_vec_size = attn_size  # Size of query vectors for attention.
        k = vs.get_variable("AttnW", [1, 1, attn_size, attention_vec_size])
        hidden_features = nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME")
        v = vs.get_variable("AttnV", [attention_vec_size])

        states = [initial_state]

        def attention(query):
            """Point on hidden using hidden_features and query."""
            with vs.variable_scope("Attention"):
                y = rnn_cell.linear(query, attention_vec_size, True)
                y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                # Attention mask is a softmax of v^T * tanh(...).
                s = math_ops.reduce_sum(
                    v * math_ops.tanh(hidden_features + y), [2, 3])
                return s

        outputs = []
        prev = None
        batch_attn_size = array_ops.pack([batch_size, attn_size])
        attns = array_ops.zeros(batch_attn_size, dtype=dtype)

        attns.set_shape([None, attn_size])
        inps = []
        for i in xrange(len(decoder_inputs)):
            if i > 0:
                vs.get_variable_scope().reuse_variables()
            inp = decoder_inputs[i]

            if feed_prev and i > 0:
                inp = tf.pack(decoder_inputs)
                inp = tf.transpose(inp, perm=[1, 0, 2])
                inp = tf.reshape(inp, [-1, attn_length, input_size])
                inp = tf.reduce_sum(inp * tf.reshape(tf.nn.softmax(output), [-1, attn_length, 1]), 1)
                inp = tf.stop_gradient(inp)
                inps.append(inp)

            # Use the same inputs in inference, order internaly

            # Merge input and previous attentions into one vector of the right size.
            x = rnn_cell.linear([inp, attns], cell.input_size, True)
            # Run the RNN.
            cell_output, new_state = cell(x, states[-1])
            states.append(new_state)
            # Run the attention mechanism.
            output = attention(new_state)

            outputs.append(output)

    return outputs, states, inps
开发者ID:heshizhu,项目名称:TensorFlow-Pointer-Networks,代码行数:91,代码来源:pointer.py


示例17: attention_decoder

def attention_decoder(decoder_inputs, initial_state, attention_states, cell,
                      output_size=None, num_heads=1, loop_function=None,
                      dtype=dtypes.float32, scope=None):
  """RNN decoder with attention for the sequence-to-sequence model.

  Args:
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    output_size: size of the output vectors; if None, we use cell.output_size.
    num_heads: number of attention heads that read from attention_states.
    loop_function: if not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x cell.output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x cell.input_size].
    dtype: The dtype to use for the RNN initial state (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "attention_decoder".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
      [batch_size x output_size]. These represent the generated outputs.
      Output i is computed from input i (which is either i-th decoder_inputs or
      loop_function(output {i-1}, i)) as follows. First, we run the cell
      on a combination of the input and previous attention masks:
        cell_output, new_state = cell(linear(input, prev_attn), prev_state).
      Then, we calculate new attention masks:
        new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
      and then we calculate the output:
        output = linear(cell_output, new_attn).
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, or shapes
      of attention_states are not set.
  """
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if not attention_states.get_shape()[1:2].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell.output_size

  with vs.variable_scope(scope or "attention_decoder"):
    batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for a in xrange(num_heads):
      k = vs.get_variable("AttnW_%d" % a, [1, 1, attn_size, attention_vec_size])
      hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
      v.append(vs.get_variable("AttnV_%d" % a, [attention_vec_size]))

    states = [initial_state]

    def attention(query):
      """Put attention masks on hidden using hidden_features and query."""
      ds = []  # Results of attention reads will be stored here.
      for a in xrange(num_heads):
        with vs.variable_scope("Attention_%d" % a):
          y = rnn_cell.linear(query, attention_vec_size, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
          a = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
              [1, 2])
          ds.append(array_ops.reshape(d, [-1, attn_size]))
      return ds

    outputs = []
    prev = None
    batch_attn_size = array_ops.pack([batch_size, attn_size])
    attns = [array_ops.zeros(batch_attn_size, dtype=dtype)
             for _ in xrange(num_heads)]
    for a in attns:  # Ensure the second shape of attention vectors is set.
      a.set_shape([None, attn_size])
    for i in xrange(len(decoder_inputs)):
      if i > 0:
        vs.get_variable_scope().reuse_variables()
      inp = decoder_inputs[i]
      # If loop_function is set, we use it instead of decoder_inputs.
#.........这里部分代码省略.........
开发者ID:ttamada,项目名称:MachineTranslationWithVisualContexts,代码行数:101,代码来源:seq2seq.py


示例18: attention_encoder

def attention_encoder(decoder_inputs, initial_state, attention_states,
                      cell, num_heads=1,
                      output_size=None, dtype=dtypes.float32, scope=None,
                      initial_state_attention=False):
    """
    Encoder that receives attention from another encoder

    Parameters
    ----------
    decoder_inputs:
        second encoder's input we call it a decoder's input
        it should be already wrapped by add_embedding()
        it's A list of num_steps length 2D Tensors [batch_size, input_size = embed_size]
    initial_state:
        2D Tensor (batch_size x cell.state_size).
    attention_states:
        3D Tensor (batch_size x attn_length (seq_length) x attn_size)
    cell
    num_heads
    output_size
    dtype
    scope
    initial_state_attention

    Returns
    -------
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_size] containing the generated outputs.
      state: The state of each decoder cell at the final time-step.
        It is a 2D Tensor of shape (batch_size x cell.state_size).

    """
    decoder_inputs = [decoder_inputs]  # in original model this is a bucket list of inputs

    with vs.variable_scope(scope or "attention_encoder"):
        batch_size = array_ops.shape(decoder_inputs[0])[0]
        attn_length = attention_states.get_shape()[1].value
        attn_size = attention_states.get_shape()[2].value

    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    hidden = array_ops.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    for a in xrange(num_heads):
        k = vs.get_variable("AttnW_%d" % a,
                            [1, 1, attn_size, attention_vec_size])
        hidden_features.append(tf.nn.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
        v.append(vs.get_variable("AttnV_%d" % a, [attention_vec_size]))

    def attention(query):
        """Put attention masks on hidden using hidden_features and query."""
        ds = []  # Results of attention reads will be stored here.
        for a in xrange(num_heads):
            with vs.variable_scope("Attention_%d" % a):
                y = rnn_cell.linear(query, attention_vec_size, True)
                y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
                # Attention mask is a softmax of v^T * tanh(...).
                s = math_ops.reduce_sum(
                    v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
                a = tf.nn.softmax(s)
                # Now calculate the attention-weighted vector d.
                d = math_ops.reduce_sum(
                    array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
                    [1, 2])
                ds.append(array_ops.reshape(d, [-1, attn_size]))
        return ds

    outputs = []
    batch_attn_size = array_ops.pack([batch_size, attn_size])
    attns = [array_ops.zeros(batch_attn_size, dtype=dtype) for _ in xrange(num_heads)]

    for a in attns:  # Ensure the second shape of attention vectors is set.
        a.set_shape([None, attn_size])
    if initial_state_attention:
        attns = attention(initial_state)

    state = initial_state

    # this is now iterating on time steps
    for i, inp in enumerate(decoder_inputs):
        if i > 0:
            vs.get_variable_scope().reuse_variables()
        # Merge input and previous attentions into one vector of the right size.
        x = rnn_cell.linear([inp] + attns, cell.input_size, True)
        # Run the RNN.
        cell_output, state = cell(x, state)
        # Run the attention mechanism.
        if i == 0 and initial_state_attention:
            with vs.variable_scope(vs.get_variable_scope(), reuse=True):
                attns = attention(state)
        else:
            attns = attention(state)

        with vs.variable_scope("AttnOutputProjection"):
            output = rnn_cell.linear([cell_output] + attns, output_size, True)

        outputs.append(output)

#.........这里部分代码省略.........
开发者ID:windweller,项目名称:Trident,代码行数:101,代码来源:seq2seq.py


示例19: attention_decoder

def attention_decoder(decoder_inputs, initial_state, attention_states, cell,
                      output_size=None, num_heads=1, loop_function=None,
                      dtype=dtypes.float32, scope=None,
                      initial_state_attention=False):
  """RNN decoder with attention for the sequence-to-sequence model.

  In this context "attention" means that, during decoding, the RNN can look up
  information in the additional tensor attention_states, and it does this by
  focusing on a few entries from the tensor. This model has proven to yield
  especially good results in a number of sequence-to-sequence tasks. This
  implementation is based on http://arxiv.org/abs/1412.7449 (see below for
  details). It is recommended for complex sequence-to-sequence tasks.

  Args:
    decoder_inputs: A list of 2D Tensors [batch_size x cell.input_size].
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    output_size: Size of the output vectors; if None, we use cell.output_size.
    num_heads: Number of attention heads that read from attention_states.
    loop_function: If not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x cell.output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x cell.input_size].
    dtype: The dtype to use for the RNN initial state (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "attention_decoder".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states -- useful when we wish to resume decoding from a previously
      stored decoder state and attention states.

  Returns:
    A tuple of the form (outputs, state), where:
      outputs: A list of the same length as decoder_inputs of 2D Tensors of
        shape [batch_size x output_size]. These represent the generated outputs.
        Output i is computed from input i (which is either the i-th element
        of decoder_inputs or loop_function(output {i-1}, i)) as follows.
        First, we run the cell on a combination of the input and previous
        attention masks:
          cell_output, new_state = cell(linear(input, prev_attn), prev_state).
        Then, we calculate new attention masks:
          new_attn = softmax(V^T * tanh(W * attention_states + U * new_state))
        and then we calculate the output:
          output = linear(cell_output, new_attn).
      state: The state of each decoder cell the final time-step.
        It is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, or shapes
      of attention_states are not set.
  """
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if num_heads < 1:
    raise ValueError("With less than 1 heads, use a non-attention decoder.")
  if not attention_states.get_shape()[1:2].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell.output_size

  with variable_scope.variable_scope(scope or "attention_decoder"):
    batch_size = array_ops.shape(decoder_inputs[0])[0]  # Needed for reshaping.
    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value

    # To calculate W1 * h_t we use a 1-by-1 convolution, need to reshape before.
    hidden = array_ops.reshape(
        attention_states, [-1, attn_length, 1, attn_size])
    hidden_features = []
    v = []
    attention_vec_size = attn_size  # Size of query vectors for attention.
    for a in xrange(num_heads):
      k = variable_scope.get_variable("AttnW_%d" % a,
                                      [1, 1, attn_size, attention_vec_size])
      hidden_features.append(nn_ops.conv2d(hidden, k, [1, 1, 1, 1], "SAME"))
      v.append(variable_scope.get_variable("AttnV_%d" % a,
                                           [attention_vec_size]))

    state = initial_state

    def attention(query):
      """Put attention masks on hidden using hidden_features and query."""
      ds = []  # Results of attention reads will be stored here.
      for a in xrange(num_heads):
        with variable_scope.variable_scope("Attention_%d" % a):
          y = rnn_cell.linear(query, attention_vec_size, True)
          y = array_ops.reshape(y, [-1, 1, 1, attention_vec_size])
          # Attention mask is a softmax of v^T * tanh(...).
          s = math_ops.reduce_sum(
              v[a] * math_ops.tanh(hidden_features[a] + y), [2, 3])
          a = nn_ops.softmax(s)
          # Now calculate the attention-weighted vector d.
          d = math_ops.reduce_sum(
              array_ops.reshape(a, [-1, attn_length, 1, 1]) * hidden,
              [1, 2])
#.........这里部分代码省略.........
开发者ID:maxkarlovitz,项目名称:tensorflow,代码行数:101,代码来源:seq2seq.py


示例20: attention_decoder

def attention_decoder(decoder_inputs, initial_state, attention_states, cell, batch_size, state_size,
                      decoder_inputs_positions=None, decoder_inputs_maps=None, output_size=None, loop_function=None,
                      dtype=dtypes.float32, scope=None):
  """RNN decoder with attention for the sequence-to-sequence model.

  Args:
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size]. Embedded inputs.
    initial_state: 2D Tensor [batch_size x cell.state_size].
    attention_states: 3D Tensor [batch_size x attn_length x attn_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    batch_size: need to clarify batch size explicitly since env_state is updated one sample by one sample.
    state_size: size of environment state.
    decoder_inputs_positions: a list of 2D Tensors of shape [batch_size, 3],
       indicating intial positions of each example in a map. Default None.
    decoder_inputs_maps: a 1D Tensor of length batch_size indicating the map. Default None.
    output_size: size of the output vectors; if None, we use cell.output_size.
    loop_function: if not None, this function will be applied to i-th output
      in order to generate i+1-th input, and decoder_inputs will be ignored,
      except for the first element ("GO" symbol). This can be used for decoding,
      but also for training to emulate http://arxiv.org/pdf/1506.03099v2.pdf.
      Signature -- loop_function(prev, i) = next
        * prev is a 2D Tensor of shape [batch_size x cell.output_size],
        * i is an integer, the step number (when advanced control is needed),
        * next is a 2D Tensor of shape [batch_size x cell.input_size].
    dtype: The dtype to use for the RNN initial state (default: tf.float32).
    scope: VariableScope for the created subgraph; default: "attention_decoder".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors of shape
      [batch_size x output_size]. These represent the generated outputs.
      Output i is computed from input i (which is either i-th decoder_inputs or
      loop_function(output {i-1}, i)) as follows. 
      First, we run the cell on the current decoder input or feed from previous output:
        cur_output, new_state = cell(input, prev_state).
      Then, we calculate new attention masks:
        new_attn = softmax(h_t^T * attention_states).
      Thus, the context vector:
        cont_vec = weighted_sum_of(attention_states), weighted by (new_attn),
      and then we calculate the attended output:
        attn_output = tanh(W1*current_output + W2*cont_vec + W3*env_state).
      The finally output for prediction:
        output = softmax(W*attn_output).
        This "output" should be a 1D Tensor of shape [num_symbols].
        Every item of the output refers to the probability of predicting certain symbol for the next step.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].

  Raises:
    ValueError: when num_heads is not positive, there are no inputs, or shapes
      of attention_states are not set.
  """
  if not decoder_inputs:
    raise ValueError("Must provide at least 1 input to attention decoder.")
  if not attention_states.get_shape()[1:2].is_fully_defined():
    raise ValueError("Shape[1] and [2] of attention_states must be known: %s"
                     % attention_states.get_shape())
  if output_size is None:
    output_size = cell.output_size

  with vs.variable_scope(scope or "attention_decoder"):
    attn_length = attention_states.get_shape()[1].value
    attn_size = attention_states.get_shape()[2].value
    mapIdx = array_ops.pack([map3.map_grid, map3.map_jelly, map3.map_one]) #map

    attention_vec_size = attn_size # size of query
    states = [initial_state]
    # current position and environment
    position, env = None, None

    hidden = array_ops.reshape(attention_states, [-1, attn_length, 1, attn_size]) # reshape for later computation

    def attention(query): 
      """Put attention masks on hidden using hidden_features and query."""
      with vs.variable_scope("Attention"):
        # Attention mask is a softmax of h_in^T*decoder_hidden.
        dec_hid = array_ops.tile(query, [1, attn_length]) # replicate query for element-wise multiplication
        dec_hid = array_ops.reshape(dec_hid, [-1, attn_length, attention_vec_size])
        attn_weight = nn_ops.softmax(math_ops.reduce_sum(attention_states*dec_hid, [2])) # attn weights for every hidden states in encoder
        # Now calculate the attention-weighted vector (context vector) cc.
        cc = math_ops.reduce_sum(array_ops.reshape(attn_weight, [-1, attn_length, 1, 1])*hidden, [1,2])
        # attented hidden state
        with vs.variable_scope("AttnW1"):
          term1 = rnn_cell.linear(query, attn_size, False)
        with vs.variable_scope("AttnW2"):
          term2 = rnn_cell.linear(cc, attn_size, False)
        # environment representation
 

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python script_ops.eager_py_func函数代码示例发布时间:2022-05-27
下一篇:
Python rnn.rnn函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap