
Python common_hparams.basic_params1 Function Code Examples


This article collects and summarizes typical usage examples of the Python function tensor2tensor.layers.common_hparams.basic_params1. If you are wondering how exactly basic_params1 is used, or looking for concrete examples of calling it, the hand-picked code examples below should help.



A total of 20 code examples of the basic_params1 function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
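
Before diving into the examples, here is a minimal sketch of the pattern they all share: call basic_params1() to obtain the base hyperparameter set, override existing fields, add new ones with add_hparam, and return the result. The registration decorator and the name my_hparams_set below are illustrative assumptions (registering the function is how tensor2tensor typically exposes a new hparams set), not code taken from any of the projects listed.

from tensor2tensor.layers import common_hparams
from tensor2tensor.utils import registry

@registry.register_hparams
def my_hparams_set():
  """Hypothetical hparams set built on top of basic_params1."""
  hparams = common_hparams.basic_params1()  # start from the shared defaults
  hparams.hidden_size = 512                 # override an existing field
  hparams.add_hparam("filter_size", 2048)   # add a brand-new field
  return hparams

Once registered, such a set is typically selected by name at training time, for example via t2t-trainer's --hparams_set flag.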

Example 1: autoencoder_basic

def autoencoder_basic():
  """Basic autoencoder model."""
  hparams = common_hparams.basic_params1()
  hparams.optimizer = "Adam"
  hparams.learning_rate_constant = 0.0002
  hparams.learning_rate_warmup_steps = 500
  hparams.learning_rate_schedule = "constant * linear_warmup"
  hparams.label_smoothing = 0.0
  hparams.batch_size = 128
  hparams.hidden_size = 64
  hparams.num_hidden_layers = 5
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 0.0
  hparams.kernel_height = 4
  hparams.kernel_width = 4
  hparams.dropout = 0.1
  hparams.add_hparam("max_hidden_size", 1024)
  hparams.add_hparam("bottleneck_bits", 128)
  hparams.add_hparam("bottleneck_noise", 0.1)
  hparams.add_hparam("bottleneck_warmup_steps", 3000)
  hparams.add_hparam("bottleneck_max_prob", 1.0)
  hparams.add_hparam("sample_height", 32)
  hparams.add_hparam("sample_width", 32)
  hparams.add_hparam("discriminator_batchnorm", True)
  hparams.add_hparam("num_sliced_vecs", 4096)
  hparams.add_hparam("gan_loss_factor", 0.0)
  return hparams
Developer: kltony, Project: tensor2tensor, Lines of code: 28, Source file: autoencoders.py


Example 2: next_frame_base

def next_frame_base():
  """Common HParams for next_frame models."""
  hparams = common_hparams.basic_params1()
  # Loss cutoff.
  hparams.add_hparam("video_modality_loss_cutoff", 0.01)
  # Additionally resize the frames before feeding them to the model.
  hparams.add_hparam("preprocess_resize_frames", None)
  # How many data points to shuffle. Ideally part of the problem, not the model!
  hparams.add_hparam("shuffle_buffer_size", 128)
  # Tiny mode. For faster tests.
  hparams.add_hparam("tiny_mode", False)
  # In case a model supports smaller/faster version.
  hparams.add_hparam("small_mode", False)
  # In case a model has stochastic version.
  hparams.add_hparam("stochastic_model", False)
  # Internal loss for recurrent models.
  hparams.add_hparam("internal_loss", True)
  # choose from: concat, multiplicative, multi_additive
  hparams.add_hparam("action_injection", "multi_additive")
  # Scheduled sampling method. Choose between
  # ground_truth_only, prediction_only, prob, count, prob_inverse_exp.
  hparams.add_hparam("scheduled_sampling_mode", "prediction_only")
  hparams.add_hparam("scheduled_sampling_decay_steps", 10000)
  hparams.add_hparam("scheduled_sampling_max_prob", 1.0)
  hparams.add_hparam("scheduled_sampling_k", 900.0)
  return hparams
Developer: qixiuai, Project: tensor2tensor, Lines of code: 26, Source file: base.py


Example 3: ppo_base_v1

def ppo_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.learning_rate = 1e-4
  hparams.add_hparam("init_mean_factor", 0.1)
  hparams.add_hparam("init_logstd", 0.1)
  hparams.add_hparam("policy_layers", (100, 100))
  hparams.add_hparam("value_layers", (100, 100))
  hparams.add_hparam("num_agents", 30)
  hparams.add_hparam("clipping_coef", 0.2)
  hparams.add_hparam("gae_gamma", 0.99)
  hparams.add_hparam("gae_lambda", 0.95)
  hparams.add_hparam("entropy_loss_coef", 0.01)
  hparams.add_hparam("value_loss_coef", 1)
  hparams.add_hparam("optimization_epochs", 15)
  hparams.add_hparam("epoch_length", 200)
  hparams.add_hparam("epochs_num", 2000)
  hparams.add_hparam("eval_every_epochs", 10)
  hparams.add_hparam("num_eval_agents", 3)
  hparams.add_hparam("video_during_eval", False)
  hparams.add_hparam("save_models_every_epochs", 30)
  hparams.add_hparam("optimization_batch_size", 50)
  hparams.add_hparam("max_gradients_norm", 0.5)
  hparams.add_hparam("simulated_environment", False)
  hparams.add_hparam("simulation_random_starts", False)
  hparams.add_hparam("intrinsic_reward_scale", 0.)
  return hparams
Developer: kltony, Project: tensor2tensor, Lines of code: 27, Source file: rl.py


Example 4: revnet_base

def revnet_base():
  """Default hparams for Revnet."""
  hparams = common_hparams.basic_params1()
  hparams.add_hparam('num_channels', [64, 128, 256, 416])
  hparams.add_hparam('num_layers_per_block', [1, 1, 10, 1])
  hparams.add_hparam('bottleneck', True)
  hparams.add_hparam('first_batch_norm', [False, True, True, True])
  hparams.add_hparam('init_stride', 2)
  hparams.add_hparam('init_kernel_size', 7)
  hparams.add_hparam('init_maxpool', True)
  hparams.add_hparam('strides', [1, 2, 2, 2])
  hparams.add_hparam('num_channels_init_block', 64)
  hparams.add_hparam('dim', '2d')

  # Variable init
  hparams.initializer = 'normal_unit_scaling'
  hparams.initializer_gain = 2.

  # Optimization
  hparams.optimizer = 'Momentum'
  hparams.optimizer_momentum_momentum = 0.9
  hparams.optimizer_momentum_nesterov = True
  hparams.weight_decay = 1e-4
  hparams.clip_grad_norm = 0.0
  # (base_lr=0.1) * (batch_size=128*8 (on TPU, or 8 GPUs)=1024) / (256.)
  hparams.learning_rate = 0.4
  hparams.learning_rate_decay_scheme = 'cosine'
  # For image_imagenet224, 120k training steps, which effectively makes this a
  # cosine decay (i.e. no cycles).
  hparams.learning_rate_cosine_cycle_steps = 120000

  # Can run with a batch size of 128 with Problem ImageImagenet224
  hparams.batch_size = 128
  return hparams
Developer: qixiuai, Project: tensor2tensor, Lines of code: 34, Source file: revnet.py


Example 5: testNeuralGPU

 def testNeuralGPU(self):
   hparams = common_hparams.basic_params1()
   batch_size = 3
   input_length = 5
   target_length = input_length
   input_vocab_size = 9
   target_vocab_size = 11
   p_hparams = problem_hparams.test_problem_hparams(input_vocab_size,
                                                    target_vocab_size)
   inputs = -1 + np.random.random_integers(
       input_vocab_size, size=(batch_size, input_length, 1, 1))
   targets = -1 + np.random.random_integers(
       target_vocab_size, size=(batch_size, target_length, 1, 1))
   with self.test_session() as session:
     features = {
         "inputs": tf.constant(inputs, dtype=tf.int32),
         "targets": tf.constant(targets, dtype=tf.int32)
     }
     model = neural_gpu.NeuralGPU(hparams, tf.estimator.ModeKeys.TRAIN,
                                  p_hparams)
     logits, _ = model(features)
     session.run(tf.global_variables_initializer())
     res = session.run(logits)
   self.assertEqual(res.shape, (batch_size, target_length, 1, 1,
                                target_vocab_size))
Developer: AranKomat, Project: tensor2tensor, Lines of code: 25, Source file: neural_gpu_test.py


Example 6: resnet_base

def resnet_base():
  """Set of hyperparameters."""
  # For imagenet on TPU:
  # Set train_steps=120000
  # Set eval_steps=48

  # Base
  hparams = common_hparams.basic_params1()

  # Model-specific parameters
  hparams.add_hparam("layer_sizes", [3, 4, 6, 3])
  hparams.add_hparam("filter_sizes", [64, 64, 128, 256, 512])
  hparams.add_hparam("block_fn", "bottleneck")
  hparams.add_hparam("use_nchw", True)

  # Variable init
  hparams.initializer = "normal_unit_scaling"
  hparams.initializer_gain = 2.

  # Optimization
  hparams.optimizer = "Momentum"
  hparams.optimizer_momentum_momentum = 0.9
  hparams.optimizer_momentum_nesterov = True
  hparams.weight_decay = 1e-4
  hparams.clip_grad_norm = 0.0
  # (base_lr=0.1) * (batch_size=128*8 (on TPU, or 8 GPUs)=1024) / (256.)
  hparams.learning_rate = 0.4
  hparams.learning_rate_decay_scheme = "cosine"
  # For image_imagenet224, 120k training steps, which effectively makes this a
  # cosine decay (i.e. no cycles).
  hparams.learning_rate_cosine_cycle_steps = 120000

  hparams.batch_size = 128
  return hparams
Developer: kltony, Project: tensor2tensor, Lines of code: 34, Source file: resnet.py


Example 7: bluenet_base

def bluenet_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.batch_size = 4096
  hparams.hidden_size = 256
  hparams.dropout = 0.2
  hparams.symbol_dropout = 0.5
  hparams.label_smoothing = 0.1
  hparams.clip_grad_norm = 2.0
  hparams.num_hidden_layers = 8
  hparams.kernel_height = 3
  hparams.kernel_width = 3
  hparams.learning_rate_decay_scheme = "exp10k"
  hparams.learning_rate = 0.05
  hparams.learning_rate_warmup_steps = 3000
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 3.0
  hparams.num_sampled_classes = 0
  hparams.sampling_method = "argmax"
  hparams.optimizer_adam_epsilon = 1e-6
  hparams.optimizer_adam_beta1 = 0.85
  hparams.optimizer_adam_beta2 = 0.997
  hparams.add_hparam("anneal_until", 40000)
  hparams.add_hparam("batch_deviation_loss_factor", 5.0)
  return hparams
Developer: AranKomat, Project: tensor2tensor, Lines of code: 25, Source file: bluenet.py


Example 8: testSymbolModalityTargetsFactored

 def testSymbolModalityTargetsFactored(self):
   batch_size = 10
   num_datashards = 5
   length = 6
   height = 7
   hidden_size = 9
   vocab_size = 11
   model_hparams = common_hparams.basic_params1()
   model_hparams.factored_logits = True
   model_hparams.hidden_size = hidden_size
   model_hparams.mode = tf.estimator.ModeKeys.TRAIN
   body_output = -1 + np.random.random_integers(
       100, size=(batch_size, length, height, hidden_size))
   targets = -1 + np.random.random_integers(
       vocab_size, size=(batch_size, length, height, 1))
   m = modalities.SymbolModality(model_hparams, vocab_size)
   data_parallelism = expert_utils.Parallelism(
       ["/device:CPU:0"] * num_datashards)
   with self.test_session() as session:
     sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
     sharded_targets = tf.split(targets, num_datashards)
     sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                    data_parallelism)
     train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                 data_parallelism)
     logits = tf.concat(sharded_logits, 0)
     session.run(tf.global_variables_initializer())
     res1, res2 = session.run((logits, train_loss))
   self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
   self.assertEqual(res2.shape, ())
Developer: chqiwang, Project: tensor2tensor, Lines of code: 30, Source file: modalities_test.py


Example 9: ppo_base_v1

def ppo_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.learning_rate = 1e-4
  hparams.add_hparam("init_mean_factor", 0.1)
  hparams.add_hparam("init_logstd", 0.1)
  hparams.add_hparam("policy_layers", (100, 100))
  hparams.add_hparam("value_layers", (100, 100))
  hparams.add_hparam("clipping_coef", 0.2)
  hparams.add_hparam("gae_gamma", 0.99)
  hparams.add_hparam("gae_lambda", 0.95)
  hparams.add_hparam("entropy_loss_coef", 0.01)
  hparams.add_hparam("value_loss_coef", 1)
  hparams.add_hparam("optimization_epochs", 15)
  hparams.add_hparam("epoch_length", 200)
  hparams.add_hparam("epochs_num", 2000)
  hparams.add_hparam("eval_every_epochs", 10)
  hparams.add_hparam("save_models_every_epochs", 30)
  hparams.add_hparam("optimization_batch_size", 50)
  hparams.add_hparam("max_gradients_norm", 0.5)
  hparams.add_hparam("intrinsic_reward_scale", 0.)
  hparams.add_hparam("logits_clip", 0.0)
  hparams.add_hparam("dropout_ppo", 0.1)
  hparams.add_hparam("effective_num_agents", None)
  return hparams
Developer: qixiuai, Project: tensor2tensor, Lines of code: 25, Source file: rl.py


Example 10: shakeshake_cifar10

def shakeshake_cifar10():
  """Parameters for CIFAR-10."""
  tf.logging.warning("shakeshake_cifar10 hparams have not been verified to "
                     "achieve good performance.")
  hparams = common_hparams.basic_params1()
  # This leads to an effective batch size of 128 when the number of GPUs is 1.
  hparams.batch_size = 4096 * 8
  hparams.hidden_size = 16
  hparams.dropout = 0
  hparams.label_smoothing = 0.0
  hparams.clip_grad_norm = 2.0
  hparams.num_hidden_layers = 26
  hparams.kernel_height = -1  # Unused
  hparams.kernel_width = -1  # Unused
  hparams.learning_rate_decay_scheme = "cosine"
  # Model should be run for 700000 steps with batch size 128 (~1800 epochs)
  hparams.learning_rate_cosine_cycle_steps = 700000
  hparams.learning_rate = 0.2
  hparams.learning_rate_warmup_steps = 3000
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  # TODO(rshin): Adjust so that effective value becomes ~1e-4
  hparams.weight_decay = 3.0
  hparams.optimizer = "Momentum"
  hparams.optimizer_momentum_momentum = 0.9
  hparams.add_hparam("base_filters", 16)
  hparams.add_hparam("shakeshake_type", "batch")
  return hparams
Developer: zeyu-h, Project: tensor2tensor, Lines of code: 28, Source file: shake_shake.py


Example 11: testSymbolTupleModalityInputs

 def testSymbolTupleModalityInputs(self):
   """Adapted from tensor2tensor/layers/modalities_test.py."""
   batch_size = 10
   num_datashards = 5
   length = 5
   vocab_size = [2000, 500, 2500]
   hidden_size = 9
   model_hparams = common_hparams.basic_params1()
   model_hparams.hidden_size = hidden_size
   model_hparams.mode = tf.estimator.ModeKeys.TRAIN
   x = np.stack([
       -1 + np.random.random_integers(
           vocab_size[i], size=(batch_size, length, 1))
       for i in range(len(vocab_size))
   ], axis=3)
   m = modalities.SymbolTupleModality(model_hparams, vocab_size)
   data_parallelism = expert_utils.Parallelism(
       ['/device:CPU:0'] * num_datashards)
   with self.test_session() as session:
     xs = tf.split(x, num_datashards)
     sharded_output = m.bottom_sharded(xs, data_parallelism)
     output = tf.concat(sharded_output, 0)
     session.run(tf.global_variables_initializer())
     res = session.run(output)
   self.assertEqual(res.shape, (batch_size, length, 1, hidden_size))
Developer: cghawthorne, Project: magenta, Lines of code: 25, Source file: modalities_test.py


Example 12: attention_lm_base

def attention_lm_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.hidden_size = 1024
  hparams.batch_size = 8192
  hparams.max_length = 256
  hparams.dropout = 0.0
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 2000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.label_smoothing = 0.0
  hparams.shared_embedding_and_softmax_weights = False

  hparams.add_hparam("filter_size", 4096)  # Add new ones like this.
  # attention-related flags
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("encoder_full_attention", False)
  return hparams
Developer: zeyu-h, Project: tensor2tensor, Lines of code: 33, Source file: attention_lm.py


Example 13: my_very_own_hparams

def my_very_own_hparams():
  # Start with the base set
  hp = common_hparams.basic_params1()
  # Modify existing hparams
  hp.num_hidden_layers = 2
  # Add new hparams
  hp.add_hparam("filter_size", 2048)
  return hp
Developer: kltony, Project: tensor2tensor, Lines of code: 8, Source file: my_submodule.py


Example 14: lstm_seq2seq

def lstm_seq2seq():
  """hparams for LSTM."""
  hparams = common_hparams.basic_params1()
  hparams.daisy_chain_variables = False
  hparams.batch_size = 1024
  hparams.hidden_size = 128
  hparams.num_hidden_layers = 2
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 0.0
  return hparams
Developer: chqiwang, Project: tensor2tensor, Lines of code: 11, Source file: lstm.py


Example 15: resnet_base

def resnet_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.add_hparam("layer_sizes", [3, 4, 6, 3])
  hparams.add_hparam("use_nchw", True)
  hparams.add_hparam("num_filters", [64, 128, 256, 512])
  hparams.add_hparam("strides", [1, 2, 2, 2])

  # Can run with a batch size of 128 with Problem ImageImagenet224
  hparams.tpu_batch_size_per_shard = 128
  return hparams
Developer: zeyu-h, Project: tensor2tensor, Lines of code: 11, Source file: resnet.py


Example 16: basic_fc_small

def basic_fc_small():
  """Small fully connected model."""
  hparams = common_hparams.basic_params1()
  hparams.learning_rate = 0.1
  hparams.batch_size = 128
  hparams.hidden_size = 256
  hparams.num_hidden_layers = 2
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 0.0
  hparams.dropout = 0.0
  return hparams
Developer: qixiuai, Project: tensor2tensor, Lines of code: 12, Source file: basic.py


Example 17: vanilla_gan

def vanilla_gan():
  """Basic parameters for a vanilla_gan."""
  hparams = common_hparams.basic_params1()

  hparams.batch_size = 32
  hparams.label_smoothing = 0.0
  hparams.add_hparam("hidden_dim", 128)
  hparams.add_hparam("random_sample_size", 100)
  hparams.add_hparam("height", 28)
  hparams.add_hparam("width", 28)
  hparams.add_hparam("epsilon", 1e-4)
  return hparams
Developer: chqiwang, Project: tensor2tensor, Lines of code: 12, Source file: vanilla_gan.py


Example 18: transformer_base_v1

def transformer_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.norm_type = "layer"
  hparams.hidden_size = 512
  hparams.batch_size = 4096
  hparams.max_length = 256
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_schedule = "legacy"
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 4000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.num_sampled_classes = 0
  hparams.label_smoothing = 0.1
  hparams.shared_embedding_and_softmax_weights = True
  hparams.symbol_modality_num_shards = 16

  # Add new ones like this.
  hparams.add_hparam("filter_size", 2048)
  # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers.
  hparams.add_hparam("num_encoder_layers", 0)
  hparams.add_hparam("num_decoder_layers", 0)
  # Attention-related flags.
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  hparams.add_hparam("ffn_layer", "dense_relu_dense")
  hparams.add_hparam("parameter_attention_key_channels", 0)
  hparams.add_hparam("parameter_attention_value_channels", 0)
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("attention_dropout_broadcast_dims", "")
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("relu_dropout_broadcast_dims", "")
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("nbr_decoder_problems", 1)
  hparams.add_hparam("proximity_bias", False)
  hparams.add_hparam("use_pad_remover", True)
  hparams.add_hparam("self_attention_type", "dot_product")
  hparams.add_hparam("max_relative_position", 0)
  return hparams
Developer: chqiwang, Project: tensor2tensor, Lines of code: 49, Source file: transformer.py


Example 19: transformer_symshard_base

def transformer_symshard_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.hidden_size = 256
  hparams.batch_size = 2048
  hparams.max_length = 0
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.layer_prepostprocess_dropout = 0.2
  hparams.add_hparam("attention_dropout", 0.1)
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("relu_dropout_broadcast_dims", "1")
  hparams.layer_prepostprocess_dropout = 0.1
  hparams.layer_prepostprocess_dropout_broadcast_dims = "1"  # length
  hparams.label_smoothing = 0.1
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer = "Adafactor"
  hparams.learning_rate_schedule = "rsqrt_decay"
  hparams.learning_rate_warmup_steps = 10000
  hparams.initializer_gain = 1.0
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  # TODO(noam): use this to control sharing.  We now share always
  hparams.shared_embedding_and_softmax_weights = True
  # we only want one data shard.
  hparams.no_data_parallelism = True
  # bypass the symbol modality so that we can use model parallelism.
  hparams.modality = {
      "inputs": modalities.IdentitySymbolModality,
      "targets": modalities.IdentitySymbolModality,
  }
  hparams.add_hparam("filter_size", 1280)
  hparams.add_hparam("mix_fraction", 0.5)
  # attention-related flags
  hparams.add_hparam("multihead_attention_num_heads", 4)
  hparams.add_hparam("multihead_attention_key_channels", 0)
  hparams.add_hparam("multihead_attention_value_channels", 0)
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam(
      "encoder_layers", ("n,att,m,d,a," "n,ffn,m,d,a,") * 6 + "n,d")
  hparams.add_hparam(
      "decoder_layers",
      ("n,att,m,d,a," "n,enc-att,m,d,a," "n,ffn,m,d,a,") * 6 + "n,d")
  # Number of model shards - each one has separate parameters.
  # Changing this number invalidates checkpoints.
  hparams.add_hparam("num_model_shards", 8)
  return hparams
Developer: qixiuai, Project: tensor2tensor, Lines of code: 47, Source file: transformer_symshard.py


Example 20: mtf_image_transformer_base

def mtf_image_transformer_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.no_data_parallelism = True
  hparams.use_fixed_batch_size = True
  hparams.batch_size = 1
  hparams.max_length = 3072
  hparams.hidden_size = 256
  hparams.label_smoothing = 0.0
  # 8-way model-parallelism
  hparams.add_hparam("mesh_shape", "batch:8")
  hparams.add_hparam("layout", "batch:batch")
  hparams.add_hparam("mtf_mode", True)
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("filter_size", 1024)
  hparams.add_hparam("num_encoder_layers", 0)
  hparams.add_hparam("num_decoder_layers", 6)
  hparams.add_hparam("attention_key_size", 256)
  hparams.add_hparam("attention_value_size", 256)
  # Share weights between input and target embeddings
  hparams.shared_embedding = True

  # mixture of experts hparams
  hparams.add_hparam("ffn_layer", "dense_relu_dense")
  hparams.add_hparam("moe_overhead_train", 1.0)
  hparams.add_hparam("moe_overhead_eval", 2.0)
  hparams.moe_num_experts = 16
  hparams.moe_loss_coef = 1e-3

  hparams.shared_embedding_and_softmax_weights = True
  hparams.optimizer = "Adafactor"
  hparams.learning_rate_schedule = "rsqrt_decay"
  hparams.learning_rate_warmup_steps = 10000
  hparams.add_hparam("d_kv", 64)
  hparams.add_hparam("d_ff", 2048)

  # Image related hparams
  hparams.add_hparam("img_len", 32)
  hparams.add_hparam("num_channels", 3)
  hparams.add_hparam("unconditional", True)

  # Local Attention related params
  hparams.add_hparam("block_length", 128)
  hparams.add_hparam("block_height", 16)
  hparams.add_hparam("block_width", 16)
  hparams.add_hparam("attention_type", "local1d")
  return hparams
Developer: qixiuai, Project: tensor2tensor, Lines of code: 47, Source file: mtf_image_transformer.py



Note: The tensor2tensor.layers.common_hparams.basic_params1 examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets are taken from open-source projects contributed by their original authors, who retain copyright; please consult the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.

