def xavier(uniform=True, seed=None, dtype=tf.float32):
""" Xavier.
Returns an initializer performing "Xavier" initialization for weights.
This initializer is designed to keep the scale of the gradients roughly the
same in all layers. In uniform distribution this ends up being the range:
`x = sqrt(6. / (in + out)); [-x, x]` and for normal distribution a standard
deviation of `sqrt(3. / (in + out))` is used.
Arguments:
uniform: Whether to use uniform or normal distributed random
initialization.
seed: A Python integer. Used to create random seeds. See
`set_random_seed` for behavior.
dtype: The data type. Only floating point types are supported.
Returns:
An initializer for a weight matrix.
References:
Understanding the difficulty of training deep feedforward neural
networks. International conference on artificial intelligence and
statistics. Xavier Glorot and Yoshua Bengio (2010).
Links:
[http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf]
(http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)
"""
return xavier_initializer(uniform=uniform, seed=seed, dtype=dtype)
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
mode, params):
"""model_fn that uses candidate sampling.
Args:
features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
target_indices: A single Tensor of shape [batch_size, n_labels] containing
the target indices.
mode: Represents if this training, evaluation or prediction. See `ModeKeys`.
params: A dict of hyperparameters that are listed below.
hidden_units- List of hidden units per layer. All layers are fully
connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
has 32.
feature_columns- An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived
from `FeatureColumn`.
n_classes- number of target classes. It must be greater than 2.
n_samples- number of sample target classes. Needs to be tuned - A good
starting point could be 2% of n_classes.
n_labels- number of labels in each example.
top_k- The number of classes to predict.
optimizer- An instance of `tf.Optimizer` used to train the model. If
`None`, will use an Adagrad optimizer.
dropout- When not `None`, the probability we will drop out a given
coordinate.
gradient_clip_norm- A float > 0. If provided, gradients are
clipped to their global norm with this clipping ratio. See
tf.clip_by_global_norm for more details.
num_ps_replicas- The number of parameter server replicas.
Returns:
predictions: A single Tensor or a dict of Tensors.
loss: A scalar containing the loss of the step.
train_op: The op for training.
"""
hidden_units = params["hidden_units"]
feature_columns = params["feature_columns"]
n_classes = params["n_classes"]
n_samples = params["n_samples"]
n_labels = params["n_labels"]
top_k = params["top_k"]
optimizer = params["optimizer"]
dropout = params["dropout"]
gradient_clip_norm = params["gradient_clip_norm"]
num_ps_replicas = params["num_ps_replicas"]
parent_scope = "dnn_ss"
# Setup the input layer partitioner.
input_layer_partitioner = (
partitioned_variables.min_max_variable_partitioner(
max_partitions=num_ps_replicas,
min_slice_size=64 << 20))
# Create the input layer.
with variable_scope.variable_scope(
parent_scope + "/input_from_feature_columns",
features.values(),
partitioner=input_layer_partitioner) as scope:
net = layers.input_from_feature_columns(
features,
feature_columns,
weight_collections=[parent_scope],
scope=scope)
# Setup the hidden layer partitioner.
hidden_layer_partitioner = (
partitioned_variables.min_max_variable_partitioner(
max_partitions=num_ps_replicas))
final_hidden_layer_dim = None
# Create hidden layers using fully_connected.
for layer_id, num_hidden_units in enumerate(hidden_units):
with variable_scope.variable_scope(
parent_scope + "/hiddenlayer_%d" % layer_id, [net],
partitioner=hidden_layer_partitioner) as scope:
net = layers.fully_connected(net,
num_hidden_units,
variables_collections=[parent_scope],
scope=scope)
final_hidden_layer_dim = num_hidden_units
# Add dropout if it is enabled.
if dropout is not None and mode == estimator.ModeKeys.TRAIN:
net = layers.dropout(net, keep_prob=(1.0 - dropout))
# Create the weights and biases for the logit layer.
with variable_scope.variable_scope(
parent_scope + "/logits", [net],
partitioner=hidden_layer_partitioner) as scope:
dtype = net.dtype.base_dtype
weights_shape = [n_classes, final_hidden_layer_dim]
weights = variables.model_variable(
"weights",
shape=weights_shape,
dtype=dtype,
initializer=initializers.xavier_initializer(),
trainable=True,
collections=[parent_scope])
biases = variables.model_variable(
#.........这里部分代码省略.........
def fully_connected(x,
num_output_units,
activation_fn=None,
weight_init=initializers.xavier_initializer(),
bias_init=standard_ops.constant_initializer(0.),
name=None,
weight_collections=(ops.GraphKeys.WEIGHTS,),
bias_collections=(ops.GraphKeys.BIASES,),
output_collections=(ops.GraphKeys.ACTIVATIONS,),
weight_regularizer=None,
bias_regularizer=None):
"""Adds the parameters for a fully connected layer and returns the output.
A fully connected layer is generally defined as a matrix multiply:
`y = f(w * x + b)` where `f` is given by `activation_fn`. If
`activation_fn` is `None`, the result of `y = w * x + b` is
returned.
This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
`bias_init` to `None`.
The variable creation is compatible with `tf.variable_scope` and so can be
reused with `tf.variable_scope` or `tf.make_template`.
Most of the details of variable creation can be controlled by specifying the
initializers (`weight_init` and `bias_init`) and which in collections to place
the created variables (`weight_collections` and `bias_collections`; note that
the variables are always added to the `VARIABLES` collection). The output of
the layer can be placed in custom collections using `output_collections`.
The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
respectively.
A per layer regularization can be specified by setting `weight_regularizer`
and `bias_regularizer`, which are applied to the weights and biases
respectively, and whose output is added to the `REGULARIZATION_LOSSES`
collection.
Args:
x: The input `Tensor`.
num_output_units: The size of the output.
activation_fn: A function that requires a single Tensor that is applied as a
non-linearity. If None is used, do not apply any activation.
weight_init: An optional weight initialization, defaults to
`xavier_initializer`.
bias_init: An initializer for the bias, defaults to 0. Set to `None` in
order to disable bias.
name: The name for this operation is used to name operations and to find
variables. If specified it must be unique for this scope, otherwise a
unique name starting with "fully_connected" will be created. See
`tf.variable_op_scope` for details.
weight_collections: List of graph collections to which weights are added.
bias_collections: List of graph collections to which biases are added.
output_collections: List of graph collections to which outputs are added.
weight_regularizer: A regularizer like the result of
`l1_regularizer` or `l2_regularizer`. Used for weights.
bias_regularizer: A regularizer like the result of
`l1_regularizer` or `l2_regularizer`. Used for biases.
Returns:
The output of the fully connected layer.
"""
with variable_scope.variable_op_scope([x], name, 'fully_connected'):
num_input_units = x.get_shape().dims[1].value
dtype = x.dtype.base_dtype
w = _weight_variable(shape=[num_input_units, num_output_units],
dtype=dtype,
initializer=weight_init,
collections=weight_collections,
regularizer=weight_regularizer)
y = standard_ops.matmul(x, w)
if bias_init is not None:
b = _bias_variable(shape=[num_output_units],
dtype=dtype,
initializer=bias_init,
collections=bias_collections,
regularizer=bias_regularizer)
y = nn.bias_add(y, b)
return _apply_activation(y, activation_fn, output_collections)
def depthwise_convolution2d(
inputs,
kernel_size,
depth_multiplier=1,
stride=1,
padding='SAME',
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
data_format='NHWC',
scope=None):
"""Adds a depthwise 2D convolution with optional batch_norm layer.
This op performs a depthwise convolution that acts separately on
channels, creating a variable called `depthwise_weights`. Then,
if `normalizer_fn` is None,
it adds bias to the result, creating a variable called 'biases', otherwise,
the `normalizer_fn` is applied. It finally applies an activation function
to produce the end result.
Args:
inputs: A tensor of size [batch_size, height, width, channels].
num_outputs: The number of pointwise convolution output filters. If is
None, then we skip the pointwise convolution stage.
kernel_size: A list of length 2: [kernel_height, kernel_width] of
of the filters. Can be an int if both values are the same.
depth_multiplier: The number of depthwise convolution output channels for
each input channel. The total number of depthwise convolution output
channels will be equal to `num_filters_in * depth_multiplier`.
stride: A list of length 2: [stride_height, stride_width], specifying the
depthwise convolution stride. Can be an int if both strides are the same.
padding: One of 'VALID' or 'SAME'.
rate: A list of length 2: [rate_height, rate_width], specifying the dilation
rates for atrous convolution. Can be an int if both rates are the same.
If any value is larger than one, then both stride values need to be one.
activation_fn: Activation function. The default value is a ReLU function.
Explicitly set it to None to skip it and maintain a linear activation.
normalizer_fn: Normalization function to use instead of `biases`. If
`normalizer_fn` is provided then `biases_initializer` and
`biases_regularizer` are ignored and `biases` are not created nor added.
default set to None for no normalizer function
normalizer_params: Normalization function parameters.
weights_initializer: An initializer for the weights.
weights_regularizer: Optional regularizer for the weights.
biases_initializer: An initializer for the biases. If None skip biases.
biases_regularizer: Optional regularizer for the biases.
reuse: Whether or not the layer and its variables should be reused. To be
able to reuse the layer scope must be given.
variables_collections: Optional list of collections for all the variables or
a dictionary containing a different list of collection per variable.
outputs_collections: Collection to add the outputs.
trainable: Whether or not the variables should be trainable or not.
scope: Optional scope for variable_scope.
Returns:
A `Tensor` representing the output of the operation.
"""
with variable_scope.variable_scope(scope, 'DepthwiseConv2d', [inputs],
reuse=reuse) as sc:
inputs = ops.convert_to_tensor(inputs)
# Actually apply depthwise conv instead of separable conv.
dtype = inputs.dtype.base_dtype
kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
stride_h, stride_w = utils.two_element_tuple(stride)
if data_format == 'NHWC':
num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
strides = [1, stride_h, stride_w, 1]
else:
num_filters_in = inputs.get_shape().as_list()[1]
strides = [1, 1, stride_h, stride_w]
weights_collections = utils.get_variable_collections(
variables_collections, 'weights')
# Depthwise weights variable.
depthwise_shape = [kernel_h, kernel_w,
num_filters_in, depth_multiplier]
depthwise_weights = variables.model_variable(
'depthwise_weights',
shape=depthwise_shape,
dtype=dtype,
initializer=weights_initializer,
regularizer=weights_regularizer,
trainable=trainable,
collections=weights_collections)
outputs = nn.depthwise_conv2d(inputs, depthwise_weights,
strides, padding,
rate=utils.two_element_tuple(rate),
data_format=data_format)
num_outputs = depth_multiplier * num_filters_in
if normalizer_fn is not None:
normalizer_params = normalizer_params or {}
#.........这里部分代码省略.........
def legacy_fully_connected(x,
num_output_units,
activation_fn=None,
weight_init=initializers.xavier_initializer(),
bias_init=init_ops.zeros_initializer,
name=None,
weight_collections=(ops.GraphKeys.WEIGHTS,),
bias_collections=(ops.GraphKeys.BIASES,),
output_collections=(ops.GraphKeys.ACTIVATIONS,),
trainable=True,
weight_regularizer=None,
bias_regularizer=None):
# pylint: disable=anomalous-backslash-in-string
r"""Adds the parameters for a fully connected layer and returns the output.
A fully connected layer is generally defined as a matrix multiply:
`y = f(w * x + b)` where `f` is given by `activation_fn`. If
`activation_fn` is `None`, the result of `y = w * x + b` is
returned.
If `x` has shape [\\\(\\text{dim}_0, \\text{dim}_1, ..., \\text{dim}_n\\\)]
with more than 2 dimensions (\\\(n > 1\\\)), then we repeat the matrix
multiply along the first dimensions. The result r is a tensor of shape
[\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`],
where \\\( r_{i_0, ..., i_{n-1}, k} =
\\sum_{0 \\leq j < \\text{dim}_n} x_{i_0, ... i_{n-1}, j} \cdot w_{j, k}\\\).
This is accomplished by reshaping `x` to 2-D
[\\\(\\text{dim}_0 \\cdot ... \\cdot \\text{dim}_{n-1}, \\text{dim}_n\\\)]
before the matrix multiply and afterwards reshaping it to
[\\\(\\text{dim}_0, ..., \\text{dim}_{n-1},\\\) `num_output_units`].
This op creates `w` and optionally `b`. Bias (`b`) can be disabled by setting
`bias_init` to `None`.
The variable creation is compatible with `tf.variable_scope` and so can be
reused with `tf.variable_scope` or `tf.make_template`.
Most of the details of variable creation can be controlled by specifying the
initializers (`weight_init` and `bias_init`) and in which collections to place
the created variables (`weight_collections` and `bias_collections`; note that
the variables are always added to the `VARIABLES` collection). The output of
the layer can be placed in custom collections using `output_collections`.
The collections arguments default to `WEIGHTS`, `BIASES` and `ACTIVATIONS`,
respectively.
A per layer regularization can be specified by setting `weight_regularizer`
and `bias_regularizer`, which are applied to the weights and biases
respectively, and whose output is added to the `REGULARIZATION_LOSSES`
collection.
Args:
x: The input `Tensor`.
num_output_units: The size of the output.
activation_fn: A function that requires a single Tensor that is applied as a
non-linearity. If None is used, do not apply any activation.
weight_init: An optional weight initialization, defaults to
`xavier_initializer`.
bias_init: An initializer for the bias, defaults to 0. Set to `None` in
order to disable bias.
name: The name for this operation is used to name operations and to find
variables. If specified it must be unique for this scope, otherwise a
unique name starting with "fully_connected" will be created. See
`tf.variable_op_scope` for details.
weight_collections: List of graph collections to which weights are added.
bias_collections: List of graph collections to which biases are added.
output_collections: List of graph collections to which outputs are added.
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
weight_regularizer: A regularizer like the result of
`l1_regularizer` or `l2_regularizer`. Used for weights.
bias_regularizer: A regularizer like the result of
`l1_regularizer` or `l2_regularizer`. Used for biases.
Returns:
The output of the fully connected layer.
Raises:
ValueError: if x has rank less than 2 or if its last dimension is not set.
"""
with variable_scope.variable_op_scope([x], name, 'fully_connected'):
dims = x.get_shape().dims
if dims is None:
raise ValueError('dims of x must be known but is None')
if len(dims) < 2:
raise ValueError('rank of x must be at least 2 not: %d' % len(dims))
num_input_units = dims[-1].value
if num_input_units is None:
raise ValueError('last dimension of x must be known but is None')
dtype = x.dtype.base_dtype
weight_collections = set(list(weight_collections or []) +
[ops.GraphKeys.VARIABLES])
w = variable_scope.get_variable('weights',
shape=[num_input_units, num_output_units],
dtype=dtype,
initializer=weight_init,
collections=weight_collections,
regularizer=weight_regularizer,
trainable=trainable)
x_2_dim = x if len(dims) <= 2 else array_ops.reshape(x,
[-1, num_input_units])
y = standard_ops.matmul(x_2_dim, w)
if bias_init is not None:
bias_collections = set(list(bias_collections or []) +
[ops.GraphKeys.VARIABLES])
b = variable_scope.get_variable('bias',
shape=[num_output_units],
dtype=dtype,
#.........这里部分代码省略.........
def convolution2d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer,
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
"""Adds a 2D convolution followed by an optional batch_norm layer.
`convolution2d` creates a variable called `weights`, representing the
convolutional kernel, that is convolved with the `inputs` to produce a
`Tensor` of activations. If a `normalizer_fn` is provided (such as
`batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
None and a `biases_initializer` is provided then a `biases` variable would be
created and added the activations. Finally, if `activation_fn` is not `None`,
it is applied to the activations as well.
Args:
inputs: a 4-D tensor `[batch_size, height, width, channels]`.
num_outputs: integer, the number of output filters.
kernel_size: a list of length 2 `[kernel_height, kernel_width]` of
of the filters. Can be an int if both values are the same.
stride: a list of length 2 `[stride_height, stride_width]`.
Can be an int if both strides are the same. Note that presently
both strides must have the same value.
padding: one of `VALID` or `SAME`.
activation_fn: activation function.
normalizer_fn: normalization function to use instead of `biases`. If
`normalize_fn` is provided then `biases_initializer` and
`biases_regularizer` are ignored and `biases` are not created nor added.
normalizer_params: normalization function parameters.
weights_initializer: An initializer for the weights.
weights_regularizer: Optional regularizer for the weights.
biases_initializer: An initializer for the biases. If None skip biases.
biases_regularizer: Optional regularizer for the biases.
reuse: whether or not the layer and its variables should be reused. To be
able to reuse the layer scope must be given.
variables_collections: optional list of collections for all the variables or
a dictionay containing a different list of collection per variable.
outputs_collections: collection to add the outputs.
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for `variable_op_scope`.
Returns:
a tensor representing the output of the operation.
"""
with variable_scope.variable_op_scope([inputs],
scope, 'Conv', reuse=reuse) as sc:
dtype = inputs.dtype.base_dtype
kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
stride_h, stride_w = utils.two_element_tuple(stride)
num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
weights_shape = [kernel_h, kernel_w,
num_filters_in, num_outputs]
weights_collections = utils.get_variable_collections(
variables_collections, 'weights')
weights = variables.model_variable('weights',
shape=weights_shape,
dtype=dtype,
initializer=weights_initializer,
regularizer=weights_regularizer,
collections=weights_collections,
trainable=trainable)
outputs = nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
padding=padding)
if normalizer_fn:
normalizer_params = normalizer_params or {}
outputs = normalizer_fn(outputs, **normalizer_params)
else:
if biases_initializer is not None:
biases_collections = utils.get_variable_collections(
variables_collections, 'biases')
biases = variables.model_variable('biases',
shape=[num_outputs,],
dtype=dtype,
initializer=biases_initializer,
regularizer=biases_regularizer,
collections=biases_collections,
trainable=trainable)
outputs = nn.bias_add(outputs, biases)
if activation_fn:
outputs = activation_fn(outputs)
return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
请发表评论