本文整理汇总了Python中pylearn2.compat.OrderedDict类的典型用法代码示例。如果您正苦于以下问题：Python OrderedDict类的具体用法？Python OrderedDict怎么用？Python OrderedDict使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了OrderedDict类的17个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_gradients
def get_gradients(self, model, data, ** kwargs):
    """
    Combine the gradients of every sub-cost into one weighted sum.

    Parameters
    ----------
    model : object
        Forwarded to each sub-cost; must provide `get_params()`.
    data : object
        Batch that is split per sub-cost with the mapping returned by
        `self.get_composite_specs_and_mapping(model)`.
    **kwargs
        Forwarded unchanged to every `cost.get_gradients` call.

    Returns
    -------
    grads : OrderedDict
        Maps each parameter to the sum over sub-costs of
        `coeff * gradient`.
    updates : OrderedDict
        Union of the update dictionaries returned by the sub-costs.

    Raises
    ------
    ValueError
        If a sub-cost reports a gradient for a key that is not in
        `model.get_params()`.
    """
    _, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)
    indiv_results = [
        cost.get_gradients(model, cost_data, ** kwargs)
        for cost, cost_data in safe_zip(self.costs, nested_data)
    ]

    grads = OrderedDict()
    updates = OrderedDict()
    params = model.get_params()

    for coeff, (g, u) in zip(self.coeffs, indiv_results):
        # Validate the whole dictionary before accumulating any of it.
        for param in g:
            if param not in params:
                raise ValueError("A shared variable (" +
                                 str(param) +
                                 ") that is not a parameter appeared "
                                 "in a cost gradient dictionary.")
        for param in g:
            assert param.ndim == g[param].ndim
            v = coeff * g[param]
            if param not in grads:
                grads[param] = v
            else:
                grads[param] = grads[param] + v
            assert grads[param].ndim == param.ndim
        # Updates from different sub-costs must not collide with each
        # other, and must not target model parameters.
        assert not any([state in updates for state in u])
        assert not any([state in params for state in u])
        updates.update(u)

    return grads, updates
开发者ID:nitbix,项目名称:pylearn2,代码行数:33,代码来源:cost.py
示例2: get_monitoring_channels
def get_monitoring_channels(self, model, data, ** kwargs):
    """
    Gather the monitoring channels of every sub-cost.

    In addition to the channels each sub-cost reports, one channel named
    ``term_<index>[_<name>]`` is added for every sub-cost whose `expr`
    does not return None.

    Parameters
    ----------
    model : object
        Forwarded to the sub-costs.
    data : object
        Validated against the first element of
        `self.get_data_specs(model)`, then split per sub-cost.
    **kwargs
        Forwarded unchanged to the sub-costs.

    Returns
    -------
    OrderedDict
        Channel name -> channel value.
    """
    data_space = self.get_data_specs(model)[0]
    data_space.validate(data)
    composite_specs, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)

    collected = OrderedDict()
    for idx, cost in enumerate(self.costs):
        sub_data = nested_data[idx]
        try:
            collected.update(
                cost.get_monitoring_channels(model, sub_data, **kwargs))
        except TypeError:
            reraise_as(Exception('SumOfCosts.get_monitoring_channels '
                                 'encountered TypeError while calling {0}'
                                 '.get_monitoring_channels'.format(
                                     type(cost))))

        term = cost.expr(model, sub_data, ** kwargs)
        if term is None:
            continue
        # Append the expression's own name when it has one.
        suffix = ''
        if hasattr(term, 'name') and term.name is not None:
            suffix = '_' + term.name
        collected['term_' + str(idx) + suffix] = term

    return collected
开发者ID:nitbix,项目名称:pylearn2,代码行数:26,代码来源:cost.py
示例3: get_gradients
def get_gradients(self, model, data, **kwargs):
    """
    Differentiate `self._cost` with respect to the model parameters.

    The sampler's particles are passed to `T.grad` as
    `consider_constant`, and disconnected inputs are ignored.

    Returns
    -------
    gradients : OrderedDict
        Parameter -> gradient expression, in `model.get_params()` order.
    updates : OrderedDict
        Copy of whatever `self.sampler.updates()` returns.
    """
    objective = self._cost(model, data, **kwargs)
    param_list = list(model.get_params())
    param_grads = T.grad(
        objective,
        param_list,
        disconnected_inputs='ignore',
        consider_constant=[self.sampler.particles],
    )
    gradients = OrderedDict(izip(param_list, param_grads))

    updates = OrderedDict()
    updates.update(self.sampler.updates())
    return gradients, updates
开发者ID:ASAPPinc,项目名称:pylearn2,代码行数:15,代码来源:ebm_estimation.py
示例4: __init__
def __init__(self, dim, layer_name, irange, indices=None,
             init_bias=0., svd=True, nonlinearity=tensor.tanh):
    """
    Record the constructor arguments on the instance.

    Each argument (`dim`, `layer_name`, `irange`, `indices`,
    `init_bias`, `svd`, `nonlinearity`) is stored as an attribute of
    the same name; none of them is interpreted here.
    """
    self.rnn_friendly = True
    self._scan_updates = OrderedDict()
    # locals() holds `self` plus every argument at this point, so this
    # copies each argument onto the instance under its own name. Do not
    # introduce new local variables above this line: they would be
    # copied as attributes too.
    self.__dict__.update(locals())
    # The snapshot also created a self-referencing `self.self`; drop it.
    del self.self
    super(Recurrent, self).__init__()
开发者ID:MarCnu,项目名称:pylearn2,代码行数:7,代码来源:rnn.py
示例5: __init__
def __init__(self, base_learning_rule, decay=0.9):
    """
    Wrap `base_learning_rule` and prepare empty bookkeeping.

    Parameters
    ----------
    base_learning_rule : object
        Stored as `self.base`.
    decay : float, optional
        Stored unchanged as `self.decay`.
    """
    self.base = base_learning_rule
    self.decay = decay
    self.mean_updates = OrderedDict()
    # hack to allow MomentumAdjustor to access momentum value
    if hasattr(self.base, 'momentum'):
        self.momentum = self.base.momentum
开发者ID:Neuroglycerin,项目名称:neukrill-net-tools,代码行数:7,代码来源:update_norm_monitor.py
示例6: __init__
def __init__(self, dim, layer_name, irange, indices=None,
             init_bias=0., nonlinearity=tensor.tanh,
             weight_noise=False, **kwargs):
    """
    Record the constructor arguments on the instance.

    Every named argument, and the remaining `kwargs` dict itself, is
    stored as an attribute of the same name.

    Parameters
    ----------
    dim, layer_name, irange, indices, init_bias, nonlinearity
        Not interpreted here; copied onto `self` verbatim.
    weight_noise : bool, optional
        When falsy, `self._std_dev` is reset to None.
    **kwargs
        `noise_std_dev` (default .075) is popped and kept as
        `self._std_dev`.
    """
    # Popped before the locals() snapshot below, so the `self.kwargs`
    # dict no longer contains 'noise_std_dev'.
    self._std_dev = kwargs.pop('noise_std_dev', .075)
    self.rnn_friendly = True
    self._scan_updates = OrderedDict()
    # locals() holds `self`, every named argument and `kwargs` here;
    # each becomes an attribute. Do not add local variables above.
    self.__dict__.update(locals())
    # The snapshot also created a self-referencing `self.self`; drop it.
    del self.self
    super(Recurrent, self).__init__()
    # Discard the noise standard deviation when weight noise is off.
    if not self.weight_noise:
        self._std_dev = None
开发者ID:dwf,项目名称:pylearn2,代码行数:11,代码来源:rnn.py
示例7: get_lr_scalers
def get_lr_scalers(self):
    """
    Merge the learning-rate scalers reported by every layer.

    Returns
    -------
    OrderedDict
        Union of `layer.get_lr_scalers()` over the hidden layers
        followed by the visible layer.

    Raises
    ------
    AssertionError
        If two layers scale the same key, if a key is not in
        `self.get_params()`, or if a merged value is not a float.
    """
    merged = OrderedDict()
    known_params = self.get_params()
    all_layers = self.hidden_layers + [self.visible_layer]

    for layer in all_layers:
        layer_scalers = layer.get_lr_scalers()
        # No two layers can contend to scale a parameter
        assert not any(key in merged for key in layer_scalers)
        # Don't try to scale anything that's not a parameter
        assert all(key in known_params for key in layer_scalers)
        merged.update(layer_scalers)

    assert all(isinstance(scale, float) for scale in merged.values())
    return merged
开发者ID:HBadertscher,项目名称:pylearn2,代码行数:22,代码来源:dbm.py
示例8: __init__
def __init__(
    self,
    decrease_rate=0.5,
    increase_rate=1.2,
    min_rate=1e-6,
    max_rate=50
):
    """
    Check the rate multipliers and initialise the rule's state.

    Parameters
    ----------
    decrease_rate : float, optional
        Must be below 1; passed through `sharedX` with the name
        'decrease_rate'.
    increase_rate : float, optional
        Must be above 1; passed through `sharedX` with the name
        'increase_rate'.
    min_rate, max_rate : float, optional
        Stored unchanged on the instance.
    """
    assert increase_rate > 1.
    assert decrease_rate < 1.
    # Plain Python state first, then the shared variables.
    self.min_rate = min_rate
    self.max_rate = max_rate
    self.zeros = OrderedDict()
    self.decrease_rate = sharedX(decrease_rate, 'decrease_rate')
    self.increase_rate = sharedX(increase_rate, 'increase_rate')
开发者ID:nitbix,项目名称:pylearn2,代码行数:14,代码来源:learning_rule.py
示例9: __init__
def __init__(self, model):
    """
    Create a monitor for `model` with no channels and no datasets.

    Parameters
    ----------
    model : object
        Stored as `self.model`; queried by `_build_data_specs`.
    """
    self.model = model
    self.training_succeeded = False
    self.channels = OrderedDict()

    # Progress counters all start at zero.
    self._num_batches_seen = 0
    self._examples_seen = 0
    self._epochs_seen = 0

    # Bookkeeping lists, each a distinct empty list.
    self._datasets = []
    self._iteration_mode = []
    self._batch_size = []
    self._num_batches = []
    self._rng_seed = []

    self._dirty = True
    self.theano_function_mode = None
    self.names_to_del = ['theano_function_mode']
    self.t0 = time.time()

    # Initialize self._nested_data_specs, self._data_specs_mapping,
    # and self._flat_data_specs
    self._build_data_specs()
开发者ID:MarCnu,项目名称:pylearn2,代码行数:20,代码来源:monitor.py
示例10: get_lr_scalers
def get_lr_scalers(self, model_idx=-1):
    """
    Merge the learning-rate scalers of every model in `self.models`.

    Parameters
    ----------
    model_idx : int, optional
        Accepted but not used by this method.

    Returns
    -------
    OrderedDict
        Union of each model's `get_lr_scalers()`; on a key collision
        the value from the later model wins.
    """
    merged = OrderedDict()
    for scalers in (member.get_lr_scalers() for member in self.models):
        merged.update(scalers)
    return merged
开发者ID:ballasn,项目名称:facedet,代码行数:5,代码来源:cascade.py
示例11: DROP_RPROP
class DROP_RPROP(LearningRule):
def __init__(
    self,
    decrease_rate=0.5,
    increase_rate=1.2,
    min_rate=1e-6,
    max_rate=50
):
    """
    Check the rate multipliers and initialise the rule's state.

    Parameters
    ----------
    decrease_rate : float, optional
        Must be below 1; passed through `sharedX` with the name
        'decrease_rate'.
    increase_rate : float, optional
        Must be above 1; passed through `sharedX` with the name
        'increase_rate'.
    min_rate, max_rate : float, optional
        Stored unchanged; `get_updates` uses them as clipping bounds.
    """
    assert increase_rate > 1.
    assert decrease_rate < 1.
    # Plain Python state first, then the shared variables.
    self.min_rate = min_rate
    self.max_rate = max_rate
    # Filled per parameter by `get_updates`.
    self.zeros = OrderedDict()
    self.decrease_rate = sharedX(decrease_rate, 'decrease_rate')
    self.increase_rate = sharedX(increase_rate, 'increase_rate')
def add_channels_to_monitor(self, monitor, monitoring_dataset):
    """
    Register this rule's monitoring channels on `monitor`.

    Channels added, in order: 'rprop_decrease_rate',
    'rprop_increase_rate', then one channel per entry of `self.zeros`
    named after that entry and reporting `T.sum` of it.

    Parameters
    ----------
    monitor : object
        Must provide `add_channel`.
    monitoring_dataset : object
        Passed as `dataset` for every channel.
    """
    def _register(channel_name, expression):
        # None of these channels takes an input, hence the NullSpace.
        monitor.add_channel(
            channel_name,
            ipt=None,
            val=expression,
            dataset=monitoring_dataset,
            data_specs=(NullSpace(), '')
        )

    _register('rprop_decrease_rate', self.decrease_rate)
    _register('rprop_increase_rate', self.increase_rate)
    for zero in self.zeros.values():
        _register(zero.name, T.sum(zero))
def get_updates(self, learning_rate, grads, lr_scalers=None,
global_error=None,masks=None):
updates = OrderedDict()
for param, grad in grads.iteritems():
# Create required shared variables
lr = lr_scalers.get(param, learning_rate.get_value())
delta = sharedX(
np.zeros_like(param.get_value()) + lr,
borrow=True
)
previous_grad = sharedX(
np.zeros_like(param.get_value()),
borrow=True
)
zeros = sharedX(
np.zeros_like(param.get_value()),
borrow=True
)
layer_name = re.sub('_W$','',param.name)
if re.match(r'.*_W$',param.name) and layer_name in masks:
mask = masks[layer_name]
masked_grad = T.gt(T.dot(mask.T,T.dot(mask,grad)),0.)
else:
masked_grad = 1. #T.ones_like(grad)
# Name variables according to the parameter name
if param.name is not None:
delta.name = 'delta_'+param.name
zeros.name = 'zeros_' + param.name
previous_grad.name = 'previous_grad_' + param.name
self.zeros[param] = zeros
temp = grad * previous_grad
delta_inc = T.switch(
T.neq(grad,0.),
T.clip(
T.switch(
T.eq(temp, 0.),
delta,
T.switch(
T.lt(temp, 0.),
delta*self.decrease_rate,
delta*self.increase_rate
)
),
self.min_rate,
self.max_rate
),
delta
)
previous_grad_inc = T.switch(
T.gt(masked_grad,0.),
T.switch(
T.gt(temp,0.),
grad,
#.........这里部分代码省略.........
开发者ID:nitbix,项目名称:pylearn2,代码行数:101,代码来源:learning_rule.py
示例12: RMSProp
class RMSProp(LearningRule):
"""
Implements the RMSProp learning rule.
The RMSProp learning rule is described by Hinton in `lecture 6
<http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`
of the Coursera Neural Networks for Machine Learning course.
In short, Hinton suggests "[the] magnitude of the gradient can be very
different for different weights and can change during learning. This
makes it hard to choose a global learning rate." RMSProp solves this
problem by "[dividing] the learning rate for a weight by a running
average of the magnitudes of recent gradients for that weight."
Parameters
----------
decay : float, optional
Decay constant similar to that used in AdaDelta and Momentum methods.
max_scaling: float, optional
Restrict the RMSProp gradient scaling coefficient to values
below `max_scaling`.
Notes
-----
An instance of this LearningRule should only be used with one
TrainingAlgorithm, and its get_updates method should be called
only once. This is required in order to make the monitoring
channels correctly report the moving averages.
"""
def __init__(self, decay=0.9, max_scaling=1e5):
    """
    Check the hyper-parameters and initialise the rule's state.

    `decay` must lie in [0, 1) and `max_scaling` must be positive.
    `self.epsilon` is set to the reciprocal of `max_scaling`.
    """
    assert 0. <= decay < 1.
    assert max_scaling > 0
    # Filled elsewhere; read by `add_channels_to_monitor`.
    self.mean_square_grads = OrderedDict()
    self.epsilon = 1. / max_scaling
    self.decay = sharedX(decay, 'decay')
@wraps(LearningRule.add_channels_to_monitor)
def add_channels_to_monitor(self, monitor, monitoring_dataset):
    """
    Add min, max and mean channels for every entry of
    `self.mean_square_grads`.

    Each channel is named after the entry plus the suffix '_min',
    '_max' or '_mean'.
    """
    reducers = {
        '_min': T.min,
        '_max': T.max,
        '_mean': T.mean
    }

    for running_avg in self.mean_square_grads.values():
        base_name = running_avg.name
        for suffix, reduce_op in reducers.items():
            monitor.add_channel(
                name=(base_name + suffix),
                ipt=None,
                val=reduce_op(running_avg),
                data_specs=(NullSpace(), ''),
                dataset=monitoring_dataset)
def get_updates(self, learning_rate, grads, lr_scalers=None):
"""
Provides the symbolic (theano) description of the updates needed to
perform this learning rule. See Notes for side-effects.
Parameters
----------
learning_rate : float
Learning rate coefficient.
grads : dict
A dictionary mapping from the model's parameters to their
gradients.
lr_scalers : dict
A dictionary mapping from the model's parameters to a learning
rate multiplier.
Returns
-------
updates : OrderdDict
A dictionary mapping from the old model parameters, to their new
values after a single iteration of the learning rule.
Notes
-----
This method has the side effect of storing the moving average
of the square gradient in `self.mean_square_grads`. This is
necessary in order for the monitoring channels to be able
to track the value of these moving averages.
Therefore, this method should only get called once for each
instance of RMSProp.
"""
updates = OrderedDict()
for param in grads:
# mean_squared_grad := E[g^2]_{t-1}
mean_square_grad = sharedX(param.get_value() * 0.)
if param.name is None:
#.........这里部分代码省略.........
开发者ID:nitbix,项目名称:pylearn2,代码行数:101,代码来源:learning_rule.py
示例13: Monitor
class Monitor(object):
"""
A class for monitoring Models while they are being trained.
A monitor object records the number of minibatches and number of
examples the model has trained, as well as any number of "channels"
that track quantities of interest (examples: the objective
function, measures of hidden unit activity, reconstruction error,
sum of squared second derivatives, average norm of the weight
vectors, etc.)
Parameters
----------
model : `pylearn2.models.model.Model`
Attributes
----------
on_channel_conflict : string
`error` : this is a behavior when there is conlfict
on creating a channel twice
`copy_history` : this is a behavior when creating a
new channel and transfering history of old_monitor
`overwrite` : this is a behavior when creating a
new channel without taking an account of old_monitor
"""
def __init__(self, model):
    """
    Create a monitor for `model` with no channels and no datasets.

    `on_channel_conflict` starts as 'error'.
    """
    self.model = model
    self.training_succeeded = False
    self.on_channel_conflict = 'error'
    self.channels = OrderedDict()

    # Progress counters all start at zero.
    self._num_batches_seen = 0
    self._examples_seen = 0
    self._epochs_seen = 0

    # Bookkeeping lists, each a distinct empty list.
    self._datasets = []
    self._iteration_mode = []
    self._batch_size = []
    self._num_batches = []
    self._rng_seed = []

    self._dirty = True
    self.theano_function_mode = None
    self.names_to_del = ['theano_function_mode']
    self.t0 = time.time()

    # Initialize self._nested_data_specs, self._data_specs_mapping,
    # and self._flat_data_specs
    self._build_data_specs()
def _build_data_specs(self):
    """
    Recompute the data specs covering the model and all channels.

    Sets `self._nested_data_specs`, `self._data_specs_mapping` and
    `self._flat_data_specs`. Called from `__init__`; the original
    docstring says it is also called from redo_theano.
    """
    # Ask the model what it needs
    model_space, model_source = self.model.get_monitoring_data_specs()
    spaces = [model_space]
    sources = [model_source]

    # Then append what every registered channel needs.
    for channel in self.channels.values():
        channel_space = channel.data_specs[0]
        assert isinstance(channel_space, Space)
        spaces.append(channel_space)
        sources.append(channel.data_specs[1])

    nested_space = CompositeSpace(spaces)
    nested_source = tuple(sources)
    self._nested_data_specs = (nested_space, nested_source)

    mapping = DataSpecsMapping(self._nested_data_specs)
    self._data_specs_mapping = mapping

    flat_space = mapping.flatten(nested_space, return_tuple=True)
    flat_source = mapping.flatten(nested_source, return_tuple=True)
    self._flat_data_specs = (CompositeSpace(flat_space), flat_source)
def set_theano_function_mode(self, mode):
    """
    Change the compilation mode used for the monitoring functions.

    When `mode` differs from the current one, it is stored and the
    monitor is flagged dirty; otherwise nothing happens.

    Parameters
    ----------
    mode : theano.compile.Mode
        Theano functions for the monitoring channels will be
        compiled and run using this mode.
    """
    mode_changed = self.theano_function_mode != mode
    if mode_changed:
        self.theano_function_mode = mode
        self._dirty = True
def add_dataset(self, dataset, mode='sequential', batch_size=None,
num_batches=None, seed=None):
"""
Determines the data used to calculate the values of each channel.
Parameters
----------
#.........这里部分代码省略.........
开发者ID:123fengye741,项目名称:pylearn2,代码行数:101,代码来源:monitor.py
示例14: Recurrent
class Recurrent(Layer):
"""
A recurrent neural network layer using the hyperbolic tangent
activation function, passing on all hidden states or a selection
of them to the next layer.
The hidden state is initialized to zeros.
Parameters
----------
dim : int
The number of elements in the hidden layer
layer_name : str
The name of the layer. All layers in an MLP must have a unique name.
irange : float
Initializes each weight randomly in U(-irange, irange)
irange : float
The input-to-hidden weight matrix is initialized with weights in
the uniform interval (-irange, irange). The hidden-to-hidden
matrix weights are sampled in the same manner, unless the argument
svd is set to True (see below).
indices : slice, list of integers or integer, optional
If specified this layer will return only the given hidden
states. If an integer is given, it will not return a
SequenceSpace. Otherwise, it will return a SequenceSpace of
fixed length. Note that a SequenceSpace of fixed length
can be flattened by using the FlattenerLayer.
Note: For now only [-1] is supported.
init_bias : float, optional
Set an initial bias to be added at each time step. Defaults to 0.
nonlinearity : theano.function, optional
weight_noise : bool, optional
Additive Gaussian noise applied to parameters
"""
def __init__(self, dim, layer_name, irange, indices=None,
             init_bias=0., nonlinearity=tensor.tanh,
             weight_noise=False, **kwargs):
    """
    Record the constructor arguments on the instance.

    Every named argument, and the remaining `kwargs` dict itself, is
    stored as an attribute of the same name. `noise_std_dev`
    (default .075) is popped from `kwargs` and kept as
    `self._std_dev` unless `weight_noise` is falsy.
    """
    # Popped before the locals() snapshot below, so the `self.kwargs`
    # dict no longer contains 'noise_std_dev'.
    self._std_dev = kwargs.pop('noise_std_dev', .075)
    self.rnn_friendly = True
    self._scan_updates = OrderedDict()
    # locals() holds `self`, every named argument and `kwargs` here;
    # each becomes an attribute. Do not add local variables above.
    self.__dict__.update(locals())
    # The snapshot also created a self-referencing `self.self`; drop it.
    del self.self
    super(Recurrent, self).__init__()
    # Discard the noise standard deviation when weight noise is off.
    if not self.weight_noise:
        self._std_dev = None
@wraps(Layer.set_input_space)
def set_input_space(self, space):
if ((not isinstance(space, SequenceSpace) and
not isinstance(space, SequenceDataSpace)) or
not isinstance(space.space, VectorSpace)):
raise ValueError("Recurrent layer needs a SequenceSpace("
"VectorSpace) or SequenceDataSpace(VectorSpace)\
as input but received %s instead"
% (space))
self.input_space = space
if self.indices is not None:
if len(self.indices) > 1:
raise ValueError("Only indices = [-1] is supported right now")
self.output_space = CompositeSpace(
[VectorSpace(dim=self.dim) for _
in range(len(self.indices))]
)
else:
assert self.indices == [-1], "Only indices = [-1] works now"
self.output_space = VectorSpace(dim=self.dim)
else:
if isinstance(self.input_space, SequenceSpace):
self.output_space = SequenceSpace(VectorSpace(dim=self.dim))
elif isinstance(self.input_space, SequenceDataSpace):
self.output_space =\
SequenceDataSpace(VectorSpace(dim=self.dim))
# Initialize the parameters
rng = self.mlp.rng
if self.irange is None:
raise ValueError("Recurrent layer requires an irange value in "
"order to initialize its weight matrices")
input_dim = self.input_space.dim
# W is the input-to-hidden matrix
W = rng.uniform(-self.irange, self.irange, (input_dim, self.dim))
# U is the hidden-to-hidden transition matrix
U = rng.randn(self.dim, self.dim)
U, _ = scipy.linalg.qr(U)
# b is the bias
b = np.zeros((self.dim,))
self._params = [
sharedX(W, name=(self.layer_name + '_W')),
sharedX(U, name=(self.layer_name + '_U')),
sharedX(b + self.init_bias,
name=(self.layer_name + '_b'))
]
#.........这里部分代码省略.........
开发者ID:dwf,项目名称:pylearn2,代码行数:101,代码来源:rnn.py
示例15: __init__
def __init__(self, objective, params, inputs=None,
param_constrainers=None, max_iter=-1,
lr_scalers=None, verbose=0, tol=None,
init_alpha=None, min_init_alpha=1e-3,
reset_alpha=True, conjugate=False,
reset_conjugate=True, gradients=None,
gradient_updates=None, line_search_mode=None,
accumulate=False, theano_function_mode=None):
self.__dict__.update(locals())
del self.self
if line_search_mode is None:
if init_alpha is None:
init_alpha = (.001, .005, .01, .05, .1)
else:
assert line_search_mode == 'exhaustive'
if init_alpha is None:
init_alpha = (.5, 1.)
self.init_alpha = tuple([float(elem) for elem in init_alpha])
if inputs is None:
inputs = []
if param_constrainers is None:
param_constrainers = []
obj = objective
self.verbose = verbose
param_to_grad_sym = OrderedDict()
param_to_grad_shared = OrderedDict()
updates = OrderedDict()
if self.gradient_updates is not None:
updates.update(self.gradient_updates)
self.params = [param for param in params]
for param in params:
if self.gradients is not None and param in self.gradients:
g = self.gradients[param]
else:
g = grad(objective, param)
param_to_grad_sym[param] = g
if param.name is not None:
param_name = param.name
else:
param_name = 'anon_param'
grad_name = 'BatchGradientDescent.grad_' + param_name
grad_shared = sharedX(param.get_value() * 0., name=grad_name)
param_to_grad_shared[param] = grad_shared
updates[grad_shared] = g
self.param_to_grad_shared = param_to_grad_shared
if self.verbose:
logger.info('batch gradient class compiling gradient function')
t1 = time.time()
if self.accumulate:
self._compute_grad = Accumulator(inputs, updates=updates)
else:
self._compute_grad = function(
inputs,
updates=updates,
mode=self.theano_function_mode,
name='BatchGradientDescent._compute_grad')
if self.verbose:
t2 = time.time()
logger.info('done. Took {0}'.format(t2-t1))
if self.verbose:
logger.info('batch gradient class compiling objective function')
if self.accumulate:
self.obj = Accumulator(inputs, obj)
else:
self.obj = function(inputs, obj, mode=self.theano_function_mode,
name='BatchGradientDescent.obj')
if self.verbose:
logger.info('done')
self.param_to_cache = OrderedDict()
alpha = T.scalar(name='alpha')
alpha.tag.test_value = np.cast[alpha.dtype](.01)
cache_updates = OrderedDict()
goto_updates = OrderedDict()
for param in params:
if param.name is None:
param_name = 'anon_param'
else:
param_name = param.name
cache_name = 'BatchGradientDescent.param_to_cache[%s]' % param_name
self.param_to_cache[param] = sharedX(param.get_value(borrow=False),
name=cache_name)
cache_updates[self.param_to_cache[param]] = param
cached = self.param_to_cache[param]
g = self.param_to_grad_shared[param]
if lr_scalers is not None and param in lr_scalers:
#.........这里部分代码省略.........
开发者ID:123fengye741,项目名称:pylearn2,代码行数:101,代码来源:batch_gradient_descent.py
示例16: DRPROP
class DRPROP(LearningRule):
def __init__(
    self,
    decrease_rate=0.5,
    increase_rate=1.2,
    min_rate=1e-6,
    max_rate=50,
    switching_threshold=1e-6
):
    """
    Check the rate multipliers and initialise the rule's state.

    Parameters
    ----------
    decrease_rate : float, optional
        Must be below 1; passed through `sharedX` with the name
        'decrease_rate'.
    increase_rate : float, optional
        Must be above 1; passed through `sharedX` with the name
        'increase_rate'.
    min_rate, max_rate, switching_threshold : float, optional
        Stored unchanged on the instance.
    """
    assert increase_rate > 1.
    assert decrease_rate < 1.
    # Plain Python state first, then the shared variables.
    self.min_rate = min_rate
    self.max_rate = max_rate
    self.switching_threshold = switching_threshold
    # Four separate, initially empty dictionaries.
    self.epsilons = OrderedDict()
    self.gt_epsilons = OrderedDict()
    self.lt_epsilons = OrderedDict()
    self.eq_epsilons = OrderedDict()
    self.decrease_rate = sharedX(decrease_rate, 'decrease_rate')
    self.increase_rate = sharedX(increase_rate, 'increase_rate')
def add_channels_to_monitor(self, monitor, monitoring_dataset):
    """
    Register this rule's monitoring channels on `monitor`.

    Channels added, in order: 'rprop_decrease_rate',
    'rprop_increase_rate', then for every entry of `self.epsilons`
    its sum, min and max under the suffixes '_sum', '_min', '_max'.

    Parameters
    ----------
    monitor : object
        Must provide `add_channel`.
    monitoring_dataset : object
        Passed as `dataset` for every channel.
    """
    def _register(channel_name, expression):
        # None of these channels takes an input, hence the NullSpace.
        monitor.add_channel(
            channel_name,
            ipt=None,
            val=expression,
            dataset=monitoring_dataset,
            data_specs=(NullSpace(), '')
        )

    _register('rprop_decrease_rate', self.decrease_rate)
    _register('rprop_increase_rate', self.increase_rate)

    # NOTE: gt_epsilons, lt_epsilons and eq_epsilons are deliberately
    # not monitored; the original carried that code commented out.
    reductions = (('_sum', T.sum), ('_min', T.min), ('_max', T.max))
    for epsilon in self.epsilons.values():
        for suffix, reduce_op in reductions:
            _register(epsilon.name + suffix, reduce_op(epsilon))
def get_updates(self, learning_rate, grads, lr_scalers=None,
global_error=None,dropout_mask=None):
updates = OrderedDict()
for param, grad in grads.iteritems():
# Created required shared variables
lr = lr_scalers.get(param, learning_rate.get_value())
delta = sharedX(
np.zeros_like(param.get_value()) + lr,
borrow=True
)
previous_grad = sharedX(
np.zeros_like(param.get_value()),
borrow=True
)
epsilons = sharedX(
np.zeros_like(param.get_value()),
#.........这里部分代码省略.........
开发者ID:nitbix,项目名称:pylearn2,代码行数:101,代码来源:learning_rule.py
示例17: UpdateNormMonitorLearningRule
class UpdateNormMonitorLearningRule(LearningRule):
""" Wraps an existing pylearn2 learning rule and adds monitor channels
for the norms of the gradient based updates calculated during
learning.
"""
def __init__(self, base_learning_rule, decay=0.9):
    """
    Wrap `base_learning_rule` and prepare empty bookkeeping.

    Parameters
    ----------
    base_learning_rule : object
        Stored as `self.base`; `add_channels_to_monitor` and
        `get_updates` delegate to it.
    decay : float, optional
        Stored unchanged as `self.decay`.
    """
    self.base = base_learning_rule
    self.decay = decay
    self.mean_updates = OrderedDict()
    # hack to allow MomentumAdjustor to access momentum value
    if hasattr(self.base, 'momentum'):
        self.momentum = self.base.momentum
def add_channels_to_monitor(self, monitor, monitoring_dataset):
    """
    Add norm channels for every entry of `self.mean_updates`, then let
    the wrapped rule add its own channels.

    The norms computed depend on the entry's number of dimensions:
    4 -> per-kernel norms, 3 -> norms over the last two axes,
    2 -> column norms, row norms and the whole-matrix norm,
    1 -> vector norm, 0 -> the value itself. Where several norms
    exist, their min, max and mean are reported.

    Raises
    ------
    ValueError
        If an entry has more than four dimensions.
    """
    # NOTE(review): the nesting below was reconstructed from a source
    # whose indentation was lost -- confirm against the upstream file,
    # in particular that the matrix-wide "_norm" channel and the final
    # delegation to self.base sit outside the inner loops.
    channel_mapping = {
        '_min': T.min,
        '_max': T.max,
        '_mean': T.mean
    }
    for mean_update in self.mean_updates.values():
        if mean_update.ndim == 4:
            # rank-4 tensor (assuming stack of rank-3 convolutional kernels)
            knl_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=(1,2,3)))
            for suffix, op in channel_mapping.items():
                monitor.add_channel(
                    name=(mean_update.name + "_kernel_norm" + suffix),
                    ipt=None,
                    val=op(knl_norm_vals),
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
        elif mean_update.ndim == 3:
            # rank-3 tensor (assuming stack of rank-2 conv layer biases)
            knl_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=(1,2)))
            for suffix, op in channel_mapping.items():
                monitor.add_channel(
                    name=(mean_update.name + "_norm" + suffix),
                    ipt=None,
                    val=op(knl_norm_vals),
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
        elif mean_update.ndim == 2:
            # rank-2 tensor (matrix)
            col_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=0))
            row_norm_vals = T.sqrt(T.sum(T.sqr(mean_update), axis=1))
            mtx_norm_val = T.sqrt(T.sum(T.sqr(mean_update)))
            for suffix, op in channel_mapping.items():
                monitor.add_channel(
                    name=(mean_update.name + "_col_norm" + suffix),
                    ipt=None,
                    val=op(col_norm_vals),
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
                monitor.add_channel(
                    name=(mean_update.name + "_row_norm" + suffix),
                    ipt=None,
                    val=op(row_norm_vals),
                    data_specs=(NullSpace(), ''),
                    dataset=monitoring_dataset)
            # A single whole-matrix norm, so no min/max/mean suffix.
            monitor.add_channel(
                name=(mean_update.name + "_norm"),
                ipt=None,
                val=mtx_norm_val,
                data_specs=(NullSpace(), ''),
                dataset=monitoring_dataset)
        elif mean_update.ndim == 1:
            # rank-1 tensor (vector)
            norm_val = T.sqrt(T.sum(T.sqr(mean_update), axis=0))
            monitor.add_channel(
                name=(mean_update.name + "_norm"),
                ipt=None,
                val=norm_val,
                data_specs=(NullSpace(), ''),
                dataset=monitoring_dataset)
        elif mean_update.ndim == 0:
            # rank-0 tensor (scalar)
            monitor.add_channel(
                name=(mean_update.name + "_norm"),
                ipt=None,
                val=mean_update,
                data_specs=(NullSpace(), ''),
                dataset=monitoring_dataset)
        else:
            # not sure which axes to sum over in this case
            raise ValueError(
                'Mean update {0} has unexpected number of dimensions {1} ({2})'
                .format(mean_update, mean_update.ndim, mean_update.shape))
    # Let the wrapped rule register its own channels as well.
    self.base.add_channels_to_monitor(monitor, monitoring_dataset)
    return
def get_updates(self, learning_rate, grads, lr_scalers=None):
updates = self.base.get_updates(learning_rate, grads, lr_scalers)
for (param, grad) in six.iteritems(grads):
#.........这里部分代码省略.........
开发者ID:Neuroglycerin,项目名称:neukrill-net-tools,代码行数:101,代码来源:update_norm_monitor.py
注：本文中的pylearn2.compat.OrderedDict类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论