Python utils.contains_nan Function Code Examples


This article collects typical usage examples of the Python function pylearn2.utils.contains_nan. If you have been wondering what exactly contains_nan does, how it is used, or what a real call looks like, the curated code examples below may help.



Below are 20 code examples of the contains_nan function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
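For orientation before the examples: contains_nan reports whether an ndarray contains any NaN entries, and every example below uses it inside an assertion to validate freshly loaded or freshly computed arrays. A minimal plain-numpy sketch of equivalent behavior (the actual pylearn2 implementation may differ in detail, e.g. it may use a faster internal reduction):

    import numpy as np

    def contains_nan_sketch(arr):
        """Return True if any element of arr is NaN (plain-numpy equivalent)."""
        return np.isnan(arr).any()

    X = np.array([[0.5, 1.0], [np.nan, 2.0]])
    assert contains_nan_sketch(X)           # NaN present somewhere in X
    assert not contains_nan_sketch(X[0])    # the first row is clean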

Example 1: set_topological_view

    def set_topological_view(self, V, axes=('b', 0, 1, 'c')):
        """
        Sets the dataset to represent V, where V is a batch
        of topological views of examples.

        .. todo::

            Why is this parameter named 'V'?

        Parameters
        ----------
        V : ndarray
            An array containing a design matrix representation of
            training examples.
        axes : WRITEME
        """
        assert not contains_nan(V)
        rows = V.shape[axes.index(0)]
        cols = V.shape[axes.index(1)]
        channels = V.shape[axes.index('c')]
        self.view_converter = DefaultViewConverter([rows, cols, channels],
                                                   axes=axes)
        self.X = self.view_converter.topo_view_to_design_mat(V)
        # self.X_topo_space stores a "default" topological space that
        # will be used only when self.iterator is called without a
        # data_specs, and with "topo=True", which is deprecated.
        self.X_topo_space = self.view_converter.topo_space
        assert not contains_nan(self.X)

        # Update data specs
        X_space = VectorSpace(dim=self.X.shape[1])
        X_source = 'features'
        if self.y is None:
            space = X_space
            source = X_source
        else:
            if self.y.ndim == 1:
                dim = 1
            else:
                dim = self.y.shape[-1]
            # This is to support old pickled models
            if getattr(self, 'y_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.y_labels)
            elif getattr(self, 'max_labels', None) is not None:
                y_space = IndexSpace(dim=dim, max_labels=self.max_labels)
            else:
                y_space = VectorSpace(dim=dim)
            y_source = 'targets'

            Latent_space = VectorSpace(dim=self.latent.shape[-1])
            Latent_source = 'latents'

            space = CompositeSpace((X_space, y_space,Latent_space))
            source = (X_source, y_source,Latent_source)

        self.data_specs = (space, source)
        self.X_space = X_space
        self._iter_data_specs = (X_space, X_source)
Author: HiQiQi, Project: src, Lines: 58, File: myDenseDesignMatrix.py


Example 2: __init__

    def __init__(self, which_set, numOfClasses,
                 numOfExamplesPerClass, axes=('b', 0, 1, 'c')):
        self.height = 32
        self.width = 100
        self.axes = axes
        self.dtype = 'uint8'
        self.examples = []
        self.img_shape = (1, self.height, self.width)
        self.img_size = numpy.prod(self.img_shape)
        self.numOfClasses = numOfClasses
        self.numOfExamplesPerClass = numOfExamplesPerClass
        self.classes = []
        self.examplesPerClassCount = {}
        if which_set == "train":
            self.fileToLoadFrom = "annotation_train.txt"
        elif which_set == "test":
            self.fileToLoadFrom = "annotation_test.txt"
        elif which_set == "valid":
            self.fileToLoadFrom = "annotation_val.txt"
        else:
            raise ValueError("Set not recognized")
        self.datapath = "/media/tommaso/Lacie/mnt/ramdisk/max/90kDICT32px/"

        self.loadData()
        X = numpy.cast['float32'](self.x)

        view_converter = dense_design_matrix.DefaultViewConverter((self.height, self.width, 1),
                                                                   axes)

        super(MJSYNTH, self).__init__(X=X, y=self.y, view_converter=view_converter,
                                       y_labels=numOfClasses)

        assert not contains_nan(self.X)
Author: feixuedudiao, Project: MachineLearning, Lines: 33, File: mjsynth_bak.py


Example 3: set_mri_topological_view

    def set_mri_topological_view(self, topo_view, mask=None,
                                 axes=("b", 0, 1, "c")):
        """
        Set the topological view.

        Parameters
        ----------
        topo_view: array-like
            Topological view of a matrix, 4D. Should be MRI 4D data.
        mask: array-like
            Mask for data.
        axes: tuple, optional
            Axis to use to set topological view.

        Returns
        -------
        design_matrix: array-like
            The corresponding design matrix for the topological view.
        """
        assert not contains_nan(topo_view)
        r, c, d = tuple(topo_view.shape[axes.index(i)] for i in (0, 1, "c"))

        self.view_converter = MRIViewConverterTransposed(
            (r, c, d), mask=mask, axes=axes)
        design_matrix = self.view_converter.topo_view_to_design_mat(topo_view)

        return design_matrix
Author: ecastrow, Project: pl2mind, Lines: 27, File: MRI.py


Example 4: next

    def next(self):
        """
        Get the next subset of the dataset during dataset iteration.

        Converts index selections for batches to boolean selections that
        are supported by HDF5 datasets.
        """
        next_index = self._subset_iterator.next()

        # convert to boolean selection
        sel = np.zeros(self.num_examples, dtype=bool)
        sel[next_index] = True
        next_index = sel

        rval = []
        for data, fn in safe_izip(self._raw_data, self._convert):
            try:
                this_data = data[next_index]
            except TypeError:
                this_data = data[next_index, :]
            if fn:
                this_data = fn(this_data)
            assert not contains_nan(this_data)
            rval.append(this_data)
        rval = tuple(rval)
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Author: JesseLivezey, Project: pylearn2, Lines: 28, File: hdf5.py


Example 5: main_loop

 def main_loop(self):
     self.algorithm.setup(agent=self.agent, environment=self.environment)
     i = 0
     for param in self.agent.get_params():
         assert not contains_nan(param.get_value()), (i, param.name)
         assert not contains_inf(param.get_value()), (i, param.name)
     while True:
         rval = self.algorithm.train()
         assert rval is None
         i += 1
         for param in self.agent.get_params():
             assert not contains_nan(param.get_value()), (i, param.name)
             assert not contains_inf(param.get_value()), (i, param.name)
         if i % 1000 == 0:
             serial.save(self.save_path, self.agent)
             logger.info('saved!')
Author: 123fengye741, Project: pylearn2, Lines: 16, File: simulator.py


Example 6: __init__

    def __init__(self, which_set, label_type=None,
                 center=False, contrast_normalize=False, seed=132987):
        assert which_set in ['train', 'valid', 'test']
        assert label_type in [
            None, 'label', 'azimuth', 'rotation', 'texture_id']

        # load data
        fname = '${PYLEARN2_DATA_PATH}/mnistplus/mnistplus'
        if label_type == 'azimuth':
            fname += '_azi'
        if label_type == 'rotation':
            fname += '_rot'
            label_type = 'label'
        if label_type == 'texture_id':
            fname += '_tex'
            label_type = 'label'

        data = load(fname + '.pkl')

        # get images and cast to floatX
        data_x = np.cast[config.floatX](data['data'])
        data_x = data_x[MNISTPlus.idx[which_set]]

        if contrast_normalize:
            meanx = np.mean(data_x, axis=1)[:, None]
            stdx = np.std(data_x, axis=1)[:, None]
            data_x = (data_x - meanx) / stdx

        if center:
            data_x -= np.mean(data_x, axis=0)

        # get labels
        data_y = None
        if label_type is not None:
            data_y = data[label_type].reshape(-1, 1)

            # convert to float for performing regression
            if label_type == 'azimuth':
                data_y = np.cast[config.floatX](data_y / 360.)

            # retrieve only subset of data
            data_y = data_y[MNISTPlus.idx[which_set]]

        view_converter = dense_design_matrix.DefaultViewConverter((48, 48, 1))

        # init the super class
        if data_y is not None:
            super(MNISTPlus, self).__init__(
                X=data_x, y=data_y, y_labels=np.max(data_y) + 1,
                view_converter=view_converter
            )
        else:
            super(MNISTPlus, self).__init__(
                X=data_x,
                view_converter=view_converter
            )

        assert not contains_nan(self.X)
Author: 123fengye741, Project: pylearn2, Lines: 58, File: mnistplus.py


Example 7: __init__

    def __init__(self, which_set, one_hot=False, axes=['b', 0, 1, 'c']):
        """
        .. todo::

            WRITEME
        """
        self.args = locals()

        assert which_set in self.data_split.keys()

        path = serial.preprocess(
            "${PYLEARN2_DATA_PATH}/ocr_letters/letter.data")
        with open(path, 'r') as data_f:
            data = data_f.readlines()
            data = [line.split("\t") for line in data]

        data_x = [map(int, item[6:-1]) for item in data]
        data_letters = [item[1] for item in data]
        data_fold = [int(item[5]) for item in data]

        letters = list(numpy.unique(data_letters))
        data_y = [letters.index(item) for item in data_letters]

        if which_set == 'train':
            split = slice(0, self.data_split['train'])
        elif which_set == 'valid':
            split = slice(self.data_split['train'], self.data_split['train'] +
                          self.data_split['valid'])
        elif which_set == 'test':
            split = slice(self.data_split['train'] + self.data_split['valid'],
                          (self.data_split['train'] +
                           self.data_split['valid'] +
                           self.data_split['test']))

        data_x = numpy.asarray(data_x[split])
        data_y = numpy.asarray(data_y[split])
        data_fold = numpy.asarray(data_fold[split])
        assert data_x.shape[0] == data_y.shape[0]
        assert data_x.shape[0] == self.data_split[which_set]

        self.one_hot = one_hot
        if one_hot:
            one_hot = numpy.zeros(
                (data_y.shape[0], len(letters)), dtype='float32')
            for i in xrange(data_y.shape[0]):
                one_hot[i, data_y[i]] = 1.
            data_y = one_hot

        view_converter = dense_design_matrix.DefaultViewConverter(
            (16, 8, 1), axes)
        super(OCR, self).__init__(
            X=data_x, y=data_y, view_converter=view_converter)

        assert not contains_nan(self.X)
        self.fold = data_fold
Author: Deathmonster, Project: pylearn2, Lines: 55, File: ocr.py


Example 8: set_data

    def set_data(self, data, data_specs):
        """
        .. todo::

            WRITEME
        """
        # data is organized as data_specs
        # keep self.data_specs, and convert data
        data_specs[0].np_validate(data)
        # a non-empty list is always truthy, so `not [...]` never fires;
        # any() is needed to actually test the per-element results
        assert not any(contains_nan(X) for X in data)
        raise NotImplementedError()
Author: AlexArgus, Project: pylearn2, Lines: 11, File: vector_spaces_dataset.py


Example 9: entropy_binary_vector

def entropy_binary_vector(P):
    """
    .. todo::

        WRITEME properly

    If P[i,j] represents the probability of some binary random variable X[i,j]
    being 1, then rval[i] gives the entropy of the random vector X[i,:]
    """

    for Pv in get_debug_values(P):
        assert Pv.min() >= 0.0
        assert Pv.max() <= 1.0

    oneMinusP = 1. - P

    PlogP = xlogx(P)
    omPlogOmP = xlogx(oneMinusP)

    term1 = - T.sum(PlogP, axis=1)
    assert len(term1.type.broadcastable) == 1

    term2 = - T.sum(omPlogOmP, axis=1)
    assert len(term2.type.broadcastable) == 1

    rval = term1 + term2

    debug_vals = get_debug_values(PlogP, omPlogOmP, term1, term2, rval)
    for plp, olo, t1, t2, rv in debug_vals:
        debug_assert(isfinite(plp))
        debug_assert(isfinite(olo))

        debug_assert(not contains_nan(t1))
        debug_assert(not contains_nan(t2))
        debug_assert(not contains_nan(rv))

    return rval
Author: 123fengye741, Project: pylearn2, Lines: 37, File: information_theory.py
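In equation form, entropy_binary_vector computes for each row i the sum of independent Bernoulli entropies, with the convention 0·log 0 = 0 that xlogx enforces (term1 and term2 in the code are the two sums):

    \mathrm{rval}[i] = -\sum_j P_{ij}\log P_{ij} \, - \, \sum_j (1-P_{ij})\log(1-P_{ij})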


Example 10: do_check_on

        def do_check_on(var, nd, f, is_input):
            """
            Checks `var` for NaNs / Infs. If detected, raises an exception
            and / or prints information about `nd`, `f`, and `is_input` to
            help the user determine the cause of the invalid values.

            Parameters
            ----------
            var : numpy.ndarray
                The value to be checked.
            nd : theano.gof.Apply
                The Apply node being executed
            f : callable
                The thunk for the apply node
            is_input : bool
                If True, `var` is an input to `nd`.
                If False, it is an output.
            """
            error = False
            if nan_is_error:
                if contains_nan(var):
                    logger.error('NaN detected')
                    error = True
            if inf_is_error:
                if contains_inf(var):
                    logger.error('Inf detected')
                    error = True
            if big_is_error:
                if np.abs(var).max() > 1e10:
                    logger.error('Big value detected')
                    error = True
            if error:
                if is_input:
                    logger.error('In an input')
                else:
                    logger.error('In an output')
                logger.error('Inputs: ')
                for ivar, ival in zip(nd.inputs, f.inputs):
                    logger.error('var')
                    logger.error(ivar)
                    logger.error(theano.printing.min_informative_str(ivar))
                    logger.error('val')
                    logger.error(ival)
                logger.error('Node:')
                logger.error(nd)
                assert False
Author: AlexArgus, Project: pylearn2, Lines: 46, File: nan_guard.py
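The three checks in do_check_on can also be exercised standalone. Below is a minimal sketch using plain numpy in place of pylearn2's contains_nan/contains_inf helpers; the flag names and the 1e10 threshold mirror the snippet above, while check_array itself is a hypothetical helper introduced here for illustration:

    import numpy as np

    def check_array(var, nan_is_error=True, inf_is_error=True, big_is_error=True):
        """Return the list of problems found in var, mirroring do_check_on's tests."""
        problems = []
        if nan_is_error and np.isnan(var).any():
            problems.append('NaN detected')
        if inf_is_error and np.isinf(var).any():
            problems.append('Inf detected')
        if big_is_error and np.abs(var).max() > 1e10:
            problems.append('Big value detected')
        return problems

    assert check_array(np.array([1.0, np.nan])) == ['NaN detected']
    assert check_array(np.array([1.0, 1e12])) == ['Big value detected']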


Example 11: next

    def next(self):
        """
        Get the next subset of the dataset during dataset iteration.

        Converts index selections for batches to boolean selections that
        are supported by HDF5 datasets.
        """
        next_index = self._subset_iterator.next()

        # convert to boolean selection
        sel = np.zeros(self.num_examples, dtype=bool)
        sel[next_index] = True
        next_index = sel

        rval = []
        for data, fn in safe_izip(self._raw_data, self._convert):
            try:
                this_data = data[next_index]
            except TypeError:
                # FB: Why is this try..except here? I think it is useless.
                # Do not hide the original error if we can't fall back.
                # FV: This is triggered if the shape of next_index is
                # incompatible with the shape of the dataset. See
                # test_hdf5_topo_view() for an example, where
                # next_index.shape = (10,) and data is 'data': <HDF5
                # dataset "y": shape (10, 3), type "<f8">.
                # I think it would be better to explicitly check whether
                # next_index.shape is incompatible with data.shape, for
                # instance by checking if next_index.ndim == data.ndim
                if data.ndim > 1:
                    this_data = data[next_index, :]
                else:
                    raise
            # Check if the dataset data is a vector and transform it into a
            # one-column matrix. This is needed to automatically convert the
            # shape of the data later (in the format_as method of the
            # Space.)
            if fn:
                this_data = fn(this_data)
            assert not contains_nan(this_data)
            rval.append(this_data)
        rval = tuple(rval)
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
Author: 123fengye741, Project: pylearn2, Lines: 45, File: hdf5_deprecated.py


Example 12: __init__


#......... part of the code is omitted here .........
        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = N.prod(self.img_shape)
        self.n_classes = 10
        self.label_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                            'dog', 'frog', 'horse', 'ship', 'truck']

        # prepare loading
        fnames = ['data_batch_%i' % i for i in range(1, 6)]
        lenx = N.ceil((ntrain + nvalid) / 10000.) * 10000
        x = N.zeros((lenx, self.img_size), dtype=dtype)
        y = N.zeros((lenx, 1), dtype=dtype)

        # load train data
        nloaded = 0
        for i, fname in enumerate(fnames):
            data = CIFAR10._unpickle(fname)
            x[i * 10000:(i + 1) * 10000, :] = data['data']
            y[i * 10000:(i + 1) * 10000, 0] = data['labels']
            nloaded += 10000
            if nloaded >= ntrain + nvalid + ntest:
                break

        # load test data
        data = CIFAR10._unpickle('test_batch')

        # process this data
        Xs = {'train': x[0:ntrain],
              'test': data['data'][0:ntest]}

        Ys = {'train': y[0:ntrain],
              'test': data['labels'][0:ntest]}

        X = N.cast['float32'](Xs[which_set])
        y = Ys[which_set]

        if isinstance(y, list):
            y = np.asarray(y).astype(dtype)

        if which_set == 'test':
            assert y.shape[0] == 10000
            y = y.reshape((y.shape[0], 1))

        if center:
            X -= 127.5
        self.center = center

        if rescale:
            X /= 127.5
        self.rescale = rescale

        if toronto_prepro:
            assert not center
            assert not gcn
            X = X / 255.
            if which_set == 'test':
                other = CIFAR10(which_set='train')
                oX = other.X
                oX /= 255.
                X = X - oX.mean(axis=0)
            else:
                X = X - X.mean(axis=0)
        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        if start is not None:
            # This needs to come after the prepro so that it doesn't
            # change the pixel means computed above for toronto_prepro
            assert start >= 0
            assert stop > start
            assert stop <= X.shape[0]
            X = X[start:stop, :]
            y = y[start:stop, :]
            assert X.shape[0] == y.shape[0]

        if which_set == 'test':
            assert X.shape[0] == 10000

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(CIFAR10, self).__init__(X=X, y=y, view_converter=view_converter,
                                      y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
Author: MarCnu, Project: pylearn2, Lines: 101, File: cifar10.py


Example 13: test_grad_h

    def test_grad_h(self):

        "tests that the gradients with respect to h_i are 0 after doing a mean field update of h_i "

        model = self.model
        e_step = self.e_step
        X = self.X

        assert X.shape[0] == self.m

        init_H = e_step.init_H_hat(V = X)
        init_Mu1 = e_step.init_S_hat(V = X)

        prev_setting = config.compute_test_value
        config.compute_test_value= 'off'
        H, Mu1 = function([], outputs=[init_H, init_Mu1])()
        config.compute_test_value = prev_setting

        H = broadcast(H, self.m)
        Mu1 = broadcast(Mu1, self.m)

        H = np.cast[config.floatX](self.model.rng.uniform(0.,1.,H.shape))
        Mu1 = np.cast[config.floatX](self.model.rng.uniform(-5.,5.,Mu1.shape))


        H_var = T.matrix(name='H_var')
        H_var.tag.test_value = H
        Mu1_var = T.matrix(name='Mu1_var')
        Mu1_var.tag.test_value = Mu1
        idx = T.iscalar()
        idx.tag.test_value = 0


        new_H = e_step.infer_H_hat(V = X, H_hat = H_var, S_hat = Mu1_var)
        h_idx = new_H[:,idx]

        updates_func = function([H_var,Mu1_var,idx], h_idx)

        sigma0 = 1. / model.alpha
        Sigma1 = e_step.infer_var_s1_hat()
        mu0 = T.zeros_like(model.mu)

        #by truncated KL, I mean that I am dropping terms that don't depend on H and Mu1
        # (they don't affect the outcome of this test and some of them are intractable )
        trunc_kl = - model.entropy_hs(H_hat = H_var, var_s0_hat = sigma0, var_s1_hat = Sigma1) + \
                     model.expected_energy_vhs(V = X, H_hat = H_var, S_hat = Mu1_var,  var_s0_hat = sigma0,
                             var_s1_hat = Sigma1)

        grad_H = T.grad(trunc_kl.sum(), H_var)

        assert len(grad_H.type.broadcastable) == 2

        #from theano.printing import min_informative_str
        #print min_informative_str(grad_H)

        #grad_H = Print('grad_H')(grad_H)

        #grad_H_idx = grad_H[:,idx]

        grad_func = function([H_var, Mu1_var], grad_H)

        failed = False

        for i in xrange(self.N):
            rval = updates_func(H, Mu1, i)
            H[:,i] = rval

            g = grad_func(H,Mu1)[:,i]

            assert not contains_nan(g)

            g_abs_max = np.abs(g).max()

            if g_abs_max > self.tol:
                #print "new values of H"
                #print H[:,i]
                #print "gradient on new values of H"
                #print g

                failed = True

                print('iteration ',i)
                #print 'max value of new H: ',H[:,i].max()
                #print 'H for failing g: '
                failing_h = H[np.abs(g) > self.tol, i]
                #print failing_h

                #from matplotlib import pyplot as plt
                #plt.scatter(H[:,i],g)
                #plt.show()

                #ignore failures extremely close to h=1

                high_mask = failing_h > .001
                low_mask = failing_h < .999

                mask = high_mask * low_mask

                print('masked failures: ',mask.shape[0],' err ',g_abs_max)

#......... part of the code is omitted here .........
Author: 123fengye741, Project: pylearn2, Lines: 101, File: test_s3c_inference.py


Example 14: __init__

    def __init__(self, which_set, center=False, rescale=False, gcn=None,
                 start=None, stop=None, axes=('b', 0, 1, 'c'),
                 toronto_prepro = False, preprocessor = None):
        # note: there is no such thing as the cifar10 validation set;
        # pylearn1 defined one but really it should be user-configurable
        # (as it is here)

        self.axes = axes

        # we define here:
        dtype = 'uint8'
        ntrain = 50000
        nvalid = 0  # artefact, we won't use it
        ntest = 10000

        # we also expose the following details:
        self.img_shape = (3, 32, 32)
        self.img_size = numpy.prod(self.img_shape)
        self.n_classes = 100
        # make sure that this is working (we can also copy it from meta file)
        self.label_names = range(1900,2000)

        import cPickle
        fo = open('datasets/data_batch')
        dict = cPickle.load(fo)
        fo.close()
        
        lenx = numpy.ceil((ntrain + nvalid) / 10000.) * 10000
        x = numpy.zeros((lenx, self.img_size), dtype=dtype)
        y = numpy.zeros((lenx, 1), dtype=dtype)

        # load train data
        #data = serial.load(datasets[fname])
        x[0:8305,:] = dict['data']
        #x[i * 10000:(i + 1) * 10000, :] = dict['data']
        #y[i * 10000:(i + 1) * 10000, 0] = dict['labels']

        #X = dict['data']
        y[0:8305,0] = dict['labels']
        
        # load test data
        #_logger.info('loading file %s' % datasets['test_batch'])
        #data = serial.load(datasets['test_batch'])

        # process this data
        #Xs = {'train': x[0:ntrain],
        #      'test': data['data'][0:ntest]}

        #Ys = {'train': y[0:ntrain],
        #      'test': data['labels'][0:ntest]}

        X = numpy.cast['float32'](x[0:8305])
        y = y[0:8305]
#        y = Ys[which_set]



        if isinstance(y, list):
            y = numpy.asarray(y).astype(dtype)

        self.center = center

        self.rescale = rescale

        self.toronto_prepro = toronto_prepro

        self.gcn = gcn
        if gcn is not None:
            gcn = float(gcn)
            X = global_contrast_normalize(X, scale=gcn)

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)

        super(Timeliner, self).__init__(X=X, y=y, view_converter=view_converter,
                                      y_labels=self.n_classes)

        assert not contains_nan(self.X)

        if preprocessor:
            preprocessor.apply(self)
Author: pauchan, Project: deepLearningMokuMoku, Lines: 81, File: timeline_dataset.py


Example 15: train_all

    def train_all(self, dataset, mu=None):
        """
        Process kmeans algorithm on the input to localize clusters.

        Parameters
        ----------
        dataset : WRITEME
        mu : WRITEME

        Returns
        -------
        rval : bool
            WRITEME
        """

        # TODO-- why does this sometimes return X and sometimes return nothing?

        X = dataset.get_design_matrix()

        n, m = X.shape
        k = self.k

        if milk is not None:
            # use the milk implementation of k-means if it's available
            cluster_ids, mu = milk.kmeans(X, k)
        else:
            # our own implementation

            # taking random inputs as initial clusters if user does not provide
            # them.
            if mu is not None:
                if not len(mu) == k:
                    raise Exception("You gave %i clusters"
                                    ", but k=%i were expected"
                                    % (len(mu), k))
            else:
                indices = numpy.random.randint(X.shape[0], size=k)
                mu = X[indices]

            try:
                dists = numpy.zeros((n, k))
            except MemoryError as e:
                improve_memory_error_message(e, "dying trying to allocate "
                                                "dists matrix for {0} "
                                                "examples and {1} "
                                                "means".format(n, k))

            old_kills = {}

            iter = 0
            mmd = prev_mmd = float('inf')
            while True:
                if self.verbose:
                    logger.info('kmeans iter {0}'.format(iter))

                # print 'iter:',iter,' conv crit:',abs(mmd-prev_mmd)
                # if numpy.sum(numpy.isnan(mu)) > 0:
                if contains_nan(mu):
                    logger.info('nan found')
                    return X

                # computing distances
                for i in xrange(k):
                    dists[:, i] = numpy.square((X - mu[i, :])).sum(axis=1)

                if iter > 0:
                    prev_mmd = mmd

                min_dists = dists.min(axis=1)

                # mean minimum distance:
                mmd = min_dists.mean()

                logger.info('cost: {0}'.format(mmd))

                if iter > 0 and (iter >= self.max_iter or
                                 abs(mmd - prev_mmd) < self.convergence_th):
                    # converged
                    break

                # finding minimum distances
                min_dist_inds = dists.argmin(axis=1)

                # computing means
                i = 0
                blacklist = []
                new_kills = {}
                while i < k:
                    b = min_dist_inds == i
                    if not numpy.any(b):
                        killed_on_prev_iter = True
                        # initializes empty cluster to be the mean of the d
                        # data points farthest from their corresponding means
                        if i in old_kills:
                            d = old_kills[i] - 1
                            if d == 0:
                                d = 50
                            new_kills[i] = d
                        else:
                            d = 5
#......... part of the code is omitted here .........
Author: AlexArgus, Project: pylearn2, Lines: 101, File: kmeans.py


Example 16: test_grad_s

    def test_grad_s(self):

        "tests that the gradients with respect to s_i are 0 after doing a mean field update of s_i "

        model = self.model
        e_step = self.e_step
        X = self.X

        assert X.shape[0] == self.m

        model.test_batch_size = X.shape[0]

        init_H = e_step.init_H_hat(V = X)
        init_Mu1 = e_step.init_S_hat(V = X)

        prev_setting = config.compute_test_value
        config.compute_test_value= 'off'
        H, Mu1 = function([], outputs=[init_H, init_Mu1])()
        config.compute_test_value = prev_setting

        H = broadcast(H, self.m)
        Mu1 = broadcast(Mu1, self.m)

        H = np.cast[config.floatX](self.model.rng.uniform(0.,1.,H.shape))
        Mu1 = np.cast[config.floatX](self.model.rng.uniform(-5.,5.,Mu1.shape))



        H_var = T.matrix(name='H_var')
        H_var.tag.test_value = H
        Mu1_var = T.matrix(name='Mu1_var')
        Mu1_var.tag.test_value = Mu1
        idx = T.iscalar()
        idx.tag.test_value = 0


        S = e_step.infer_S_hat(V = X, H_hat = H_var, S_hat = Mu1_var)

        s_idx = S[:,idx]

        s_i_func = function([H_var,Mu1_var,idx],s_idx)

        sigma0 = 1. / model.alpha
        Sigma1 = e_step.infer_var_s1_hat()
        mu0 = T.zeros_like(model.mu)

        #by truncated KL, I mean that I am dropping terms that don't depend on H and Mu1
        # (they don't affect the outcome of this test and some of them are intractable )
        trunc_kl = - model.entropy_hs(H_hat = H_var, var_s0_hat = sigma0, var_s1_hat = Sigma1) + \
                     model.expected_energy_vhs(V = X, H_hat = H_var, S_hat = Mu1_var, var_s0_hat = sigma0, var_s1_hat = Sigma1)

        grad_Mu1 = T.grad(trunc_kl.sum(), Mu1_var)

        grad_Mu1_idx = grad_Mu1[:,idx]

        grad_func = function([H_var, Mu1_var, idx], grad_Mu1_idx)

        for i in xrange(self.N):
            Mu1[:,i] = s_i_func(H, Mu1, i)

            g = grad_func(H,Mu1,i)

            assert not contains_nan(g)

            g_abs_max = np.abs(g).max()


            if g_abs_max > self.tol:
                raise Exception('after mean field step, gradient of kl divergence wrt mean field parameter should be 0, but here the max magnitude of a gradient element is '+str(g_abs_max)+' after updating s_'+str(i))
Author: 123fengye741, Project: pylearn2, Lines: 69, File: test_s3c_inference.py


Example 17: main


#......... part of the code is omitted here .........
                    for j in xrange(i,len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: "+rng[1])
                        quit(-1)

                    final_codes = final_codes.union(set(sorted_codes[i:j+1]))
                else:
                    #The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes = set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color+'--' for color in colors]
        styles += [color+':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1,1,1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name= codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
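                # 'batche' rather than 'batches': the xlabel below is built
                # as '# ' + x_axis + 's', so this spelling yields '# batches'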
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False


            ax.plot( x,
                      y,
                      styles[idx % len(styles)],
                      marker = '.', # add point markers to lines
                      label = channel_name)

        plt.xlabel('# '+x_axis+'s')
        ax.ticklabel_format( scilimits = (-3,3), axis = 'both')
Author: 123fengye741, Project: pylearn2, Lines: 66, File: plot_monitor.py


Example 18: __init__

    def __init__(self, which_set, center=False, example_range=None):
        """
        .. todo::

            WRITEME
        """
        if which_set == 'train':
            train = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/train.mat')

            # Load the class names
            self.class_names = [array[0].encode('utf-8')
                                for array in train['class_names'][0]]

            # Load the fold indices
            fold_indices = train['fold_indices']
            assert fold_indices.shape == (1, 10)
            self.fold_indices = np.zeros((10, 1000), dtype='uint16')
            for i in xrange(10):
                indices = fold_indices[0, i]
                assert indices.shape == (1000, 1)
                assert indices.dtype == 'uint16'
                self.fold_indices[i, :] = indices[:, 0]

            # The data is stored as uint8
            # If we leave it as uint8, it will cause the CAE to silently fail
            # since theano will treat derivatives wrt X as 0
            X = np.cast['float32'](train['X'])

            assert X.shape == (5000, 96 * 96 * 3)

            if example_range is not None:
                X = X[example_range[0]:example_range[1], :]

            # this is uint8
            y = train['y'][:, 0]
            assert y.shape == (5000,)
        elif which_set == 'test':
            test = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/test.mat')

            # Load the class names
            self.class_names = [array[0].encode('utf-8')
                                for array in test['class_names'][0]]

            # The data is stored as uint8
            # If we leave it as uint8, it will cause the CAE to silently fail
            # since theano will treat derivatives wrt X as 0

            X = np.cast['float32'](test['X'])
            assert X.shape == (8000, 96 * 96 * 3)

            if example_range is not None:
                X = X[example_range[0]:example_range[1], :]

            # this is uint8
            y = test['y'][:, 0]
            assert y.shape == (8000,)

        elif which_set == 'unlabeled':
            unlabeled = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/'
                             'unlabeled.mat')

            X = unlabeled['X']

            # this file is stored in HDF format, which transposes everything
            assert X.shape == (96 * 96 * 3, 100000)
            assert X.dtype == 'uint8'

            if example_range is None:
                X = X.value
            else:
                X = X.value[:, example_range[0]:example_range[1]]
            X = np.cast['float32'](X.T)

            unlabeled.close()

            y = None

        else:
            raise ValueError('"' + which_set + '" is not an STL10 dataset. '
                             'Recognized values are "train", "test", and '
                             '"unlabeled".')
        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 3))

        super(STL10, self).__init__(X=X, y=y, y_labels=10,
                                    view_converter=view_converter)

        for i in xrange(self.X.shape[0]):
            mat = X[i:i + 1, :]
            topo = self.get_topological_view(mat)
            for j in xrange(topo.shape[3]):
                temp = topo[0, :, :, j].T.copy()
                topo[0, :, :, j] = temp
            mat = self.get_design_matrix(topo)
            X[i:i + 1, :] = mat

        assert not contains_nan(self.X)
Author: allansp84, Project: pylearn2, Lines: 99, File: stl10.py


Example 19: setup

    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if contains_inf(param.get_value())]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([contains_nan(param.get_value())
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if contains_nan(param.get_value())]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # test if force batch size and batch size
        has_force_batch_size = getattr(model, "force_batch_size", False)
        train_dataset_is_uneven = \
            dataset.get_num_examples() % self.batch_size != 0

        has_monitoring_datasets = \
            self.monitoring_dataset is not None and \
            len(self.monitoring_dataset) > 0

        if has_monitoring_datasets:
            monitoring_datasets_are_uneven = \
                any(d.get_num_examples() % self.batch_size
                    != 0 for d in self.monitoring_dataset.values())
        else:
            monitoring_datasets_are_uneven = False  # or True it doesn't matter

        if has_force_batch_size and train_dataset_is_uneven and \
           not has_uniform_batch_size(self.train_iteration_mode):

            raise ValueError(
#......... the rest of the code is omitted here .........
