
Python validation.check_array Function Code Examples


This article collects typical usage examples of the Python function sklearn.utils.validation.check_array. If you have been wondering what check_array does, how to call it, or what real-world usage looks like, the curated code examples below should help.



The following presents 20 code examples of the check_array function, ordered by popularity.
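
To ground the examples, here is a minimal standalone sketch of what check_array itself does: it converts array-likes to validated 2-D ndarrays and rejects non-finite values by default (only public sklearn API is used below).

import numpy as np
from sklearn.utils.validation import check_array

# A nested list is converted to a validated 2-D ndarray.
X = check_array([[1.0, 2.0], [3.0, 4.0]])
print(X.shape)                                # (2, 2)

# Non-finite values raise a ValueError by default...
try:
    check_array([[1.0, np.nan]])
except ValueError as exc:
    print(exc)

# ...unless finiteness checking is disabled explicitly.
X = check_array([[1.0, np.nan]], force_all_finite=False)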

Example 1: predict

    def predict(self, X, categorical=None):
        """Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            New data to predict.
        categorical : int, list of int, or tuple of int, optional
            Indices of columns that contain categorical data.

        Returns
        -------
        labels : array, shape [n_samples,]
            Index of the cluster each sample belongs to.
        """
        assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."

        if categorical is not None:
            assert isinstance(categorical, (int, list, tuple)), "The 'categorical' \
                argument needs to be an integer with the index of the categorical \
                column in your data, or a list or tuple of several of them, \
                but it is a {}.".format(type(categorical))

        X = pandas_to_numpy(X)
        Xnum, Xcat = _split_num_cat(X, categorical)
        Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)
        Xcat, _ = encode_features(Xcat, enc_map=self._enc_map)
        return _labels_cost(Xnum, Xcat, self._enc_cluster_centroids,
                            self.num_dissim, self.cat_dissim, self.gamma)[0]
Author: nicodv | Project: kmodes | Lines: 28 | File: kprototypes.py
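
A hypothetical usage sketch for the method above, assuming the kmodes package (the kmodes project by nicodv, as credited) is installed; the categorical column index passed to predict must match the one used during fit.

import numpy as np
from kmodes.kprototypes import KPrototypes

# Mixed data: column 0 is numerical, column 1 is categorical.
X = np.array([[1.0, 'red'], [1.2, 'red'], [8.5, 'blue'], [9.0, 'blue']],
             dtype=object)

model = KPrototypes(n_clusters=2, random_state=0).fit(X, categorical=[1])
labels = model.predict(X, categorical=[1])    # one cluster index per row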


Example 2: pairwise_distances_no_broadcast

def pairwise_distances_no_broadcast(X, Y):
    """Utility function to calculate row-wise euclidean distance of two matrix.
    Different from pair-wise calculation, this function would not broadcast.

    For instance, X and Y are both (4,3) matrices, the function would return
    a distance vector with shape (4,), instead of (4,4).

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        First input samples

    Y : array of shape (n_samples, n_features)
        Second input samples

    Returns
    -------
    distance : array of shape (n_samples,)
        Row-wise Euclidean distance of X and Y
    """
    X = check_array(X)
    Y = check_array(Y)

    if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]:
        raise ValueError("pairwise_distances_no_broadcast function receive"
                         "matrix with different shapes {0} and {1}".format(
            X.shape, Y.shape))
    return _pairwise_distances_no_broadcast_helper(X, Y)
Author: flaviassantos | Project: pyod | Lines: 28 | File: stat_models.py
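
The private helper _pairwise_distances_no_broadcast_helper is not shown here; as a sketch, the same row-wise quantity can be computed directly with NumPy:

import numpy as np

X = np.arange(12, dtype=float).reshape(4, 3)
Y = X + 1.0

# One distance per paired row -- shape (4,), not the (4, 4) matrix
# that a full pairwise computation would produce.
dist = np.sqrt(((X - Y) ** 2).sum(axis=1))
print(dist)                                   # [1.732 1.732 1.732 1.732]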


Example 3: _process_inputs

  def _process_inputs(self, X, constraints):

    self.X_ = X = check_array(X)

    # check to make sure that no two constrained vectors are identical
    a, b, c, d = constraints
    no_ident = vector_norm(X[a] - X[b]) > 1e-9
    a, b = a[no_ident], b[no_ident]
    no_ident = vector_norm(X[c] - X[d]) > 1e-9
    c, d = c[no_ident], d[no_ident]
    if len(a) == 0:
      raise ValueError('No non-trivial similarity constraints given for MMC.')
    if len(c) == 0:
      raise ValueError('No non-trivial dissimilarity constraints given for MMC.')

    # init metric
    if self.A0 is None:
      self.A_ = np.identity(X.shape[1])
      if not self.diagonal:
        # Don't know why division by 10... it's in the original code
        # and seems to affect the overall scale of the learned metric.
        self.A_ /= 10.0
    else:
      self.A_ = check_array(self.A0)

    return a, b, c, d
Author: svecon | Project: metric-learn | Lines: 26 | File: mmc.py
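
A small sketch of the de-duplication step above, assuming vector_norm computes per-row norms (np.linalg.norm with axis=1 behaves the same way):

import numpy as np

X = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0]])
a = np.array([0, 0])     # first index of each similarity pair
b = np.array([1, 2])     # second index of each pair

# Keep only pairs whose two vectors actually differ.
no_ident = np.linalg.norm(X[a] - X[b], axis=1) > 1e-9
a, b = a[no_ident], b[no_ident]
print(a, b)              # [0] [2] -- the identical (0, 1) pair is dropped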


Example 4: check_array_with_weights

def check_array_with_weights(X, weights, **kwargs):
    """Utility to validate data and weights.

    This calls check_array on X and weights, making sure results match.
    """
    if weights is None:
        return check_array(X, **kwargs), weights

    # Always use copy=False for weights
    kwargs_weights = dict(kwargs)
    kwargs_weights.update(copy=False)
    weights = check_array(weights, **kwargs_weights)

    # Always use force_all_finite=False for X
    kwargs_X = dict(kwargs)
    kwargs_X.update(force_all_finite=False)
    X = check_array(X, **kwargs_X)

    # Make sure shapes match and missing data has weights=0
    if X.shape != weights.shape:
        raise ValueError("Shape of `X` and `weights` should match")

    Wzero = (weights == 0)
    X[Wzero] = 0

    if not np.all(np.isfinite(X)):
        raise ValueError("Input contains NaN or infinity without "
                         "a corresponding zero in `weights`.")
    return X, weights
Author: jakevdp | Project: wpca | Lines: 29 | File: utils.py
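
A quick demonstration of the contract this enforces, using the function defined above: missing entries are tolerated exactly where the weight is zero, and are zeroed out in the returned array.

import numpy as np

X = np.array([[1.0, np.nan], [3.0, 4.0]])
W = np.array([[1.0, 0.0], [1.0, 1.0]])

Xc, Wc = check_array_with_weights(X, W)
print(Xc)    # the NaN is replaced by 0 because its weight is 0

# A nonzero weight on the NaN entry makes the same call raise ValueError.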


Example 5: log_loss

def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = np.append(1 - T, T, axis=1)

    # Clipping
    Y = np.clip(y_pred, eps, 1 - eps)

    # This happens in cases when elements in y_pred have type "str".
    if not isinstance(Y, np.ndarray):
        raise ValueError("y_pred should be an array of floats.")

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if Y.ndim == 1:
        Y = Y[:, np.newaxis]
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)

    # Check if dimensions are consistent.
    val.check_consistent_length(T, Y)
    T = val.check_array(T)
    Y = val.check_array(Y)
    if T.shape[1] != Y.shape[1]:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (T.shape[1], Y.shape[1]))

    # Renormalize
    Y /= Y.sum(axis=1)[:, np.newaxis]
    loss = -(T * np.log(Y)).sum(axis=1)

    return _weighted_sum(loss, sample_weight, normalize)
Author: dvn123 | Project: MachineLearning | Lines: 35 | File: aux_functions.py
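
As a sanity check of the formula, for binary targets the loss reduces to ordinary cross-entropy, which is easy to verify by hand (assuming the private _weighted_sum helper averages the per-sample losses when normalize=True):

import numpy as np

y_true = [0, 1]
y_pred = np.array([[0.9, 0.1],
                   [0.2, 0.8]])

# Mean negative log-probability of the true class:
expected = -(np.log(0.9) + np.log(0.8)) / 2
print(expected)                               # ~0.1643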


Example 6: _impose_f_order

def _impose_f_order(X):
    """Helper Function"""
    # important to access flags instead of calling np.isfortran,
    # this catches corner cases.
    if X.flags.c_contiguous:
        return check_array(X.T, copy=False, order='F'), True
    else:
        return check_array(X, copy=False, order='F'), False
Author: f00barin | Project: bilppattach | Lines: 8 | File: extmath.py
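
A short demonstration of the flag logic. Checking X.flags directly catches arrays that are simultaneously C- and F-contiguous, a corner case for which np.isfortran returns False:

import numpy as np
from sklearn.utils import check_array

X = np.arange(6.0).reshape(2, 3)     # C-contiguous by default
print(X.flags.c_contiguous)          # True

# The transpose of a C-contiguous 2-D array is already F-contiguous,
# so check_array(..., order='F') can return it without copying.
F = check_array(X.T, copy=False, order='F')
print(F.flags.f_contiguous)          # True

# An array with a length-1 dimension is both C- and F-contiguous:
row = np.ones((1, 3))
print(row.flags.f_contiguous, np.isfortran(row))   # True False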


Example 7: _prepare_inputs

  def _prepare_inputs(self, X, W):
    self.X_ = X = check_array(X)
    W = check_array(W, accept_sparse=True)
    # set up prior M
    if self.use_cov:
      self.M_ = pinvh(np.cov(X, rowvar=False))
    else:
      self.M_ = np.identity(X.shape[1])
    L = laplacian(W, normed=False)
    return X.T.dot(L.dot(X))
Author: svecon | Project: metric-learn | Lines: 10 | File: sdml.py
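
As a sketch of the loss construction, the Laplacian term can be reproduced with scipy on a tiny graph; X.T L X sums the outer products of the differences between connected samples:

import numpy as np
from scipy.sparse.csgraph import laplacian

# Three samples; only samples 0 and 1 are connected.
W = np.array([[0., 1., 0.],
              [1., 0., 0.],
              [0., 0., 0.]])
X = np.array([[0., 0.],
              [1., 0.],
              [5., 5.]])

L = laplacian(W, normed=False)
print(X.T.dot(L.dot(X)))   # [[1. 0.] [0. 0.]] == (x0 - x1)(x0 - x1)^T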


Example 8: fit

    def fit(self, X, y=None):
        """Don't trust the documentation of this module!

        Compute the mean and std to be used for later scaling.

        Parameters
        ----------
        X : array-like or CSR matrix with shape [n_samples, n_features]
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.
        """
        X = check_array(X, copy=self.copy, accept_sparse="csc",
                        ensure_2d=False)
        if warn_if_not_float(X, estimator=self):
            # Costly conversion, but otherwise the pipeline will break:
            # https://github.com/scikit-learn/scikit-learn/issues/1709
            X = X.astype(np.float32)
        if sparse.issparse(X):
            if self.center_sparse:
                means = []
                vars = []

                # This only works for csc matrices...
                for i in range(X.shape[1]):
                    if X.indptr[i] == X.indptr[i + 1]:
                        means.append(0)
                        vars.append(1)
                    else:
                        vars.append(
                            X.data[X.indptr[i]:X.indptr[i + 1]].var())
                        # If the variance is 0, set all occurrences of this
                        # feature to 1
                        means.append(
                            X.data[X.indptr[i]:X.indptr[i + 1]].mean())
                        if 0.0000001 >= vars[-1] >= -0.0000001:
                            means[-1] -= 1

                self.std_ = np.sqrt(np.array(vars))
                self.std_[np.array(vars) == 0.0] = 1.0
                self.mean_ = np.array(means)

                return self
            elif self.with_mean:
                raise ValueError(
                    "Cannot center sparse matrices: pass `with_mean=False` "
                    "instead. See docstring for motivation and alternatives.")
            else:
                self.mean_ = None

            if self.with_std:
                var = mean_variance_axis(X, axis=0)[1]
                self.std_ = np.sqrt(var)
                self.std_[var == 0.0] = 1.0
            else:
                self.std_ = None
            return self
        else:
            self.mean_, self.std_ = _mean_and_std(
                X, axis=0, with_mean=self.with_mean, with_std=self.with_std)
            return self
Author: Ayaro | Project: auto-sklearn | Lines: 60 | File: StandardScaler.py


Example 9: transform

    def transform(self, X, y=None, copy=None):
        """Perform standardization by centering and scaling

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data used to scale along the features axis.
        """
        check_is_fitted(self, 'std_')

        copy = copy if copy is not None else self.copy
        X = check_array(X, copy=copy, accept_sparse="csc", ensure_2d=False)
        if warn_if_not_float(X, estimator=self):
            X = X.astype(np.float64)
        if sparse.issparse(X):
            if self.center_sparse:
                for i in range(X.shape[1]):
                    X.data[X.indptr[i]:X.indptr[i + 1]] -= self.mean_[i]

            elif self.with_mean:
                raise ValueError(
                    "Cannot center sparse matrices: pass `with_mean=False` "
                    "instead. See docstring for motivation and alternatives.")

            else:
                pass

            if self.std_ is not None:
                inplace_column_scale(X, 1 / self.std_)
        else:
            if self.with_mean:
                X -= self.mean_
            if self.with_std:
                X /= self.std_
        return X
Author: Ayaro | Project: auto-sklearn | Lines: 35 | File: StandardScaler.py


Example 10: dump_svmlight_file

def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None):

    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("expected y of shape (n_samples,), got %r"
                         % (y.shape,))

    Xval = check_array(X, accept_sparse='csr')
    if Xval.shape[0] != y.shape[0]:
        raise ValueError("X.shape[0] and y.shape[0] should be the same, got"
                         " %r and %r instead." % (Xval.shape[0], y.shape[0]))

    # We had some issues with CSR matrices with unsorted indices (e.g. #1501),
    # so sort them here, but first make sure we don't modify the user's X.
    # TODO We can do this cheaper; sorted_indices copies the whole matrix.
    if Xval is X and hasattr(Xval, "sorted_indices"):
        X = Xval.sorted_indices()
    else:
        X = Xval
        if hasattr(X, "sort_indices"):
            X.sort_indices()

    if query_id is not None:
        query_id = np.asarray(query_id)
        if query_id.shape[0] != y.shape[0]:
            raise ValueError("expected query_id of shape (n_samples,), got %r"
                             % (query_id.shape,))

    one_based = not zero_based

    if hasattr(f, "write"):
        _dump_svmlight(X, y, f, one_based, comment, query_id)
    else:
        with open(f, "wb") as f:
            _dump_svmlight(X, y, f, one_based, comment, query_id)
Author: junjiek | Project: cmu-exp | Lines: 35 | File: svmIO.py
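
sklearn ships public equivalents of these helpers; a round trip with them looks like this (a sketch — the private _dump_svmlight used above may differ in formatting details):

import numpy as np
from sklearn.datasets import dump_svmlight_file, load_svmlight_file

X = np.array([[0.0, 1.0], [2.0, 0.0]])
y = np.array([0, 1])

dump_svmlight_file(X, y, "tiny.svmlight", zero_based=True)
X2, y2 = load_svmlight_file("tiny.svmlight", zero_based=True)
print(X2.toarray(), y2)    # zeros are omitted on disk; data comes back sparse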


Example 11: fit

    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.detector_ = LocalOutlierFactor(n_neighbors=self.n_neighbors,
                                            algorithm=self.algorithm,
                                            leaf_size=self.leaf_size,
                                            metric=self.metric,
                                            p=self.p,
                                            metric_params=self.metric_params,
                                            contamination=self.contamination,
                                            n_jobs=self.n_jobs)
        self.detector_.fit(X=X, y=y)

        # Invert decision_scores_. Outliers come with higher outlier scores
        self.decision_scores_ = invert_order(
            self.detector_.negative_outlier_factor_)
        self._process_decision_scores()
        return self
Author: flaviassantos | Project: pyod | Lines: 30 | File: lof.py


Example 12: ttest

def ttest(X, y):
    X = check_array(X, accept_sparse='csr')
    if np.any((X.data if issparse(X) else X) < 0):
        raise ValueError("Input X must be non-negative.")

    Y = MultiLabelBinarizer().fit_transform(y)
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)
    negY = 1 - Y
    labelNum = Y.shape[1]
    featureNum = X.shape[1]
    t = []
    prob = []
    for i in range(featureNum):
        # X may be dense or CSR here, since check_array used accept_sparse='csr'
        if issparse(X):
            values = X[:, i].toarray().ravel()
        else:
            values = X[:, i]
        ti = 0
        probi = 0
        for j in range(labelNum):
            observed = values * Y[:,j]
            notObserved = values * negY[:,j]
            (res0, res1) = scipy.stats.ttest_ind(observed, notObserved)
            ti = ti + res0
            probi = probi + res1
        t.append(ti)
        prob.append(probi)
    t = np.asarray(t)
    prob = np.asarray(prob)
    return t, prob
Author: junjiek | Project: cmu-exp | Lines: 29 | File: ttest_multilabel.py


Example 13: fit

    def fit(self, X, y=None):
        """Fit the model with ``X``.
        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        Returns
        -------
        self : object
            Returns the instance itself.
        """

        X = check_array(X, dtype=np.float64)
        L, S, (U, s, Vt), self.n_iter_ = rpca(X, self.lam, self.mu,
                                              self.max_iter, self.eps_primal,
                                              self.eps_dual, self.rho,
                                              self.initial_sv, self.max_mu,
                                              self.verbose)
        self.low_rank_ = L
        r = np.count_nonzero(s)
        self.n_components_ = r
        self.components_ = Vt[:r]

        return self
Author: Alan215 | Project: advanced_training | Lines: 25 | File: robust_pca.py


Example 14: predict_proba

    def predict_proba(self, X):
        """Predict probability for each possible outcome.

        Compute the probability estimates for each single sample in X
        and each possible outcome seen during training (categorical
        distribution).

        Parameters
        ----------
        X : array_like, shape = [n_samples, n_features]

        Returns
        -------
        probabilities : array, shape = [n_samples, n_classes]
            Normalized probability distributions across
            class labels
        """
        check_is_fitted(self, 'X_')

        X_2d = check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok',
                                             'bsr', 'lil', 'dia'])
        weight_matrices = self._get_kernel(self.X_, X_2d)
        if self.kernel == 'knn':
            probabilities = []
            for weight_matrix in weight_matrices:
                ine = np.sum(self.label_distributions_[weight_matrix], axis=0)
                probabilities.append(ine)
            probabilities = np.array(probabilities)
        else:
            weight_matrices = weight_matrices.T
            probabilities = np.dot(weight_matrices, self.label_distributions_)
        normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T
        probabilities /= normalizer
        return probabilities
Author: musically-ut | Project: semi_supervised | Lines: 34 | File: label_propagation.py


Example 15: predict

    def predict(self, X):
        """Predict class for X.

        Parameters
        ----------
        X : Array-like of shape [n_samples, n_features]
            The input to classify.

        Returns
        -------
        y : array of shape = [n_samples]
            The predicted classes.
        """

        X = check_array(X)

        if self.trees_ is None:
            raise Exception("Pattern trees not initialized. Perform a fit first.")

        y_classes = np.zeros((X.shape[0], len(self.classes_)))
        for i, c in enumerate(self.classes_):
            y_classes[:, i] = self.trees_[i](X)

        # predict the maximum value
        return self.classes_.take(np.argmax(y_classes, -1))
Author: sorend | Project: fylearn | Lines: 25 | File: fpt.py


Example 16: fit

    def fit(self, X, y):
        X = check_array(X)

        random_state = check_random_state(self.random_state)

        self.classes_, y_reverse = np.unique(y, return_inverse=True)

        if np.nan in self.classes_:
            raise ValueError("NaN class not supported.")

        # build models
        models = {}
        for c_idx, c_value in enumerate(self.classes_):
            X_class = X[y == c_value]
            a_sample_size = min(len(X_class), self.sample_size)
            c_models = []
            for i in range(self.n_models):
                # resample
                X_sample = X_class[random_state.choice(len(X_class), a_sample_size)]
                c_models.append(self.build_for_class(random_state, X_sample))
            models[c_value] = np.array(c_models)

        weights = self.fit_weights(random_state, models, X, y_reverse)

        self.models_ = models
        self.weights_ = weights

        return self
Author: sorend | Project: fylearn | Lines: 28 | File: garules.py


Example 17: transform

    def transform(self, X):
        """ A reference implementation of a transform function.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        X_transformed : array of int of shape = [n_samples, n_features]
            The array containing the element-wise square roots of the values
            in `X`
        """
        # Check if fit has been called
        check_is_fitted(self, ['input_shape_'])

        # Input validation
        X = check_array(X)

        # Check that the input is of the same shape as the one passed
        # during fit.
        if X.shape != self.input_shape_:
            raise ValueError('Shape of input is different from what was seen '
                             'in `fit`')
        return np.sqrt(X)
Author: I2Cvb | Project: simblefaron | Lines: 26 | File: simblefaron.py


Example 18: fit

    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)
        n_samples, n_features = X.shape

        # check parameters
        # number of clusters are default to 8
        self._validate_estimator(default=MiniBatchKMeans(
            n_clusters=self.n_clusters,
            random_state=self.random_state))

        self.clustering_estimator_.fit(X=X, y=y)
        # Get the labels of the clustering results
        # labels_ is consistent across sklearn clustering algorithms
        self.cluster_labels_ = self.clustering_estimator_.labels_
        self.cluster_sizes_ = np.bincount(self.cluster_labels_)
        self._set_cluster_centers(X, n_features)
        self._set_small_large_clusters(n_samples)

        self.decision_scores_ = self._decision_function(X,
                                                        self.cluster_labels_)

        self._process_decision_scores()
        return self
Author: flaviassantos | Project: pyod | Lines: 35 | File: cblof.py


Example 19: fit_transform

    def fit_transform(self, X, y=None):
        """Fit the model with X and apply the dimensionality reduction on X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        X = check_array(X)
        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        self.mean_ = X.mean(0)
        U, s, VT = np.linalg.svd(X - self.mean_)
        self.components_ = VT[:n_components]
        var = s ** 2 / X.shape[0]
        self.explained_variance_ = var[:n_components]
        self.explained_variance_ratio_ = var[:n_components] / var.sum()
        return s[:n_components] * U[:, :n_components]
Author: jakevdp | Project: wpca | Lines: 26 | File: pca.py
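
A quick numerical check of the SVD identity this relies on: the returned scores s[:k] * U[:, :k] equal the centered data projected onto the top-k principal axes.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(10, 4)

mean = X.mean(0)
U, s, VT = np.linalg.svd(X - mean)
k = 2

scores = s[:k] * U[:, :k]            # what fit_transform returns
proj = (X - mean) @ VT[:k].T         # projection onto the components
print(np.allclose(scores, proj))     # True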


Example 20: predict_proba

    def predict_proba(self, X):
        """
        Predict the membership probabilities for the data samples
        in X using trained model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        proba : array, shape (n_samples, n_clusters)
        """
        X = check_array(X, copy=False, order='C', dtype=sp.float64)
        K = self.score_samples(X)
        T = sp.empty_like(K)

        # Compute the log-likelihood
        K *= 0.5

        # Compute the posterior
        with sp.errstate(over='ignore'):
            for c in range(self.C):
                T[:, c] = 1 / sp.exp(K - K[:, c][:, sp.newaxis]).sum(axis=1)

        return T
Author: mfauvel | Project: HDDA | Lines: 28 | File: hdda.py



Note: the sklearn.utils.validation.check_array examples above were collected from open-source projects hosted on GitHub and similar platforms. Copyright of each code snippet remains with its original author; consult the corresponding project's license before redistributing or reusing the code.

