本文整理汇总了Python中sklearn.utils.column_or_1d函数的典型用法代码示例。如果您正苦于以下问题：Python column_or_1d函数的具体用法？Python column_or_1d怎么用？Python column_or_1d使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了column_or_1d函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: check_consistent_shape
def check_consistent_shape(X_train, y_train, X_test, y_test, y_train_pred,
                           y_test_pred):
    """Validate that data, ground truth and predictions line up.

    Each ``(X, y)`` pair goes through ``check_X_y``, both prediction
    vectors go through ``column_or_1d``, and the prediction lengths are
    compared against the matching ground truth.

    Parameters
    ----------
    X_train : numpy array of shape (n_samples, n_features)
        The training samples.
    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.
    X_test : numpy array of shape (n_samples, n_features)
        The test samples.
    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.
    y_train_pred : list or array of shape (n_samples,)
        The predicted labels of the training samples.
    y_test_pred : list or array of shape (n_samples,)
        The predicted labels of the test samples.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, y_train_pred, y_test_pred)``
        as returned by the validation helpers.

    Raises
    ------
    ValueError
        If ``X_train`` and ``X_test`` differ in their number of columns.
    """
    # Validation order is kept as-is so that the first error reported for
    # a doubly-invalid input does not change.
    X_train, y_train = check_X_y(X_train, y_train)
    X_test, y_test = check_X_y(X_test, y_test)

    y_test_pred = column_or_1d(y_test_pred)
    y_train_pred = column_or_1d(y_train_pred)

    check_consistent_length(y_train, y_train_pred)
    check_consistent_length(y_test, y_test_pred)

    n_train_features = X_train.shape[1]
    n_test_features = X_test.shape[1]
    if n_train_features == n_test_features:
        return X_train, y_train, X_test, y_test, y_train_pred, y_test_pred

    message = ("X_train {0} and X_test {1} have different number "
               "of features.")
    raise ValueError(message.format(X_train.shape, X_test.shape))
开发者ID:flaviassantos,项目名称:pyod,代码行数:60,代码来源:data.py
示例2: evaluate_print
def evaluate_print(clf_name, y, y_pred):
    """Print ROC AUC and precision @ rank n for one detector.

    Both metrics are rounded to four decimals before printing.

    Parameters
    ----------
    clf_name : str
        The name of the detector; used as the line prefix.
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).
    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.
    """
    y_true = column_or_1d(y)
    scores = column_or_1d(y_pred)
    check_consistent_length(y_true, scores)

    roc = np.round(roc_auc_score(y_true, scores), decimals=4)
    prn = np.round(precision_n_scores(y_true, scores), decimals=4)

    report = '{clf_name} ROC:{roc}, precision @ rank n:{prn}'
    print(report.format(clf_name=clf_name, roc=roc, prn=prn))
开发者ID:flaviassantos,项目名称:pyod,代码行数:25,代码来源:data.py
示例3: savings_score
def savings_score(y_true, y_pred, cost_mat):
    """Savings score.

    Relative cost reduction obtained by ``y_pred`` compared with the
    cheaper of the two naive classifiers (predict all zeros or predict all
    ones), where every cost is computed with ``cost_loss`` on ``cost_mat``.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.
    y_pred : array-like or label indicator matrix
        Predicted labels, as returned by a classifier.
    cost_mat : array-like of shape = [n_samples, 4]
        Cost matrix of the classification problem.
        The columns hold the costs of: false positives, false negatives,
        true positives and true negatives, for each example.

    Returns
    -------
    score : float
        ``1 - cost(y_pred) / cost(naive)``. The best performance is 1.

    References
    ----------
    .. [1] A. Correa Bahnsen, A. Stojanovic, D.Aouada, B, Ottersten,
           `"Improving Credit Card Fraud Detection with Calibrated Probabilities" <http://albahnsen.com/files/%20Improving%20Credit%20Card%20Fraud%20Detection%20by%20using%20Calibrated%20Probabilities%20-%20Publish.pdf>`__, in Proceedings of the fourteenth SIAM International Conference on Data Mining,
           677-685, 2014.

    See also
    --------
    cost_loss

    Examples
    --------
    >>> import numpy as np
    >>> from costcla.metrics import savings_score, cost_loss
    >>> y_pred = [0, 1, 0, 0]
    >>> y_true = [0, 1, 1, 0]
    >>> cost_mat = np.array([[4, 1, 0, 0], [1, 3, 0, 0], [2, 3, 0, 0], [2, 1, 0, 0]])
    >>> savings_score(y_true, y_pred, cost_mat)
    0.5
    """
    # TODO: update description
    # TODO: Check consistency of cost_mat
    y_true = column_or_1d(y_true)
    y_pred = column_or_1d(y_pred)
    n_samples = len(y_true)

    # Baseline: the cheaper of "always predict 0" and "always predict 1".
    all_negative_cost = cost_loss(y_true, np.zeros(n_samples), cost_mat)
    all_positive_cost = cost_loss(y_true, np.ones(n_samples), cost_mat)
    cost_base = min(all_negative_cost, all_positive_cost)

    model_cost = cost_loss(y_true, y_pred, cost_mat)
    return 1.0 - model_cost / cost_base
开发者ID:S7evenrg,项目名称:CostSensitiveClassification,代码行数:60,代码来源:costs.py
示例4: precision_n_scores
def precision_n_scores(y, y_pred, n=None):
    """Compute precision @ rank n from raw outlier scores.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).
    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.
    n : int, optional (default=None)
        The number of outliers; forwarded unchanged to ``get_label_n``.

    Returns
    -------
    precision_at_rank_n : float
        ``precision_score`` of the ground truth against the binarised
        scores.
    """
    # Binarise the raw scores first; get_label_n receives the inputs
    # exactly as the caller passed them.
    binary_pred = get_label_n(y, y_pred, n)

    # Both vectors are coerced to 1-d before scoring.
    y_true = column_or_1d(y)
    binary_pred = column_or_1d(binary_pred)
    return precision_score(y_true, binary_pred)
开发者ID:flaviassantos,项目名称:pyod,代码行数:29,代码来源:utility.py
示例5: _sigmoid_calibration
def _sigmoid_calibration(self, df, y, sample_weight=None):
    """Probability Calibration with sigmoid method (Platt 2000).

    Fits ``a`` and ``b`` of the model ``P = 1 / (1 + exp(a * df + b))`` by
    minimising a (optionally weighted) cross-entropy against smoothed
    targets, using BFGS with an analytic gradient.

    Parameters
    ----------
    df : ndarray, shape (n_samples,)
        The decision function or predict proba for the samples.
    y : ndarray, shape (n_samples,)
        The targets. Values ``> 0`` are treated as the positive class,
        values ``<= 0`` as the negative class.
    sample_weight : array-like, shape = [n_samples] or None
        Sample weights. If None, then samples are equally weighted.

    Returns
    -------
    a : float
        The slope.
    b : float
        The intercept.

    References
    ----------
    Platt, "Probabilistic Outputs for Support Vector Machines"
    """
    df = column_or_1d(df)
    y = column_or_1d(y)

    F = df  # F follows Platt's notations in the Reference Paper
    # The builtin ``float`` is used because the ``np.float`` alias was
    # deprecated in NumPy 1.20 and removed in 1.24; both name the same type.
    tiny = np.finfo(float).tiny  # keeps the log() arguments strictly > 0

    # Bayesian priors (see Platt end of section 2.2 in the Reference Paper)
    prior0 = float(np.sum(y <= 0))
    prior1 = y.shape[0] - prior0
    T = np.zeros(y.shape)
    T[y > 0] = (prior1 + 1.) / (prior1 + 2.)
    T[y <= 0] = 1. / (prior0 + 2.)
    T1 = 1. - T

    def objective(AB):
        # From Platt (beginning of Section 2.2 in the Reference Paper)
        E = np.exp(AB[0] * F + AB[1])
        P = 1. / (1. + E)
        loss = -(T * np.log(P + tiny) + T1 * np.log(1. - P + tiny))
        if sample_weight is not None:
            return (sample_weight * loss).sum()
        return loss.sum()

    def grad(AB):
        # gradient of the objective function
        E = np.exp(AB[0] * F + AB[1])
        P = 1. / (1. + E)
        TEP_minus_T1P = P * (T * E - T1)
        if sample_weight is not None:
            TEP_minus_T1P *= sample_weight
        dA = np.dot(TEP_minus_T1P, F)
        dB = np.sum(TEP_minus_T1P)
        return np.array([dA, dB])

    # Start from a zero slope and an intercept derived from the class counts.
    AB0 = np.array([0., math.log((prior0 + 1.) / (prior1 + 1.))])
    AB_ = fmin_bfgs(objective, AB0, fprime=grad, disp=False)
    return (AB_[0], AB_[1])
开发者ID:jyothivinjumur,项目名称:jcmProject,代码行数:58,代码来源:PlattCaliberation.py
示例6: _check_targets_hmc
def _check_targets_hmc(y_true, y_pred):
    """Check that both targets are multiclass and return them as 1-d arrays.

    A binary/multiclass mix is treated as multiclass. Any other
    combination of target types raises ``ValueError``, including two
    binary targets.

    Parameters
    ----------
    y_true : array-like
    y_pred : array-like

    Returns
    -------
    y_true, y_pred : arrays
        Both inputs after ``column_or_1d``.
    """
    check_consistent_length(y_true, y_pred)

    y_type = {type_of_target(y_true), type_of_target(y_pred)}
    if y_type == {"binary", "multiclass"}:
        y_type = {"multiclass"}
    if y_type != {"multiclass"}:
        raise ValueError("{0} is not supported".format(y_type))

    return column_or_1d(y_true), column_or_1d(y_pred)
开发者ID:davidwarshaw,项目名称:hmc,代码行数:10,代码来源:metrics.py
示例7: brier_score_loss
def brier_score_loss(y_true, y_prob):
    """Compute the Brier score.

    The Brier score is the mean squared difference between the predicted
    probability and the actual outcome, so smaller is better (hence the
    "loss" in the name). With probabilities in [0, 1] and outcomes in
    {0, 1} the score itself lies between zero and one.

    It suits binary and categorical outcomes that can be framed as
    true/false. It is inappropriate for ordinal variables with three or
    more values, because it treats all outcomes as equally "distant" from
    one another.

    Parameters
    ----------
    y_true : array, shape (n_samples,)
        True targets.
    y_prob : array, shape (n_samples,)
        Probabilities of the positive class.

    Returns
    -------
    score : float
        Brier score

    Examples
    --------
    >>> import numpy as np
    >>> from costcla.metrics import brier_score_loss
    >>> y_true = [0, 1, 1, 0]
    >>> y_prob = [0.1, 0.9, 0.8, 0.3]
    >>> brier_score_loss(y_true, y_prob)  # doctest: +ELLIPSIS
    0.037...
    >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)
    0.0

    References
    ----------
    http://en.wikipedia.org/wiki/Brier_score
    """
    y_true = column_or_1d(y_true)
    y_prob = column_or_1d(y_prob)

    residual = y_true - y_prob
    return np.mean(residual ** 2)
开发者ID:S7evenrg,项目名称:CostSensitiveClassification,代码行数:52,代码来源:costs.py
示例8: _check_clf_targets
def _check_clf_targets(y_true, y_pred):
    """Check that y_true and y_pred belong to the same classification task.

    A binary/multiclass mix is promoted to multiclass. Any other mix of
    target types, or a target type outside the supported list, raises
    ``ValueError``. For binary and multiclass targets both arrays are
    passed through ``column_or_1d``.

    Parameters
    ----------
    y_true : array-like
    y_pred : array-like

    Returns
    -------
    y_type : one of {'multilabel-indicator', 'multilabel-sequences',
             'multiclass', 'binary'}
        The common target type, as output by
        ``utils.multiclass.type_of_target``
    y_true : array or indicator matrix or sequence of sequences
    y_pred : array or indicator matrix or sequence of sequences
    """
    y_true, y_pred = check_arrays(y_true, y_pred, allow_lists=True)
    type_true = type_of_target(y_true)
    type_pred = type_of_target(y_pred)

    kinds = {type_true, type_pred}
    if kinds == {"binary", "multiclass"}:
        kinds = {"multiclass"}
    if len(kinds) > 1:
        raise ValueError(
            "Can't handle mix of {0} and {1}".format(type_true, type_pred))

    # Exactly one type is left at this point.
    y_type = kinds.pop()

    # No metrics support "multiclass-multioutput" format
    supported = ("binary", "multiclass", "multilabel-indicator",
                 "multilabel-sequences")
    if y_type not in supported:
        raise ValueError("{0} is not supported".format(y_type))

    if y_type in ("binary", "multiclass"):
        y_true = column_or_1d(y_true)
        y_pred = column_or_1d(y_pred)
    return y_type, y_true, y_pred
开发者ID:DjalelBBZ,项目名称:SOS14_practical_session,代码行数:51,代码来源:SOS_tools.py
示例9: average
def average(scores, estimator_weight=None):
    """Combine outlier scores from several estimators by (weighted) averaging.

    Parameters
    ----------
    scores : numpy array of shape (n_samples, n_estimators)
        Score matrix from multiple estimators on the same samples.
    estimator_weight : list of shape (1, n_estimators)
        If given, a weighted average is computed; its length must equal
        the number of columns of ``scores``.

    Returns
    -------
    combined_scores : numpy array of shape (n_samples, )
        The combined outlier scores.
    """
    scores = check_array(scores)

    if estimator_weight is None:
        return np.mean(scores, axis=1).ravel()

    # Reshape to a row vector so it broadcasts across the samples.
    weights = column_or_1d(estimator_weight).reshape(1, -1)
    assert_equal(scores.shape[1], weights.shape[1])

    # (d1*w1 + d2*w2 + ... + dn*wn) / (w1 + w2 + ... + wn)
    weighted_total = np.sum(np.multiply(scores, weights), axis=1)
    combined = weighted_total / np.sum(weights)
    return combined.ravel()
开发者ID:flaviassantos,项目名称:pyod,代码行数:33,代码来源:combination.py
示例10: fit
def fit(self, T, y, sample_weight=None):
    """Fit using `T`, `y` as training data.

    A new ``_SigmoidCalibration`` is created, stored on
    ``self.calibrator_`` and fitted.

    Parameters
    ----------
    * `T` [array-like, shape=(n_samples,)]:
        Training data.

    * `y` [array-like, shape=(n_samples,)]:
        Training target.

    * `sample_weight` [array-like, shape=(n_samples,), optional]:
        Weights, forwarded unchanged to the underlying calibrator.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Coerce the training data to 1-d before handing it on.
    flat_T = column_or_1d(T)

    calibrator = _SigmoidCalibration()
    self.calibrator_ = calibrator
    calibrator.fit(flat_T, y, sample_weight=sample_weight)
    return self
开发者ID:diana-hep,项目名称:carl,代码行数:27,代码来源:calibration.py
示例11: _validate_y
def _validate_y(self, y):
    """Validate classification targets and encode them as class indices.

    Stores the sorted unique labels on ``self.classes_`` and their count
    on ``self.n_classes_``.

    Returns
    -------
    y : ndarray
        For each sample, the index of its label in ``self.classes_``.
    """
    y = column_or_1d(y, warn=True)
    check_classification_targets(y)

    classes, encoded = np.unique(y, return_inverse=True)
    self.classes_ = classes
    self.n_classes_ = len(classes)
    return encoded
开发者ID:naranil,项目名称:ensemble_comparison,代码行数:7,代码来源:class_switching.py
示例12: fit
def fit(self, X, y, sample_weight=None, check_input=True):
    """Grid-search ``tau`` and ``lamda`` and keep the best classifier.

    Labels are binarised to -1/+1, an ``L1L2TwoStepClassifier`` is tuned
    with ``GridSearchCV`` over ``self.taus`` and ``self.lamdas``, and the
    best estimator's parameters and coefficients are copied onto ``self``.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data
    y : array-like, shape = [n_samples] or [n_samples, n_targets]
        Target values
    sample_weight : float or array-like of shape [n_samples]
        Sample weight; forwarded as a fit parameter of the grid search.
    check_input : bool
        Forwarded as a fit parameter of the grid search.

    Returns
    -------
    self : Returns self.

    Raises
    ------
    ValueError
        If the label binarizer reports a multilabel target.
    """
    binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
    self._label_binarizer = binarizer
    y = binarizer.fit_transform(y)
    if binarizer.y_type_.startswith('multilabel'):
        raise ValueError(
            "%s doesn't support multi-label classification" % (
                self.__class__.__name__))
    y = column_or_1d(y, warn=False)

    param_grid = dict(tau=self.taus, lamda=self.lamdas)
    fit_params = dict(sample_weight=sample_weight, check_input=check_input)

    base_estimator = L1L2TwoStepClassifier(
        mu=self.mu,
        fit_intercept=self.fit_intercept,
        use_gpu=self.use_gpu,
        threshold=self.threshold,
        normalize=self.normalize,
        precompute=self.precompute,
        max_iter=self.max_iter,
        copy_X=self.copy_X,
        tol=self.tol,
        warm_start=self.warm_start,
        positive=self.positive,
        random_state=self.random_state,
        selection=self.selection)

    gs = GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid,
        fit_params=fit_params,
        cv=self.cv,
        scoring=self.scoring,
        n_jobs=self.n_jobs,
        iid=self.iid,
        refit=self.refit,
        verbose=self.verbose,
        pre_dispatch=self.pre_dispatch,
        error_score=self.error_score,
        return_train_score=self.return_train_score)
    gs.fit(X, y)

    best = gs.best_estimator_
    self.tau_ = best.tau
    self.lamda_ = best.lamda
    self.coef_ = best.coef_
    self.intercept_ = best.intercept_
    self.best_estimator_ = best  # XXX DEBUG

    # One coefficient row per class when there are more than two classes,
    # a single row otherwise.
    n_classes = self.classes_.shape[0]
    ndim = n_classes if n_classes > 2 else 1
    self.coef_ = self.coef_.reshape(ndim, -1)
    return self
开发者ID:slipguru,项目名称:l1l2py,代码行数:60,代码来源:classification.py
示例13: fit
def fit(self, X, y):
    """Fit the model to the data X and target y.

    Records the unique labels on ``self.classes_``, binarises ``y`` with
    a fresh ``LabelBinarizer`` and delegates to the parent ``fit``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples in the number of samples
        and n_features is the number of features.

    y : numpy array of shape (n_samples)
        Target values.

    Returns
    -------
    self
    """
    y = column_or_1d(y, warn=True)

    # needs a better way to check multi-label instances:
    # only the very first label is inspected.
    first_label = np.reshape(y, (-1, 1))[0][0]
    self.multi_label = isinstance(first_label, list)

    self.classes_ = np.unique(y)
    self._lbin = LabelBinarizer()
    y = self._lbin.fit_transform(y)

    super(MultilayerPerceptronClassifier, self).fit(X, y)
    return self
开发者ID:ddofer,项目名称:NeuralNetworks,代码行数:30,代码来源:multilayer_perceptron.py
示例14: get_color_codes
def get_color_codes(y):
    """Map binary ground truth to plotting color codes.

    Entries equal to 1 become ``'r'`` (red); every other entry stays
    ``'b'`` (blue).

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).

    Returns
    -------
    c : numpy array of shape (n_samples,)
        Color codes.
    """
    y = column_or_1d(y)

    # Start with everything blue, then repaint the outliers red.
    colors = np.full((len(y),), 'b', dtype=str)
    outlier_positions = np.where(y == 1)
    colors[outlier_positions] = 'r'
    return colors
开发者ID:flaviassantos,项目名称:pyod,代码行数:25,代码来源:data.py
示例15: score_to_label
def score_to_label(pred_scores, outliers_fraction=0.1):
    """Turn raw outlier scores into binary labels (0 or 1).

    The threshold is the ``100 * (1 - outliers_fraction)`` percentile of
    the scores. Scores strictly above it are labelled 1.

    Parameters
    ----------
    pred_scores : list or numpy array of shape (n_samples,)
        Raw outlier scores. Outliers are assumed have larger values.
    outliers_fraction : float in (0,1)
        Percentage of outliers.

    Returns
    -------
    outlier_labels : numpy array of shape (n_samples,)
        Integer array with 1 for scores above the threshold and 0
        otherwise.
    """
    # check input values
    pred_scores = column_or_1d(pred_scores)
    check_parameter(outliers_fraction, 0, 1)

    inlier_percentile = 100 * (1 - outliers_fraction)
    threshold = scoreatpercentile(pred_scores, inlier_percentile)
    return (pred_scores > threshold).astype('int')
开发者ID:flaviassantos,项目名称:pyod,代码行数:26,代码来源:utility.py
示例16: get_label_n
def get_label_n(y, y_pred, n=None):
    """Turn raw outlier scores into binary labels by assigning 1 to the
    top n outlier scores.

    The outlier fraction is ``n / len(y)``, or the share of non-zero
    entries in ``y`` when ``n`` is None. Scores strictly above the
    matching percentile of ``y_pred`` are labelled 1.

    Parameters
    ----------
    y : list or numpy array of shape (n_samples,)
        The ground truth. Binary (0: inliers, 1: outliers).
    y_pred : list or numpy array of shape (n_samples,)
        The raw outlier scores as returned by a fitted model.
    n : int, optional (default=None)
        The number of outliers. if not defined, infer using ground truth.

    Returns
    -------
    labels : numpy array of shape (n_samples,)
        binary labels 0: normal points and 1: outliers

    Examples
    --------
    >>> from pyod.utils.utility import get_label_n
    >>> y = [0, 1, 1, 0, 0]
    >>> y_pred = [0.1, 0.5, 0.3, 0.2, 0.7]
    >>> get_label_n(y, y_pred)
    array([0, 1, 0, 0, 1])
    """
    # enforce formats of inputs
    y = column_or_1d(y)
    y_pred = column_or_1d(y_pred)
    check_consistent_length(y, y_pred)
    y_len = len(y)  # the length of targets

    # NOTE(review): relies on true division; under Python 2 this needs
    # ``from __future__ import division`` at file level -- confirm.
    n_outliers = n if n is not None else np.count_nonzero(y)
    outliers_fraction = n_outliers / y_len

    threshold = scoreatpercentile(y_pred, 100 * (1 - outliers_fraction))
    return (y_pred > threshold).astype('int')
开发者ID:flaviassantos,项目名称:pyod,代码行数:47,代码来源:utility.py
示例17: _validate_y
def _validate_y(self, y):
    """Validate binary classification targets and encode them as indices.

    Stores the sorted unique labels on ``self.classes_``.

    Returns
    -------
    y : ndarray
        For each sample, the index of its label in ``self.classes_``.

    Raises
    ------
    ValueError
        If more than two distinct labels are present.
    """
    y = column_or_1d(y, warn=True)
    check_classification_targets(y)

    classes, encoded = np.unique(y, return_inverse=True)
    self.classes_ = classes
    if len(classes) > 2:
        raise ValueError("It's a binary classification algorithm. Use a dataset with only 2 classes to predict.")
    return encoded
开发者ID:naranil,项目名称:ensemble_comparison,代码行数:10,代码来源:logitboost.py
示例18: partial_fit
def partial_fit(self, X, y, classes=None):
    """Incrementally fit the model to the data X and target y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples in the number of samples
        and n_features is the number of features.

    classes : array, shape (n_classes)
        Classes across all calls to partial_fit.
        Can be obtained by via `np.unique(y_all)`, where y_all is the
        target vector of the entire dataset.
        This argument is required for the first call to partial_fit
        and can be omitted in the subsequent calls.
        Note that y doesn't need to contain all labels in `classes`.

    y : numpy array of shape (n_samples)
        Subset of the target values.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.algorithm`` is not ``'sgd'``, if no classes are known
        and none are passed, or if ``classes`` differs from the classes
        stored by an earlier call.
    """
    if self.algorithm != 'sgd':
        raise ValueError("only SGD algorithm supports partial fit")

    known_classes = self.classes_
    if known_classes is None and classes is None:
        raise ValueError("classes must be passed on the first call "
                         "to partial_fit.")
    elif known_classes is not None and classes is not None:
        if np.any(known_classes != np.unique(classes)):
            raise ValueError("`classes` is not the same as on last call "
                             "to partial_fit.")
    elif classes is not None:
        self.classes_ = classes

    # The binarizer is created once and reused by later calls.
    if not hasattr(self, '_lbin'):
        self._lbin = LabelBinarizer()
        self._lbin._classes = classes

    y = column_or_1d(y, warn=True)

    # needs a better way to check multi-label instances:
    # only the very first label is inspected.
    first_label = np.reshape(y, (-1, 1))[0][0]
    self.multi_label = isinstance(first_label, list)

    y = self._lbin.fit_transform(y)
    super(MultilayerPerceptronClassifier, self).partial_fit(X, y)
    return self
开发者ID:ddofer,项目名称:NeuralNetworks,代码行数:54,代码来源:multilayer_perceptron.py
示例19: fit_all
def fit_all(self, X, y, n_shop, last_obs_plan):
    """Fit, or continue fitting, the boosting stages on ``X`` and ``y``.

    NOTE(review): this block uses Python 2 ``print`` statements.

    Parameters
    ----------
    X : array-like
        Training data; converted with ``check_arrays`` to a dense array of
        dtype ``DTYPE`` and unpacked as ``(n_samples, n_features)``.
    y : array-like
        Targets, passed through ``column_or_1d``.
    n_shop : sequence
        Passed to ``get_truncated_shopping_indices`` (only its length is
        used when ``self.fix_history`` is set) and forwarded to
        ``self._fit_stages``. Presumably per-customer shopping-point
        counts -- TODO confirm against caller.
    last_obs_plan : array-like
        Indexed with the selected indices and fed to
        ``self.init_.predict`` to obtain the initial predictions.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If the model is already initialized and ``self.n_estimators`` is
        smaller than the number of estimators fitted so far.
    """
    # if not warmstart - clear the estimator state
    if not self.warm_start:
        self._clear_state()
    # Check input
    X, = check_arrays(X, dtype=DTYPE, sparse_format="dense")
    y = column_or_1d(y, warn=True)
    # n_samples is unpacked but not used below; the unpacking also
    # requires X to be two-dimensional.
    n_samples, n_features = X.shape
    self.n_features = n_features
    random_state = check_random_state(self.random_state)
    self._check_params()
    if not self._is_initialized():
        if self.verbose:
            print 'Initializing gradient boosting...'
        # init state
        self._init_state()
        # fit initial model
        if not self.fix_history:
            idx = get_truncated_shopping_indices(n_shop)
        else:
            # use every entry, in order
            idx = np.arange(len(n_shop))
        # init predictions by averaging over the shopping histories
        y_pred = self.init_.predict(last_obs_plan[idx])
        print 'First training accuracy:', accuracy_score(y, y_pred.argmax(axis=1))
        begin_at_stage = 0
    else:
        # add more estimators to fitted model
        # invariant: warm_start = True
        if self.n_estimators < self.estimators_.shape[0]:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'estimators_.shape[0]=%d when '
                             'warm_start==True'
                             % (self.n_estimators,
                                self.estimators_.shape[0]))
        # resume after the stages that already exist
        begin_at_stage = self.estimators_.shape[0]
        y_pred = self.decision_function(X)
        self._resize_state()
    # fit the boosting stages
    n_stages = self._fit_stages(X, y, y_pred, random_state, begin_at_stage, n_shop)
    # change shape of arrays after fit (early-stopping or additional tests)
    if n_stages != self.estimators_.shape[0]:
        self.estimators_ = self.estimators_[:n_stages]
        self.train_score_ = self.train_score_[:n_stages]
        if hasattr(self, 'oob_improvement_'):
            self.oob_improvement_ = self.oob_improvement_[:n_stages]
        if hasattr(self, '_oob_score_'):
            self._oob_score_ = self._oob_score_[:n_stages]
    return self
开发者ID:brandonckelly,项目名称:allstate,代码行数:54,代码来源:boosted_truncated_history.py
示例20: fit
def fit(self, X, y):
    """Finds the intervals of interest from the input data.

    For 2-D input, cut points are computed per column and stored in
    ``self.cut_points_`` as a dict keyed by column index. Only columns
    listed in ``self.continuous_features_`` are processed, and all columns
    are used when that attribute is None. For 1-D input,
    ``self.cut_points_`` holds the cut points of that single column.

    Parameters
    ----------
    X : The array containing features to be discretized. Continuous
        features should be specified by the `continuous_features_`
        attribute if `X` is a 2-D array.

    y : A list or array of class labels corresponding to `X`.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If `X` has more than two dimensions, or if `X` is 1-D while
        ``self.continuous_features_`` is not None.
    """
    # np.ndim / np.shape accept plain lists as well as arrays, so the
    # dimensionality check no longer fails with AttributeError on input
    # that check_array would have accepted.
    self.dimensions_ = np.ndim(X)
    if self.dimensions_ > 2:
        raise ValueError("Invalid input dimension for `X`. Input shape is "
                         "{0}".format(np.shape(X)))

    X = check_array(X, force_all_finite=True, ensure_2d=False)
    y = column_or_1d(y)
    y = check_array(y, ensure_2d=False, dtype=int)
    X, y = check_X_y(X, y)

    if not self.shuffle:
        import warnings
        warnings.warn("Shuffle parameter will be removed in the future.",
                      DeprecationWarning)
    else:
        # Apply one random permutation to samples and labels alike.
        state = check_random_state(self.random_state)
        perm = state.permutation(len(y))
        X = X[perm]
        y = y[perm]

    if self.dimensions_ == 2:
        if self.continuous_features_ is None:
            self.continuous_features_ = np.arange(X.shape[1])

        self.cut_points_ = dict()
        for index, col in enumerate(X.T):
            if index not in self.continuous_features_:
                continue
            cut_points = MDLPDiscretize(col, y, self.min_depth)
            self.cut_points_[index] = cut_points
    else:
        if self.continuous_features_ is not None:
            raise ValueError("Passed in a 1-d column of continuous features, "
                             "but continuous_features is not None")
        self.continuous_features_ = None
        cut_points = MDLPDiscretize(X, y, self.min_depth)
        self.cut_points_ = cut_points

    return self
开发者ID:hlin117,项目名称:mdlp-discretization,代码行数:53,代码来源:discretization.py
注：本文中的sklearn.utils.column_or_1d函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论