本文整理汇总了Python中sklearn.utils.check_X_y函数的典型用法代码示例。如果您正苦于以下问题:Python check_X_y函数的具体用法?Python check_X_y怎么用?Python check_X_y使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了check_X_y函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: fit_transform
def fit_transform(self, X, y=None):
    """
    Fit the wrapped classifier ``self.clf`` on the supplied data.

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object
    """
    if not isinstance(X, pd.DataFrame):
        # Validation only: the arrays returned by check_X_y are not kept.
        check_X_y(X, y)
    else:
        # convert_numpy supplies the features, the labels and the vectorizer.
        X, y, self.vectorizer = self.convert_numpy(X)
    self.clf.fit(X, y)
    return self
开发者ID:smsahu,项目名称:seldon-server,代码行数:25,代码来源:anomaly_wrapper.py
示例2: check_consistent_shape
def check_consistent_shape(X_train, y_train, X_test, y_test, y_train_pred,
                           y_test_pred):
    """Validate that train/test data, labels and predictions line up.

    Each (X, y) pair goes through ``check_X_y``, both prediction vectors go
    through ``column_or_1d``, and labels are length-checked against their
    predictions. Finally the train and test feature counts are compared.

    Parameters
    ----------
    X_train : numpy array of shape (n_samples, n_features)
        The training samples.
    y_train : list or array of shape (n_samples,)
        The ground truth of training samples.
    X_test : numpy array of shape (n_samples, n_features)
        The test samples.
    y_test : list or array of shape (n_samples,)
        The ground truth of test samples.
    y_train_pred : array-like, passed through ``column_or_1d``
        The predicted binary labels of the training samples.
    y_test_pred : array-like, passed through ``column_or_1d``
        The predicted binary labels of the test samples.

    Returns
    -------
    X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
        The six inputs, in that order, as returned by the validators.

    Raises
    ------
    ValueError
        If ``X_train`` and ``X_test`` have a different number of columns.
    """
    # check input data shapes are consistent
    X_train, y_train = check_X_y(X_train, y_train)
    X_test, y_test = check_X_y(X_test, y_test)
    y_test_pred = column_or_1d(y_test_pred)
    y_train_pred = column_or_1d(y_train_pred)

    # Train pair first, then test pair.
    for truth, predicted in ((y_train, y_train_pred), (y_test, y_test_pred)):
        check_consistent_length(truth, predicted)

    train_shape, test_shape = X_train.shape, X_test.shape
    if train_shape[1] == test_shape[1]:
        return X_train, y_train, X_test, y_test, y_train_pred, y_test_pred
    raise ValueError("X_train {0} and X_test {1} have different number "
                     "of features.".format(train_shape, test_shape))
开发者ID:flaviassantos,项目名称:pyod,代码行数:60,代码来源:data.py
示例3: fit
def fit(self, X, y=None):
    """Fit an ``xgb.XGBClassifier`` built from ``self.params``.

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples. A dataframe is converted either by
        ``self._load_from_dict`` (when ``self.dict_feature`` is set) or by
        ``self.convert_numpy``.
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object
    """
    if isinstance(X, pd.DataFrame):
        df = X
        if self.dict_feature is not None:
            if self.target_readable is not None:
                self.create_class_id_map(df, self.target, self.target_readable)
            (X, y) = self._load_from_dict(df)
        else:
            (X, y, self.vectorizer) = self.convert_numpy(df)
    else:
        # Validation only: the arrays returned by check_X_y are not kept.
        check_X_y(X, y)
    # The class count computed by the previous version was never used, so it
    # is no longer calculated.
    self.clf = xgb.XGBClassifier(**self.params)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(self.clf.get_params(deep=True))
    self.clf.fit(X, y, verbose=True)
    return self
开发者ID:yangwx1402,项目名称:seldon-server,代码行数:35,代码来源:xgb.py
示例4: test_check_array_warn_on_dtype_deprecation
def test_check_array_warn_on_dtype_deprecation():
    """Both validators must emit a DeprecationWarning for ``warn_on_dtype``."""
    features = np.asarray([[0.0], [1.0]])
    targets = np.asarray([[2.0], [3.0]])
    expected_message = "'warn_on_dtype' is deprecated"

    with pytest.warns(DeprecationWarning, match=expected_message):
        check_array(features, warn_on_dtype=True)

    with pytest.warns(DeprecationWarning, match=expected_message):
        check_X_y(features, targets, warn_on_dtype=True)
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:9,代码来源:test_validation.py
示例5: fit
def fit(self, X, y=None):
    """Derived from https://github.com/fchollet/keras/blob/master/keras/wrappers/scikit_learn.py

    Adds:
    Handling pandas inputs
    Saving of model into the class to allow for easy pickling

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object
    """
    if isinstance(X, pd.DataFrame):
        df = X
        (X, y, self.vectorizer) = self.convert_numpy(df)
    else:
        # Keep the validated arrays: the code below needs X.shape and
        # y.shape, which plain lists do not provide.
        X, y = check_X_y(X, y)
    input_width = X.shape[1]
    # np.unique accepts both a pandas Series and a numpy array; the previous
    # y.unique() only existed on pandas objects.
    num_classes = len(np.unique(y))
    logger.info("input_width %d", input_width)
    logger.info("num_classes %d", num_classes)
    self.model = self.model_create(input_width, num_classes)
    if len(y.shape) == 1:
        self.classes_ = list(np.unique(y))
        if self.loss == 'categorical_crossentropy':
            y = to_categorical(y)
    else:
        self.classes_ = np.arange(0, y.shape[1])
    if self.compiled_model_ is None:
        self.compiled_model_ = copy.deepcopy(self.model)
        self.compiled_model_.compile(optimizer=self.optimizer, loss=self.loss)
    self.compiled_model_.fit(
        X, y, batch_size=self.train_batch_size, nb_epoch=self.nb_epoch, verbose=self.verbose,
        shuffle=self.shuffle, show_accuracy=self.show_accuracy,
        validation_split=self.validation_split, validation_data=self.validation_data,
        callbacks=self.callbacks)
    self.config_ = self.model.to_json()
    # Write the weights to self.tmp_model, then read the raw bytes back onto
    # the instance (see "easy pickling" above).
    self.compiled_model_.save_weights(self.tmp_model)
    with open(self.tmp_model, mode='rb') as weights_file:  # b is important -> binary
        self.model_saved = weights_file.read()
    return self
开发者ID:PaulCousins,项目名称:seldon-server,代码行数:53,代码来源:keras.py
示例6: fit
def fit(self, X, y=None):
    """Convert data to vw lines and then train for required iterations

    Parameters
    ----------
    X : pandas dataframe or array-like
        training samples
    y : array like, required for array-like X and not used presently for pandas dataframe
        class labels

    Returns
    -------
    self: object

    Caveats :
    1. A seldon specific fork of wabbit_wappa is needed to allow vw to run in server mode without save_resume. Save_resume seems to cause issues with the scores returned. Maybe connected to https://github.com/JohnLangford/vowpal_wabbit/issues/262
    """
    if isinstance(X, pd.DataFrame):
        df = X
        df_base = self._exclude_include_features(df)
        df_base = df_base.fillna(0)
    else:
        check_X_y(X, y)
        df = pd.DataFrame(X)
        df_y = pd.DataFrame(y, columns=list('y'))
        self.target = 'y'
        df_base = pd.concat([df, df_y], axis=1)
        print(df_base.head())
    min_target = df_base[self.target].astype(float).min()
    # Formatting into one string keeps the printed text identical on
    # Python 2 and Python 3.
    print("min target  %s" % (min_target,))
    self.zero_based = bool(min_target == 0)
    if self.target_readable is not None:
        # NOTE(review): for array input `df` holds only the feature columns
        # (the target lives in df_base) - confirm this call is only expected
        # on the dataframe path.
        self.create_class_id_map(df, self.target, self.target_readable, zero_based=self.zero_based)
    self.num_classes = len(df_base[self.target].unique())
    print("num classes  %s" % (self.num_classes,))
    self._start_vw_if_needed("train")
    df_vw = df_base.apply(self._convert_row, axis=1)
    for _ in range(self.num_iterations):
        # .items() replaces .iteritems(), which was removed in pandas 2.0.
        for _, line in df_vw.items():
            self.vw.send_line(line, parse_result=False)
    self._save_model(self.model_file)
    return self
开发者ID:kurzgood,项目名称:seldon-server,代码行数:48,代码来源:vw.py
示例7: fit
def fit(self, X, y):
    '''
    Fit Relevance Vector Regression Model

    Parameters
    -----------
    X: {array-like,sparse matrix} of size [n_samples, n_features]
       Training data, matrix of explanatory variables

    y: array-like of size [n_samples]
       Target values

    Returns
    -------
    self: object
       self
    '''
    X, y = check_X_y(X, y, accept_sparse=['csr', 'coo', 'bsr'], dtype=np.float64)
    # kernelise features
    K = get_kernel(X, X, self.gamma, self.degree, self.coef0,
                   self.kernel, self.kernel_params)
    # fit the parent class on the kernel matrix (RegressionARD, per the
    # original comment)
    super(RVR, self).fit(K, y)
    # Row selection below needs __getitem__, so any sparse input is moved to
    # CSR. issparse/tocsr avoids the private scipy.sparse.coo/dia/bsr module
    # paths, and tocsr() on a CSR matrix does not copy.
    if scipy.sparse.issparse(X):
        X = X.tocsr()
    # Elementwise comparison kept as in the original implementation.
    self.relevant_ = np.where(self.active_ == True)[0]  # noqa: E712
    # check_X_y always returns a 2-D X, so only the 2-D row selection is
    # needed (the former 1-D branch could not be reached).
    self.relevant_vectors_ = X[self.relevant_, :]
    return self
开发者ID:OncoImmunity,项目名称:sklearn-bayes,代码行数:34,代码来源:fast_rvm.py
示例8: fit
def fit(self, X, y):
    """Fit joint quantile regression model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data.

    y : {array-like}, shape = [n_samples]
        Target values.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    UserWarning
        If ``self.eps > 0`` while ``self.nc_const`` is set.
    """
    if self.eps > 0 and self.nc_const:
        # NOTE(review): the message reads like a warning, yet it is raised as
        # an exception and aborts the fit. The raise is kept so callers that
        # catch it keep working - confirm whether warnings.warn was intended.
        raise UserWarning("eps is considered null because you chose to "
                          "enforce non-crossing constraints.")
    X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True)
    y = asarray(y).flatten()
    self._validate_params()
    self.linop_ = self._get_kernel_map(X)
    gram = self.linop_.Gram_dense(X)
    self.reg_c_ = 1. / self.lbda
    # Solve the optimization problem
    probs = asarray(self.probs).flatten()
    if self.nc_const:
        self._qp_nc(gram, y, probs)
    else:
        self._coneqp(gram, y, probs)
    return self
开发者ID:operalib,项目名称:operalib,代码行数:33,代码来源:quantile.py
示例9: _check_params
def _check_params(self, X, y):
    """Validate the inputs and the selector's settings.

    X and y are validated with ``check_X_y``. When ``self.categorical`` is
    falsy both are standardised with ``StandardScaler``. ``self.method``,
    ``self.k`` and ``self.categorical`` are then sanity checked and a few
    advisory messages are printed.

    Returns
    -------
    X, y : the validated (and possibly standardised) arrays.

    Raises
    ------
    ValueError
        For an unknown method, a non-integer or non-positive ``k``, a ``k``
        larger than the smallest class, or a non-boolean ``categorical``.
    """
    # checking input data and scaling it if y is continuous
    X, y = check_X_y(X, y)
    if not self.categorical:
        ss = StandardScaler()
        X = ss.fit_transform(X)
        # StandardScaler only accepts 2-D input: scale y as a single column
        # and flatten it back so y keeps its 1-D shape.
        y = ss.fit_transform(y.reshape(-1, 1)).ravel()
    # sanity checks
    methods = ['JMI', 'JMIM', 'MRMR']
    if self.method not in methods:
        raise ValueError('Please choose one of the following methods:\n' +
                         '\n'.join(methods))
    if not isinstance(self.k, int):
        raise ValueError("k must be an integer.")
    if self.k < 1:
        raise ValueError('k must be larger than 0.')
    if self.categorical:
        # Count only labels that actually occur. np.bincount also reports a
        # zero for every absent integer below the maximum (e.g. labels
        # {1, 2}) and rejects negative or non-integer labels.
        _, class_sizes = np.unique(y, return_counts=True)
        if np.any(self.k > class_sizes):
            raise ValueError('k must be smaller than your smallest class.')
    if not isinstance(self.categorical, bool):
        raise ValueError('Categorical must be Boolean.')
    # Single-argument print(...) gives the same output on Python 2 and 3.
    if self.categorical and np.unique(y).shape[0] > 5:
        print('Are you sure y is categorical? It has more than 5 levels.')
    if not self.categorical and self._isinteger(y):
        print('Are you sure y is continuous? It seems to be discrete.')
    if self._isinteger(X):
        print('The values of X seem to be discrete. MI_FS will treat them '
              'as continuous.')
    return X, y
开发者ID:RianaChen,项目名称:mifs,代码行数:32,代码来源:mifs.py
示例10: fit
def fit(self, X, y):
    """Validate the data, run the parent ``fit``, then fit ``self.sm``.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : ndarray, shape (n_samples, )
        Corresponding label for each sample in X.

    Returns
    -------
    self : object,
        Return self.
    """
    # Check the consistency of X and y
    X, y = check_X_y(X, y)

    parent = super(SMOTEENN, self)
    parent.fit(X, y)

    # Fit using SMOTE
    smote = self.sm
    smote.fit(X, y)

    return self
开发者ID:apyeh,项目名称:UnbalancedDataset,代码行数:26,代码来源:smote_enn.py
示例11: f_classifNumba
def f_classifNumba(X, y):
    """Compute the ANOVA F-value for the provided sample.

    The rows of X are grouped by their label in y and the groups are handed
    to ``f_onewayNumba``.

    Read more in the :ref:`User Guide <univariate_feature_selection>`.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape = [n_samples, n_features]
        The set of regressors that will tested sequentially.

    y : array of shape(n_samples)
        The label of each sample, used to group the rows of X.

    Returns
    -------
    F : array, shape = [n_features,]
        The set of F values.

    pval : array, shape = [n_features,]
        The set of p-values.

    See also
    --------
    chi2: Chi-squared stats of non-negative features for classification tasks.
    f_regression: F-value between label/feature for regression tasks.
    """
    X, y = check_X_y(X, y, ['csr', 'csc', 'coo'])
    groups = []
    for label in np.unique(y):
        row_selector = safe_mask(X, y == label)
        groups.append(X[row_selector])
    return f_onewayNumba(*groups)
开发者ID:stylianos-kampakis,项目名称:ADAN,代码行数:29,代码来源:feature_selection.py
示例12: my_smote
def my_smote(X, y, minority_target=None, per=0.5):
    """
    Over-sample the selected classes with synthetic samples produced by
    ``_smote._borderlineSMOTE`` and append them to the data.

    :param X: nd-array, sparse matrix, shape=[n_samples, n_features]
    :param y: nd-array, list, shape=[n_samples]
    :param minority_target: list of labels to over-sample; when None, the
        label with the fewest samples is used
    :param per: ratio used to size the synthetic set relative to the largest
        class (presumably the desired minority share - confirm with callers)
    :return: (X, Y) with the synthetic rows and their labels appended
    """
    X, Y = check_X_y(X, y, 'csr')
    labels = list(set(Y))
    # Counts are taken once, before any synthetic rows are appended.
    counts = [np.sum(Y == label) for label in labels]

    if minority_target is not None:
        chosen = [labels.index(target) for target in minority_target]
    else:
        chosen = [np.argmin(counts)]

    largest = np.max(counts)
    for pos in chosen:
        label = labels[pos]
        N = (int((largest * 1.0 / (1 - per) - largest) / counts[pos]) - 1) * 100
        # Only the synthetic samples of the returned triple are used.
        _, synthetic, _ = _smote._borderlineSMOTE(X, Y, label, N, k=5)
        new_labels = np.array([label] * synthetic.shape[0])
        X = sp.vstack([X, synthetic])
        Y = np.concatenate([Y, new_labels])
    return X, Y
开发者ID:zqlhuanying,项目名称:Image_Emotion,代码行数:29,代码来源:preprocessing.py
示例13: fit
def fit(self, X, y):
    """Fit ORFF ridge regression model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data.

    y : {array-like}, shape = [n_samples] or [n_samples, n_targets]
        Target values.

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                     y_numeric=True, multi_output=True)
    self._validate_params()

    # Number of output columns (1 for a flat target vector).
    if y.ndim > 1:
        self.p = y.shape[1]
    else:
        self.p = 1

    options = self.solver_params or {}
    self.linop_ = self._get_kernel(X, y)
    self.phix_ = self.linop_.get_orff_map(X, self.D)

    risk = ORFFRidgeRisk(self.lbda, 'LS')
    # Start the solver from an all-zero coefficient vector.
    start = zeros(self.phix_.shape[1], dtype=X.dtype)
    self.solver_res_ = minimize(risk.functional_grad_val, start,
                                args=(y.ravel(), self.phix_, self.linop_),
                                method=self.solver,
                                jac=True, options=options)
    self.coefs_ = self.solver_res_.x
    return self
开发者ID:operalib,项目名称:operalib,代码行数:33,代码来源:orff.py
示例14: fit
def fit(self, x, y):
    """
    Constructs GAM model(s) to predict y from X

    x: array of predictor values with each row being one observation
    y: 1 or 2 dimensional array of predicted values (a GAM model is constructed for each output if y is 2 dimensional)
    """
    # Input validation for standard estimators using sklearn utils
    x, y = check_X_y(x, y, accept_sparse=["csr", "csc", "coo"], multi_output=True)

    def to_r_matrix(values):
        # shape[1] does not exist for one-dimensional arrays, so those are
        # sent to R as a single column.
        n_cols = 1 if values.ndim == 1 else values.shape[1]
        return r.matrix(values, nrow=values.shape[0], ncol=n_cols)

    # Convert to R matrices
    rX = to_r_matrix(x)
    rY = to_r_matrix(y)

    # Compute models (one for each column in y)
    self.gammodels = self.computeGAM(rX, rY)
    return self
开发者ID:zyfang,项目名称:PythonCollection,代码行数:25,代码来源:gamr.py
示例15: fit
def fit(self, X, y=None):
    """Fit the model using X as training data.

    X is converted to a CSR matrix and its non-zero entries are passed to the
    ``_nearestNeighbors.fit`` extension call. The handle it returns is stored
    on the instance. Nothing is returned.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Training data. If array or matrix, shape = [n_samples, n_features]
    y : list, optional (default = None)
        List of classes for the given input of X. Size have to be n_samples."""
    if y is None:
        self._y_is_csr = False
    else:
        self._y_is_csr = True
        _, self._y = check_X_y(X, y, "csr", multi_output=True)
        # Only a target with more than one column keeps the flag set.
        self._y_is_csr = self._y.ndim != 1 and self._y.shape[1] != 1

    X_csr = csr_matrix(X)
    self._index_elements_count = X_csr.shape[0]
    row_ids, col_ids = X_csr.nonzero()
    # Largest number of stored entries found in any single row.
    max_row_nnz = int(max(X_csr.getnnz(1)))
    values = X_csr.data

    # returns a pointer to the inverse index stored in c++
    self._pointer_address_of_nearestNeighbors_object = _nearestNeighbors.fit(
        row_ids.tolist(), col_ids.tolist(), values.tolist(),
        X_csr.shape[0], max_row_nnz,
        self._pointer_address_of_nearestNeighbors_object)
开发者ID:joachimwolff,项目名称:minHashNearestNeighbors,代码行数:32,代码来源:nearestNeighborsCppInterface.py
示例16: fit
def fit(self, X, y):
    '''
    Fits an L1-penalised logistic regression on the kernelised features and
    records the training rows whose coefficients are non-zero.

    Parameters:
    -----------
    X: numpy array of size 'n x m'
       Matrix of explanatory variables

    y: numpy array of size 'n x 1'
       Vector of dependent variable

    Return
    ------
    obj: self
      self
    '''
    X, y = check_X_y(X, y, dtype=np.float64)
    K = get_kernel(X, X, self.gamma, self.degree, self.coef0, self.kernel,
                   self.kernel_params)

    settings = {
        "penalty": "l1",
        "dual": False,
        "C": self.C,
        "tol": self.tol,
        "fit_intercept": self.fit_intercept,
        "intercept_scaling": self.intercept_scaling,
        "n_jobs": self.n_jobs,
        "solver": 'liblinear',
        "multi_class": 'ovr',
        "max_iter": self.max_iter,
        "verbose": self.verbose,
        "random_state": self.random_state,
    }
    self._model = LogisticRegression(**settings)
    self._model = self._model.fit(K, y)

    # One index array per row of coef_: positions of the non-zero weights.
    nonzero_per_row = []
    for weights in self._model.coef_:
        nonzero_per_row.append(np.where(weights != 0)[0])
    self.relevant_indices_ = nonzero_per_row

    # The matching rows of the training matrix.
    selected_rows = []
    for row_ids in self.relevant_indices_:
        selected_rows.append(X[row_ids, :])
    self.relevant_vectors_ = selected_rows

    self.classes_ = self._model.classes_
    return self
开发者ID:Ferrine,项目名称:sklearn-bayes,代码行数:31,代码来源:kernel_models.py
示例17: fit
def fit(self, X, y):
    '''
    Fits variational Bayesian Logistic Regression

    Parameters
    ----------
    X: array-like of size [n_samples, n_features]
       Matrix of explanatory variables

    y: array-like of size [n_samples]
       Vector of dependent variables

    Returns
    -------
    self: object
       self
    '''
    # preprocess data
    X, y = check_X_y(X, y, dtype=np.float64)
    check_classification_targets(y)
    self.classes_ = np.unique(y)
    n_classes = len(self.classes_)

    # take into account bias term if required
    n_samples, n_features = X.shape
    n_features = n_features + int(self.fit_intercept)
    if self.fit_intercept:
        bias_column = np.ones([n_samples, 1])
        X = np.hstack((bias_column, X))

    # handle multiclass problems using One-vs-Rest
    if n_classes < 2:
        raise ValueError("Need samples of at least 2 classes")
    # Two classes need a single model; more classes need one model each.
    n_models = n_classes if n_classes > 2 else 1
    self.coef_ = [0] * n_models
    self.sigma_ = [0] * n_models
    self.intercept_ = [0] * n_models

    # hyperparameters handed to self._fit
    a = self.a + 0.5 * n_features
    b = self.b

    is_binary = n_classes == 2
    for i in range(n_models):
        pos_class = self.classes_[1] if is_binary else self.classes_[i]
        # 1.0 for samples of the positive class, 0.0 for all others.
        y_bin = (y == pos_class).astype(np.float64)
        coef_, sigma_ = self._fit(X, y_bin, a, b)
        if self.fit_intercept:
            # The leading weight belongs to the prepended column of ones.
            intercept_, coef_ = coef_[0], coef_[1:]
        else:
            intercept_ = 0
        self.coef_[i] = coef_
        self.intercept_[i] = intercept_
        self.sigma_[i] = sigma_

    self.coef_ = np.asarray(self.coef_)
    return self
开发者ID:jlopezpena,项目名称:Bayesian-Regression-Methods,代码行数:60,代码来源:vblr.py
示例18: fit
def fit(self, X, y):
    """Estimate class priors and one smoothed Gaussian per class.

    Parameters
    ----------
    X : array-like, validated by ``check_X_y``
        Training samples.
    y : array-like, validated by ``check_X_y``
        Class label of each sample.

    Returns
    -------
    self : object
        ``classes_``, ``class_count_``, ``class_prior_`` / ``priors`` and
        ``posteriors`` (one frozen ``multivariate_normal`` per class, in the
        order of ``classes_``) are set on the instance.
    """
    X, y = check_X_y(X, y)
    print("c=%s, cov_algo=%s" % (self.c, self.cov_algo))

    # One pass yields the sorted labels and how often each occurs. This
    # replaces three separate np.unique calls and a masked slice per class.
    self.classes_, counts = np.unique(y, return_counts=True)
    # Stored as floats, matching the zero-initialised arrays used before.
    self.class_count_ = counts.astype(np.float64)
    self.class_prior_ = self.class_count_ / np.sum(self.class_count_)
    self.priors = self.class_prior_

    self.posteriors = []
    for klass in self.classes_:
        examples = self._examples_for_class(klass, X, y)
        # NOTE(review): the [0] assumes examples.mean(0) is 2-D (e.g. a
        # numpy matrix) - confirm what _examples_for_class returns.
        mean = np.array(examples.mean(0))[0]
        cov = self._cov(examples)
        # Add c to the diagonal of the covariance estimate.
        cov_smoothed = cov + (self.c * np.eye(mean.shape[0]))
        p_x = multivariate_normal(mean=mean, cov=cov_smoothed)
        self.posteriors.append(p_x)
    return self
开发者ID:eggie5,项目名称:UCSD-MAS-DSE210,代码行数:32,代码来源:gaussian_classifier.py
示例19: _check_X_y
def _check_X_y(X, y):
    """Validate X and y while letting string-valued categorical features
    through (``dtype=None`` disables the numeric conversion).

    Returns the checked X, the checked y and the ``binarize_y`` flag reported
    by ``check_target_type``.
    """
    target, binarize_y = check_target_type(y, indicate_one_vs_all=True)
    X_checked, y_checked = check_X_y(
        X, target, accept_sparse=['csr', 'csc'], dtype=None)
    return X_checked, y_checked, binarize_y
开发者ID:scikit-learn-contrib,项目名称:imbalanced-learn,代码行数:7,代码来源:_smote.py
示例20: fit
def fit(self, X, y):
    """Fit ONORMA model.

    The coefficients are reset to zeros and ``partial_fit`` is called once
    per step, cycling through the samples in order.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data.

    y : {array-like}, shape = [n_samples] or [n_samples, n_targets]
        Target values.

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, accept_sparse=False, y_numeric=True,
                     multi_output=True)
    self._validate_params()
    # Number of steps: one per sample unless self.T is given.
    self.T_ = X.shape[0] if self.T is None else self.T
    self.t_ = 0

    multi_target = y.ndim > 1
    if multi_target:
        self.coefs_ = zeros(self.T_ * y.shape[1])
    else:
        self.coefs_ = zeros(self.T_)

    for step in range(self.T_):
        # Wrap around when more steps than samples are requested.
        row = step % X.shape[0]
        target = y[row, :] if multi_target else y[row]
        self.partial_fit(X[row, :], target)
    return self
开发者ID:operalib,项目名称:operalib,代码行数:31,代码来源:onorma.py
注:本文中的sklearn.utils.check_X_y函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论