本文整理汇总了Python中sklearn.cross_validation.check_cv函数的典型用法代码示例。如果您正苦于以下问题:Python check_cv函数的具体用法?Python check_cv怎么用?Python check_cv使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了check_cv函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: benchmark
def benchmark(clf, X, y, cv=None):
    """Cross-validate ``clf`` and collect per-fold timings and diagnostics.

    For every ``(train, test)`` split produced by ``check_cv`` the estimator
    is fitted on the training rows and then used to predict the test rows.

    Returns
    -------
    dict
        ``train_times`` / ``test_times``: difference of two ``time()`` calls
        taken around ``clf.fit`` and ``clf.predict`` for each fold.
        ``confusion_matrices``: ``confusion_matrix(y_test, y_pred)`` per fold.
        ``confusion_matrix_indices``: per fold, the entries returned by
        ``confusion_matrix_instances`` looked up in ``test`` (presumably
        mapping fold-local positions back to rows of ``X`` -- confirm
        against that helper).
        ``coefs``: ``clf.coef_`` as found after each fit.
        Every value is wrapped in ``np.array``.
    """
    X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True)
    cv = check_cv(cv, X, y, classifier=is_classifier(clf))

    fit_seconds = []
    predict_seconds = []
    matrices = []
    matrix_indices = []
    coefficients = []

    for train, test in cv:
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]

        started = time()
        clf.fit(X_train, y_train)
        fit_seconds.append(time() - started)

        started = time()
        y_pred = clf.predict(X_test)
        predict_seconds.append(time() - started)

        matrices.append(confusion_matrix(y_test, y_pred))
        instances = confusion_matrix_instances(y_test, y_pred)
        matrix_indices.append(np.array(
            [[test[position] for position in cell] for cell in instances]))
        coefficients.append(clf.coef_)

    return dict(
        train_times=np.array(fit_seconds),
        test_times=np.array(predict_seconds),
        confusion_matrices=np.array(matrices),
        confusion_matrix_indices=np.array(matrix_indices),
        coefs=np.array(coefficients),
    )
开发者ID:EdwardBetts,项目名称:twitter-sentiment,代码行数:35,代码来源:evaluation.py
示例2: fit
def fit(self, epochs, y=None):
    """Fit one copy of ``self.gat`` per cross-validation fold on meta epochs.

    Parameters
    ----------
    epochs : object accepted by ``_check_epochs_input``
        Must support indexing with the training indices of each fold.
    y : array-like or None
        Targets, forwarded to ``_check_epochs_input``.

    Notes
    -----
    For every fold the training epochs are condensed with
    ``make_meta_epochs`` (``n_bin=self.n``) and a deep copy of ``self.gat``
    is fitted on them with a single train-only fold.  Afterwards
    ``self.gat`` is replaced by the copy fitted on the last fold, and its
    ``estimators_`` are rebuilt from the estimators of all folds.
    """
    from sklearn.cross_validation import check_cv, StratifiedKFold
    from mne.decoding.time_gen import _check_epochs_input

    X, y, self.gat.picks_ = _check_epochs_input(epochs, y, self.gat.picks)
    gat_list = list()

    cv = self.cv
    # ``np.integer`` also matches NumPy integer scalars.  The previous
    # ``np.int`` was merely an alias of the builtin ``int`` and no longer
    # exists in NumPy >= 1.24 (AttributeError).
    if isinstance(cv, (int, np.integer)):
        cv = StratifiedKFold(y, cv)
    cv = check_cv(cv, X, y, classifier=True)

    # Construct meta epoch and fit gat with a single fold
    for train, _ in cv:
        # meta trial
        epochs_ = make_meta_epochs(epochs[train], y[train], n_bin=self.n)
        # fit gat on every meta epoch: one fold, empty test set
        gat_ = deepcopy(self.gat)
        cv_one_fold = [(range(len(epochs_)), [])]
        gat_.cv = cv_one_fold
        gat_.fit(epochs_, epochs_.events[:, 2])
        gat_list.append(gat_)

    # gather: keep the last fold's object as the container for all folds
    self.gat = gat_
    self.gat.train_times_ = gat_.train_times_
    self.gat.estimators_ = np.squeeze(
        [gat.estimators_ for gat in gat_list]).T.tolist()
    self.gat.cv_ = cv
    self.gat.y_train_ = y
开发者ID:kingjr,项目名称:jr-tools,代码行数:28,代码来源:base.py
示例3: Bootstrap_cv
def Bootstrap_cv(estimator1, estimator2, X, y, score_func, cv=None, n_jobs=1,
                 verbose=0, ratio=.5):
    """Score a pair of estimators fold by fold with ``dual_cross_val_score``.

    Parameters
    ----------
    estimator1, estimator2 : estimators
        Each is cloned once per fold before being handed to
        ``dual_cross_val_score``.
    X, y : array-like
        Data and targets, validated with ``check_arrays``.
    score_func : callable or None
        Forwarded to ``dual_cross_val_score``.  When ``None`` both
        estimators must expose a ``score`` method.
    cv : object accepted by ``check_cv``
    n_jobs, verbose : int
        Passed to ``Parallel``.
    ratio : float
        Forwarded unchanged to ``dual_cross_val_score``.

    Returns
    -------
    numpy.ndarray
        The per-fold results wrapped in ``np.array``.

    Raises
    ------
    TypeError
        If ``score_func`` is None and one estimator has no ``score`` method.
    """
    X, y = cross_validation.check_arrays(X, y, sparse_format='csr')
    cv = cross_validation.check_cv(
        cv, X, y,
        classifier=cross_validation.is_classifier(estimator1))

    if score_func is None:
        # Report the estimator that actually lacks ``score``; previously
        # the message always blamed estimator1.
        for candidate in (estimator1, estimator2):
            if not hasattr(candidate, 'score'):
                raise TypeError(
                    "If no score_func is specified, the estimator passed "
                    "should have a 'score' method. The estimator %s "
                    "does not." % (candidate,))

    # We clone the estimators to make sure that all the folds are
    # independent, and that they are pickle-able.
    run_parallel = cross_validation.Parallel(n_jobs=n_jobs, verbose=verbose)
    scores = run_parallel(
        cross_validation.delayed(dual_cross_val_score)(
            cross_validation.clone(estimator1),
            cross_validation.clone(estimator2),
            X, y, score_func, train, test, verbose, ratio)
        for train, test in cv)
    return np.array(scores)
开发者ID:rdimaggio,项目名称:kaggle_overfitting,代码行数:25,代码来源:analysis_v1.py
示例4: _fit
def _fit(self, X, y, parameter_iterable):
    """Search ``parameter_iterable`` for the best parameters; optionally refit.

    Sets ``scorer_``, ``best_params_`` and ``best_score_`` on ``self`` and,
    when ``self.refit`` is truthy, ``best_estimator_`` fitted on all of
    ``X`` (and ``y`` if given).  Returns ``self``.
    """
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
    X, y = indexable(X, y)
    cv = check_cv(self.cv, X, y, classifier=is_classifier(self.estimator))
    base_estimator = clone(self.estimator)

    # best_parameters() returns an object that must be .compute()d
    # before its result fields can be read.
    best = best_parameters(base_estimator, cv, X, y, parameter_iterable,
                           self.scorer_, self.fit_params, self.iid).compute()
    self.best_params_ = best.parameters
    self.best_score_ = best.mean_validation_score

    if isinstance(base_estimator, Pipeline):
        base_estimator = base_estimator.to_sklearn().compute()

    if not self.refit:
        return self

    # fit the best estimator using the entire dataset
    best_estimator = base_estimator.set_params(**best.parameters)
    fit_args = (X,) if y is None else (X, y)
    self.best_estimator_ = best_estimator.fit(*fit_args, **self.fit_params)
    return self
开发者ID:konggas,项目名称:dasklearn,代码行数:27,代码来源:grid_search.py
示例5: add_del_cv
def add_del_cv(df, predictors, target, model, scoring='roc_auc', cv1=None,
               n_folds=8, n_jobs=-1, start=None, selmax=None, selmin=1,
               min_ratio=1e-7, max_steps=10, verbosity=0):
    """ Forward-Backward (ADD-DEL) selection using model.

    Alternates ``forward_cv`` and ``backward_cv`` passes until the selected
    set stops changing or ``max_steps`` rounds have been run.

    Parameters
    ----------
    df : data container accepted by ``df_xyf``
    predictors : list
        candidate predictors offered to the forward pass
    target : target specification passed to ``df_xyf``
    model : estimator
        ``is_classifier(model)`` decides how ``cv1`` is resolved
    scoring, n_folds, n_jobs, min_ratio :
        forwarded unchanged to ``forward_cv`` and ``backward_cv``
    cv1 : object accepted by ``cross_validation.check_cv``
    start : list or None
        initial selection; ``None`` means an empty selection.  The list is
        copied, so the caller's object is never aliased.
    selmax : forwarded to ``forward_cv``
    selmin : forwarded to ``backward_cv``
    max_steps : int
        maximum number of forward/backward rounds
    verbosity : int
        when > 0 the selection is printed after each pass; the passes
        themselves receive ``verbosity - 1``

    Returns
    -------
    selected: list
        selected predictors
    """
    def test_to_break(selected, selected_curr, to_break):
        # Count consecutive passes that left the selection unchanged.
        if set(selected) == set(selected_curr):
            return to_break + 1
        return 0

    X, y, _ = df_xyf(df, predictors=predictors, target=target)
    cv1 = cross_validation.check_cv(
        cv1, X=X, y=y,
        classifier=is_classifier(model))

    # Copy instead of aliasing: the previous mutable default ``start=[]``
    # was shared between calls and could be returned to the caller.
    selected_curr = [] if start is None else list(start)
    to_break = 0

    for i_step in range(max_steps):
        selected = forward_cv(
            df, predictors, target, model, scoring=scoring,
            cv1=cv1, n_folds=n_folds, n_jobs=n_jobs,
            start=selected_curr, selmax=selmax,
            min_ratio=min_ratio, verbosity=verbosity-1)
        to_break = test_to_break(selected, selected_curr, to_break)
        selected_curr = selected
        if verbosity > 0:
            print('forward:', ' '.join(selected_curr))
        # Stop once the forward pass is the second unchanged pass in a row.
        if to_break > 1:
            break

        selected = backward_cv(
            df, selected_curr, target, model, scoring=scoring,
            cv1=cv1, n_folds=n_folds, n_jobs=n_jobs, selmin=selmin,
            min_ratio=min_ratio, verbosity=verbosity-1)
        to_break = test_to_break(selected, selected_curr, to_break)
        selected_curr = selected
        if verbosity > 0:
            print('backward:', ' '.join(selected_curr))
        # Stop as soon as the backward pass removed nothing.
        if to_break > 0:
            break

    return selected_curr
开发者ID:orazaro,项目名称:kgml,代码行数:57,代码来源:feature_selection.py
示例6: dynamic_cross_val_predict
def dynamic_cross_val_predict(estimator, fv, esa_feature_list, unigram_feature_list, dynamic_X, y=None, cv=None,
                              verbose=0, fit_params=None):
    """Cross-validated prediction with a feature matrix rebuilt for each fold.

    Parameters
    ----------
    estimator : estimator, cloned for every fold
    fv : list of feature dicts, vectorised with ``DictVectorizer``
    esa_feature_list, unigram_feature_list : lists of feature names
        Each name is resolved with ``find_index_for_dynamic_feature``.
    dynamic_X : indexable
        ``dynamic_X[fold][feature_index][i]`` must be a dict that is merged
        into the i-th feature dict for that fold.
    y, cv, verbose, fit_params :
        as for ``cross_validation._fit_and_predict`` / ``check_cv``.

    Returns
    -------
    Predictions re-ordered to the original sample order.

    Raises
    ------
    ValueError
        If the test sets of ``cv`` do not form a partition of the samples.
    """
    dynamic_features = list(esa_feature_list) + list(unigram_feature_list)
    # Concatenate first, then format.  The old
    # ``"...%s" % esa_feature_list + unigram_feature_list`` applied ``%``
    # first and then tried to add a list to a str.
    print("dynamic predict cross val mit %s" % (dynamic_features,))

    vec = DictVectorizer()
    X = vec.fit_transform(fv).toarray()
    X, y = cross_validation.indexable(X, y)
    cv = cross_validation.check_cv(cv, X, y, classifier=cross_validation.is_classifier(estimator))

    preds_blocks = []
    for cross_val_step, (train, test) in enumerate(cv):
        # Rebuild X in every step from a fresh copy of the base features.
        fv_copy = copy.deepcopy(fv)
        dynamic_vec = dynamic_X[cross_val_step]
        for i, feature_dict in enumerate(fv_copy):
            # Merge this fold's dynamic features into the i-th feature dict.
            for feature in dynamic_features:
                feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i])
        X = vec.fit_transform(fv_copy).toarray()
        preds_blocks.append(cross_validation._fit_and_predict(cross_validation.clone(estimator), X, y,
                                                              train, test, verbose,
                                                              fit_params))

    preds = [p for p, _ in preds_blocks]
    locs = np.concatenate([loc for _, loc in preds_blocks])
    if not cross_validation._check_is_partition(locs, cross_validation._num_samples(X)):
        raise ValueError('cross_val_predict only works for partitions')

    # Invert the fold ordering so predictions line up with the input rows.
    inv_locs = np.empty(len(locs), dtype=int)
    inv_locs[locs] = np.arange(len(locs))

    # Check for sparse predictions
    if sp.issparse(preds[0]):
        preds = sp.vstack(preds, format=preds[0].format)
    else:
        preds = np.concatenate(preds)
    return preds[inv_locs]
开发者ID:Ftohei,项目名称:classification_tests,代码行数:55,代码来源:classifier.py
示例7: fit
def fit(self, X, y):
    """Actual fitting, performing the search over parameters.

    Samples ``self.n_iter`` candidates from ``self.param_distributions``,
    evaluates each with ``cv_fit_and_score`` in parallel and stores the
    winner in ``best_params_`` / ``best_score_``.  When ``self.refit`` is
    truthy a clone configured with the best parameters is fitted on all
    data and stored in ``best_estimator_``.

    Raises
    ------
    ValueError
        If ``y`` is given and its length differs from the sample count of X.
    """
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=cv)
        for parameters in parameter_iterable)

    # Each result is indexed as (score, parameters, ...).  Select on the
    # score only: sorting whole results compares the parameter dicts on a
    # score tie, which raises TypeError on Python 3.
    best = max(out, key=lambda result: result[0])
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(
            **best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
开发者ID:MD2Korg,项目名称:cStress-model,代码行数:54,代码来源:puffMarker.py
示例8: test_check_cv_return_types
def test_check_cv_return_types():
    """check_cv(3, ...) must return the splitter class matching the target."""
    features = np.ones((9, 2))

    # No target, not a classifier -> plain KFold.
    splitter = cval.check_cv(3, features, classifier=False)
    assert_true(isinstance(splitter, cval.KFold))

    # Binary and multiclass targets -> StratifiedKFold.
    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    splitter = cval.check_cv(3, features, y_binary, classifier=True)
    assert_true(isinstance(splitter, cval.StratifiedKFold))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    splitter = cval.check_cv(3, features, y_multiclass, classifier=True)
    assert_true(isinstance(splitter, cval.StratifiedKFold))

    # Multilabel and multioutput targets -> plain KFold.
    features = np.ones((5, 2))
    y_multilabel = [[1, 0, 1], [1, 1, 0], [0, 0, 0], [0, 1, 1], [1, 0, 0]]
    splitter = cval.check_cv(3, features, y_multilabel, classifier=True)
    assert_true(isinstance(splitter, cval.KFold))

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    splitter = cval.check_cv(3, features, y_multioutput, classifier=True)
    assert_true(isinstance(splitter, cval.KFold))
开发者ID:AppliedArtificialIntelligence,项目名称:scikit-learn,代码行数:21,代码来源:test_cross_validation.py
示例9: test_searchlight
def test_searchlight():
    """SearchLight must score exactly the voxels whose sphere holds the signal.

    A single voxel (2, 2, 2) carries a perfect copy of the condition; the
    number of spheres scoring 1 is checked for three radii.
    """
    # Create a toy dataset to run searchlight on
    # Initialize with 5x5x5 scans of random values on 30 frames
    rand = np.random.RandomState(0)
    frames = 30
    data = rand.rand(5, 5, 5, frames)
    # Builtin bool/int: the ``np.bool`` / ``np.int`` aliases used before
    # were removed in NumPy 1.24.
    mask = np.ones((5, 5, 5), bool)
    mask_img = nibabel.Nifti1Image(mask.astype(int), np.eye(4))

    # Create a condition array
    cond = np.arange(frames, dtype=int) > frames / 2

    # Create an activation pixel.
    data[2, 2, 2, :] = 0
    data[2, 2, 2][cond.astype(bool)] = 2
    data_img = nibabel.Nifti1Image(data, np.eye(4))

    # Define cross validation
    from sklearn.cross_validation import check_cv
    # avoid using KFold for compatibility with sklearn 0.10-0.13
    cv = check_cv(4, cond)
    n_jobs = 1

    # Run Searchlight with different radii
    # Small radius : only one pixel is selected
    sl = searchlight.SearchLight(mask_img, process_mask_img=mask_img,
                                 radius=0.5, n_jobs=n_jobs,
                                 scoring='accuracy', cv=cv)
    sl.fit(data_img, cond)
    assert_equal(np.where(sl.scores_ == 1)[0].size, 1)
    assert_equal(sl.scores_[2, 2, 2], 1.)

    # Medium radius : little ball selected
    sl = searchlight.SearchLight(mask_img, process_mask_img=mask_img, radius=1,
                                 n_jobs=n_jobs, scoring='accuracy', cv=cv)
    sl.fit(data_img, cond)
    assert_equal(np.where(sl.scores_ == 1)[0].size, 7)
    # The centre voxel and its six face neighbours.
    assert_equal(sl.scores_[2, 2, 2], 1.)
    assert_equal(sl.scores_[1, 2, 2], 1.)
    assert_equal(sl.scores_[2, 1, 2], 1.)
    assert_equal(sl.scores_[2, 2, 1], 1.)
    assert_equal(sl.scores_[3, 2, 2], 1.)
    assert_equal(sl.scores_[2, 3, 2], 1.)
    assert_equal(sl.scores_[2, 2, 3], 1.)

    # Big radius : big ball selected
    sl = searchlight.SearchLight(mask_img, process_mask_img=mask_img, radius=2,
                                 n_jobs=n_jobs, scoring='accuracy', cv=cv)
    sl.fit(data_img, cond)
    assert_equal(np.where(sl.scores_ == 1)[0].size, 33)
    assert_equal(sl.scores_[2, 2, 2], 1.)
开发者ID:VirgileFritsch,项目名称:nilearn,代码行数:53,代码来源:test_searchlight.py
示例10: test_check_cv_return_types
def test_check_cv_return_types():
    """check_cv(3, ...) must return the splitter class matching the target.

    Also covers the deprecated sequence-of-sequences multilabel format and
    its indicator-matrix equivalent.
    """
    features = np.ones((9, 2))

    # No target, not a classifier -> plain KFold.
    splitter = cval.check_cv(3, features, classifier=False)
    assert_true(isinstance(splitter, cval.KFold))

    # Binary and multiclass targets -> StratifiedKFold.
    y_binary = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    splitter = cval.check_cv(3, features, y_binary, classifier=True)
    assert_true(isinstance(splitter, cval.StratifiedKFold))

    y_multiclass = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    splitter = cval.check_cv(3, features, y_multiclass, classifier=True)
    assert_true(isinstance(splitter, cval.StratifiedKFold))

    # Multilabel / multioutput targets -> plain KFold.
    features = np.ones((5, 2))
    y_seq_of_seqs = [[], [1, 2], [3], [0, 1, 3], [2]]
    with warnings.catch_warnings(record=True):
        # deprecated sequence of sequence format
        splitter = cval.check_cv(3, features, y_seq_of_seqs, classifier=True)
    assert_true(isinstance(splitter, cval.KFold))

    y_indicator_matrix = LabelBinarizer().fit_transform(y_seq_of_seqs)
    splitter = cval.check_cv(3, features, y_indicator_matrix, classifier=True)
    assert_true(isinstance(splitter, cval.KFold))

    y_multioutput = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    splitter = cval.check_cv(3, features, y_multioutput, classifier=True)
    assert_true(isinstance(splitter, cval.KFold))
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:28,代码来源:test_cross_validation.py
示例11: cross_validate
def cross_validate(self, k=10):
    """Performs a k-fold cross validation of our training data.

    The three classifiers are fitted on each training split and vote on
    every test sample; the votes counted are the labels 0, 1 and -1.  One
    ``self.score_func`` result per fold is appended to ``self.scores``.

    Args:
        k: The number of folds for cross validation.
    """
    self.scores = []
    X, y = check_arrays(self.feature_vector,
                        self.classification_vector,
                        sparse_format='csr')
    cv = cross_validation.check_cv(
        k, self.feature_vector, self.classification_vector,
        classifier=True)

    voters = (self.classifier1, self.classifier2, self.classifier3)
    for train, test in cv:
        # Fit all three first, then predict with all three.
        for voter in voters:
            voter.fit(self.feature_vector[train],
                      self.classification_vector[train])
        ballots = [voter.predict(self.feature_vector[test])
                   for voter in voters]

        classification = []
        for predictions in zip(*ballots):
            neutral_count = predictions.count(0)
            positive_count = predictions.count(1)
            negative_count = predictions.count(-1)
            if neutral_count == negative_count == positive_count:
                # Three-way tie: defer to the first classifier.
                classification.append(predictions[0])
            elif neutral_count > max(positive_count, negative_count):
                classification.append(0)
            elif positive_count > max(neutral_count, negative_count):
                classification.append(1)
            elif negative_count > max(neutral_count, positive_count):
                classification.append(-1)

        classification = numpy.array(classification)
        self.scores.append(self.score_func(y[test], classification))
开发者ID:AlinaKay,项目名称:sentiment-analyzer,代码行数:50,代码来源:train.py
示例12: score
def score(self,test_parameter):
    """
    The score function to call in order to evaluate the quality
    of the parameter test_parameter

    Parameters
    ----------
    `test_parameter` : dict, the parameter to test

    Returns
    -------
    `score` : the CV score, either the list of all cv results or
    the mean (depending of score_format).  When the estimator is a
    callable, its return value is used directly and is wrapped in a
    one-element list for score_format == 'avg'.
    """
    if self._callable_estimator:
        if(self.score_format == 'avg'):
            return [self.estimator(test_parameter)]
        # format == 'cv'
        return self.estimator(test_parameter)

    cv = check_cv(self.cv, self.X, self.y, classifier=is_classifier(self.estimator))
    fold_results = [_fit_and_score(clone(self.estimator), self.X, self.y, self.scorer_,
                                   train, test, False, test_parameter,
                                   self.fit_params, return_parameters=True)
                    for train, test in cv]

    # Average of the fold scores, weighted by the size of each test set.
    detailed_score = []
    weighted_total = 0
    n_test_samples = 0
    for fold_score, fold_size, _, _ in fold_results:
        detailed_score.append(fold_score)
        weighted_total += fold_score * fold_size
        n_test_samples += fold_size
    mean_score = weighted_total / float(n_test_samples)

    if(self.score_format == 'avg'):
        return mean_score
    # format == 'cv'
    return detailed_score
开发者ID:BenJamesbabala,项目名称:DeepMining,代码行数:45,代码来源:smart_search.py
示例13: cross_val_predict
def cross_val_predict(estimator, X, y, cv=5, n_jobs=1, refit=False, predict_fun="predict"):
    """Return out-of-fold predictions arranged in the original sample order.

    Each fold is handled by ``_cross_val_predict`` on a fresh clone of
    ``estimator``.  When ``refit`` is truthy the result is a tuple
    ``(predictions, clone fitted on all of X, y)``.
    """
    X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True)
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    fold_predictions = Parallel(n_jobs=n_jobs)(
        delayed(_cross_val_predict)(
            clone(estimator), X, y, train, test, predict_fun)
        for train, test in cv)
    pred = np.concatenate(fold_predictions)

    # cv yields either index arrays or boolean masks for the test sets.
    if cv.indices:
        test_positions = [test for _, test in cv]
    else:
        test_positions = [np.where(test)[0] for _, test in cv]
    index = np.concatenate(test_positions)

    ## pred[index] = pred doesn't work as expected
    pred[index] = pred.copy()

    if not refit:
        return pred
    return pred, clone(estimator).fit(X,y)
开发者ID:luoq,项目名称:datatrek,代码行数:18,代码来源:stacking.py
示例14: my_cross_val_predict
def my_cross_val_predict(estimator, X, y=None, groups=None, cv=None, n_jobs=1,
                         verbose=0, fit_params=None, pre_dispatch='2*n_jobs',
                         method='predict'):
    """Cross-validated predictions plus the per-fold scores.

    Each fold is processed by ``_my_fit_and_predict`` on a clone of
    ``estimator``; it is expected to yield a
    ``(predictions, test_indices, score)`` triple.

    Returns
    -------
    (predictions, scores)
        ``predictions`` re-ordered to the input sample order and the
        concatenated fold scores.

    Raises
    ------
    AttributeError
        If ``estimator.<method>`` is not callable.
    ValueError
        If the test indices are not a permutation of all samples.
    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))

    # Ensure the estimator has implemented the passed decision function
    if not callable(getattr(estimator, method)):
        raise AttributeError('{} not implemented in estimator'
                             .format(method))

    if method in ['decision_function', 'predict_proba', 'predict_log_proba']:
        y = LabelEncoder().fit_transform(y)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    run_parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                            pre_dispatch=pre_dispatch)
    fold_jobs = (
        delayed(_my_fit_and_predict)(
            clone(estimator), X, y, train, test, verbose, fit_params, method)
        for train, test in cv.split(X, y, groups))
    prediction_blocks = run_parallel(fold_jobs)

    # Concatenate the predictions
    predictions = [block[0] for block in prediction_blocks]
    test_indices = np.concatenate([block[1] for block in prediction_blocks])
    scores = np.concatenate([block[2] for block in prediction_blocks])

    if not _check_is_permutation(test_indices, _num_samples(X)):
        raise ValueError('cross_val_predict only works for partitions')

    # Inverse permutation: position of every input sample in fold order.
    inv_test_indices = np.empty(len(test_indices), dtype=int)
    inv_test_indices[test_indices] = np.arange(len(test_indices))

    # Check for sparse predictions
    if sp.issparse(predictions[0]):
        predictions = sp.vstack(predictions, format=predictions[0].format)
    else:
        predictions = np.concatenate(predictions)
    return predictions[inv_test_indices], scores
开发者ID:teopir,项目名称:ifqi,代码行数:42,代码来源:ifs.py
示例15: dynamic_cross_val_score
def dynamic_cross_val_score(estimator, fv, esa_feature_list, unigram_feature_list, dynamic_X, y=None, scoring=None, cv=None,
                            verbose=0, fit_params=None):
    """Cross-validated scoring with a feature matrix rebuilt for each fold.

    Parameters
    ----------
    estimator : estimator, cloned for every fold
    fv : list of feature dicts, vectorised with ``DictVectorizer``
    esa_feature_list, unigram_feature_list : lists of feature names
        Each name is resolved with ``find_index_for_dynamic_feature``.
    dynamic_X : indexable
        ``dynamic_X[fold][feature_index][i]`` must be a dict that is merged
        into the i-th feature dict for that fold.
    y, scoring, cv, verbose, fit_params :
        as for ``cross_validation._fit_and_score`` / ``check_cv`` /
        ``check_scoring``.

    Returns
    -------
    numpy.ndarray
        First column of the stacked ``_fit_and_score`` results, one entry
        per fold.
    """
    dynamic_features = list(esa_feature_list) + list(unigram_feature_list)
    # Concatenate first, then format.  The old
    # ``"...%s" % esa_feature_list + unigram_feature_list`` applied ``%``
    # first and then tried to add a list to a str.
    print("dynamic cross val mit %s" % (dynamic_features,))

    vec = DictVectorizer()
    X = vec.fit_transform(fv).toarray()
    X, y = cross_validation.indexable(X, y)
    cv = cross_validation.check_cv(cv, X, y, classifier=cross_validation.is_classifier(estimator))
    scorer = cross_validation.check_scoring(estimator, scoring=scoring)

    scores = []
    for cross_val_step, (train, test) in enumerate(cv):
        # Rebuild X in every step from a fresh copy of the base features.
        fv_copy = copy.deepcopy(fv)
        dynamic_vec = dynamic_X[cross_val_step]
        for i, feature_dict in enumerate(fv_copy):
            # Merge this fold's dynamic features into the i-th feature dict.
            for feature in dynamic_features:
                feature_dict.update(dynamic_vec[find_index_for_dynamic_feature(feature)][i])
        X = vec.fit_transform(fv_copy).toarray()
        scores.append(cross_validation._fit_and_score(cross_validation.clone(estimator), X, y, scorer,
                                                      train, test, verbose, None, fit_params))
    return np.array(scores)[:, 0]
开发者ID:Ftohei,项目名称:classification_tests,代码行数:42,代码来源:classifier.py
示例16: cross_val_score_filter_feature_selection
def cross_val_score_filter_feature_selection(model,filter_function,filter_criteria, X, y, scoring=None, cv=None, n_jobs=1,
                                             verbose=0, fit_params=None,
                                             pre_dispatch='2*n_jobs'):
    """Cross-validated score where the features are filtered for each fold.

    For every split, ``filter_function(X, y, train, filter_criteria)``
    supplies the data matrix handed to ``_fit_and_score`` in place of X.

    Returns
    -------
    numpy.ndarray
        First column of the stacked ``_fit_and_score`` results, one entry
        per fold.
    """
    X, y = indexable(X, y)
    cv = check_cv(cv, X, y, classifier=is_classifier(model))
    scorer = check_scoring(model, scoring=scoring)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    run_parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
                            pre_dispatch=pre_dispatch)
    fold_jobs = (
        delayed(_fit_and_score)(
            clone(model), filter_function(X,y,train,filter_criteria), y, scorer,
            train, test, verbose, None,
            fit_params)
        for train, test in cv)
    fold_results = run_parallel(fold_jobs)
    return np.array(fold_results)[:, 0]
开发者ID:joewledger,项目名称:Cell-Line-Classification,代码行数:20,代码来源:Cross_Validator.py
示例17: fit
def fit(self,X,Y):
    """Run the subset search and refit the estimator on the best columns.

    On the first call (``self.best_subset`` falsy) the search state is
    initialised: feature count, scorer, CV splitter, an empty best subset
    with score 0 and the ``scores_`` cache.  ``get_best_subset`` is then
    run, and finally ``self.estimator`` is refitted on
    ``X[:, self.best_subset]``.  Returns ``self``.

    NOTE(review): the source lost its indentation; the extent of the
    ``if not self.best_subset`` body below is reconstructed -- confirm
    against the original file.
    """
    if not self.best_subset:
        # Number of columns of X.
        self.fshape = np.shape(X)[1]
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
        self.cv = check_cv(self.cv, X, Y, classifier=is_classifier(self.estimator))
        self.best_subset = tuple()
        self.best_subset_score = 0
        # Maps a subset (tuple) to its score; seeded with the empty subset.
        self.scores_ = {self.best_subset:self.best_subset_score}
    X = np.array(X)
    Y = np.array(Y)
    try:
        self.get_best_subset(X,Y)
    except KeyboardInterrupt:
        # Presumably deliberate: Ctrl-C stops the search early and the
        # best subset recorded so far is still used for the final fit.
        pass
    self.estimator = self.estimator.fit(X[:,self.best_subset],Y)
    return self
开发者ID:Khodeir,项目名称:datascience-cwk,代码行数:20,代码来源:wrappers.py
示例18: fit
def fit(self, X, y):
    """Fit KNN model by choosing the best `n_neighbors`.

    Every candidate in ``self.n_neighbors_try`` (1..5 when it is None) is
    scored on each CV fold using the distances from ``_pairwise_wmd``;
    the candidate with the highest mean score is stored in
    ``n_neighbors`` / ``n_neighbors_`` before the parent ``fit`` runs.
    The fold-by-candidate score matrix is kept in ``cv_scores_``.

    Parameters
    -----------
    X : scipy.sparse matrix, (n_samples, vocab_size)
        Data
    y : ndarray, shape (n_samples,) or (n_samples, n_targets)
        Target
    """
    if self.n_neighbors_try is not None:
        n_neighbors_try = self.n_neighbors_try
    else:
        n_neighbors_try = range(1, 6)

    X = check_array(X, accept_sparse='csr', copy=True)
    X = normalize(X, norm='l1', copy=False)

    cv = check_cv(self.cv, X, y)
    knn = KNeighborsClassifier(metric='precomputed', algorithm='brute')
    scorer = check_scoring(knn, scoring=self.scoring)

    fold_scores = []
    for train_ix, test_ix in cv:
        # Distances from every test document to every training document.
        dist = self._pairwise_wmd(X[test_ix], X[train_ix])
        knn.fit(X[train_ix], y[train_ix])
        per_k = []
        for k in n_neighbors_try:
            per_k.append(scorer(knn.set_params(n_neighbors=k), dist, y[test_ix]))
        fold_scores.append(per_k)

    scores = np.array(fold_scores)
    self.cv_scores_ = scores

    # Column with the highest mean over the folds wins.
    best_k_ix = np.argmax(np.mean(scores, axis=0))
    best_k = n_neighbors_try[best_k_ix]
    self.n_neighbors = self.n_neighbors_ = best_k
    return super(WordMoversKNNCV, self).fit(X, y)
开发者ID:jihyunp,项目名称:word_embeddings,代码行数:38,代码来源:word_movers_knn.py
示例19: _fit
def _fit(self, X, y, parameter_iterable):
    """Actual fitting, performing the search over parameters.

    Validates X and y, resolves the CV splitter, makes sure the dataset
    files exist and submits one ``evaluate`` task per
    (parameter set, dataset file) pair to a load-balanced view of
    ``Client()``.  The submitted tasks are stored in ``self.tasks`` as
    ``(list_of_results, params)`` pairs.  Returns ``self``.

    NOTE(review): the source lost its indentation; the extent of the
    ``if self.sync`` body below is reconstructed -- confirm against the
    original file.
    """
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = check_arrays(X, y, allow_lists=True, sparse_format='csr')
    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
        y = np.asarray(y)
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    # Only write the dataset files when none have been recorded yet.
    if not self.dataset_filenames:
        self.save_dataset_filename(X, y, cv)
    dataset_filenames = self.dataset_filenames

    client = Client()
    lb_view = client.load_balanced_view()
    if self.verbose > 0:
        # NOTE(review): if Client is IPython.parallel's, ``ids`` is an
        # attribute rather than a method and this call would fail --
        # confirm against the file's imports.
        print("Number of CPU core %d" % len(client.ids()))

    # One task per (params, dataset file); results are grouped by params.
    self.tasks = [([lb_view.apply(evaluate, estimator, dataset_filename, params)
                    for dataset_filename in dataset_filenames], params)
                  for params in parameter_iterable]
    if self.sync:
        # Synchronous mode: wait for the tasks, then collect the results.
        self.wait()
        self.set_grid_scores()
        self.set_best_score_params()
        if self.refit:
            self.set_best_estimator(estimator)
    return self
开发者ID:brenden17,项目名称:IPyGridSearchCV,代码行数:38,代码来源:grid_search_ipy.py
示例20: cross_val_score
def cross_val_score(estimator, X, y=None, score_func=None, cv=None, n_jobs=-1,
                    verbose=0, as_dvalues=False):
    """Evaluate a score by cross-validation.

    Replacement of :func:`sklearn.cross_validation.cross_val_score`, used to
    support computation of decision values.

    Each fold is evaluated by ``_cross_val_score`` on a clone of
    ``estimator``; ``as_dvalues`` is forwarded to it unchanged.  The
    per-fold results are returned wrapped in ``np.array``.

    Raises
    ------
    TypeError
        If ``score_func`` is None and ``estimator`` has no ``score`` method.
    """
    X, y = check_arrays(X, y, sparse_format='csr')
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    needs_builtin_score = score_func is None
    if needs_builtin_score and not hasattr(estimator, 'score'):
        raise TypeError(
            "If no score_func is specified, the estimator passed "
            "should have a 'score' method. The estimator %s "
            "does not." % estimator)

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    run_parallel = Parallel(n_jobs=n_jobs, verbose=verbose)
    fold_jobs = (
        delayed(_cross_val_score)(clone(estimator), X, y, score_func, train, test,
                                  verbose, as_dvalues)
        for train, test in cv)
    scores = run_parallel(fold_jobs)
    return np.array(scores)
开发者ID:mthomure,项目名称:glimpse-project,代码行数:23,代码来源:learn.py
注:本文中的sklearn.cross_validation.check_cv函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论