本文整理汇总了Python中sklearn.model_selection.cross_val_score函数的典型用法代码示例。如果您正苦于以下问题:Python cross_val_score函数的具体用法?Python cross_val_score怎么用?Python cross_val_score使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cross_val_score函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Compare ECOBELM and ELM on the scaled 'iris' data set.

    Each model is scored with 5-fold cross-validated accuracy; the scoring
    is repeated 10 times and the average of the per-run fold means is
    printed.
    """
    from sklearn import preprocessing
    from sklearn.datasets import fetch_mldata
    from sklearn.model_selection import cross_val_score

    db_name = 'iris'
    hid_num = 1000
    repeats = 10

    # NOTE(review): fetch_mldata appears to be gone from recent scikit-learn
    # releases -- confirm against the version this script is pinned to.
    data_set = fetch_mldata(db_name)
    data_set.data = preprocessing.scale(data_set.data)

    def report_average_accuracy(estimator):
        """Print the accuracy of `estimator` averaged over all repeats."""
        total = 0
        for _ in range(repeats):
            fold_scores = cross_val_score(
                estimator, data_set.data, data_set.target,
                cv=5, scoring='accuracy')
            total += fold_scores.mean()
        print("Accuracy: %0.2f " % (total / repeats))

    print(db_name)

    print('ECOBELM', hid_num)
    report_average_accuracy(ECOBELM(hid_num, c=2**5))

    print('ELM', hid_num)
    report_average_accuracy(ELM(hid_num))
开发者ID:masaponto,项目名称:Python-ELM,代码行数:30,代码来源:ecob_elm.py
示例2: neural_net
def neural_net(features, target, test_size_percent=0.2, cv_split=3,
               n_iter=100, learning_rate=0.01):
    """Train a small feed-forward regressor and report cross-validated scores.

    features -> Pandas Dataframe with attributes as columns
    target -> Pandas Dataframe with target column for prediction
    test_size_percent -> value passed as ``test_size`` to train_test_split
    cv_split -> number of cross-validation folds
    n_iter, learning_rate -> forwarded to the Regressor

    Returns the Regressor fitted on the training split.
    """
    scale = preprocessing.MinMaxScaler()
    X_array = scale.fit_transform(features)
    # The same scaler object is refitted here, so afterwards it only holds
    # the scaling of the target.
    y_array = scale.fit_transform(target)

    mlp = Regressor(
        layers=[Layer("Rectifier", units=5),   # hidden layer 1
                Layer("Rectifier", units=3),   # hidden layer 2
                Layer("Linear")],              # output layer
        n_iter=n_iter,
        # Previously hard-coded to 0.01, which silently ignored the argument.
        learning_rate=learning_rate)

    X_train, X_test, y_train, y_test = train_test_split(
        X_array, y_array.T.squeeze(),
        test_size=test_size_percent, random_state=4)
    mlp.fit(X_train, y_train)

    # Only the fold count is taken from TimeSeriesSplit: an integer is passed
    # as ``cv`` below, not the time-ordered splitter itself.
    tscv = TimeSeriesSplit(cv_split)
    n_folds = tscv.n_splits

    training_score = cross_val_score(mlp, X_train, y_train, cv=n_folds)
    print("Cross-val Training score:", training_score.mean())

    training_predictions = cross_val_predict(mlp, X_train, y_train, cv=n_folds)
    testing_predictions = cross_val_predict(mlp, X_test, y_test, cv=n_folds)

    test_accuracy = metrics.r2_score(y_test, testing_predictions)
    print("Test-predictions accuracy:", test_accuracy)

    plot_model(target, y_train, y_test, training_predictions, testing_predictions)
    return mlp
开发者ID:SOLIMAN68,项目名称:Data-driven_Building_simulation_Polimi_EETBS,代码行数:32,代码来源:master_1_4_eachBuilding_allModels.py
示例3: three_models_combined
def three_models_combined(self, intrusion_features, avoidance_features, hypertension_features):
    """Fit one classifier per cut-off column and score their combination.

    Rows of ``self.df`` missing any of the three cut-off columns are dropped
    (``self.df`` is reassigned). The remaining rows are split 75/25,
    stratified on ``self.target``. Each sub-model is cross-validated for
    precision on the training part, refitted on all of it, and the
    element-wise product of the three test predictions is compared with
    ``self.target`` on the held-out part.

    Each ``*_features`` argument is the list of columns used by the
    corresponding sub-model. Nothing is returned; scores are printed.
    """
    # Rows lacking any of the three cut-offs cannot be used for training.
    for cutoff_column in ('intrusion_cutoff', 'avoidance_cutoff', 'hypertention_cutoff'):
        self.df = self.df[~self.df[cutoff_column].isna()]
    print("self.df.shape", self.df.shape)

    X = self.df
    Y = self.df[self.target]  # strict
    all_Y = [self.target, "intrusion_cutoff", "avoidance_cutoff", "hypertention_cutoff"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, self.df[all_Y], test_size=0.25, random_state=8526566, stratify=Y)

    def fit_sub_model(label, pipeline, feature_names, cutoff_column):
        """Cross-validate, fit and return test predictions for one sub-model."""
        X_sub = X_train[feature_names].values
        y_sub = y_train[cutoff_column].apply(int)
        scores = cross_val_score(pipeline, X_sub, y_sub,
                                 scoring='precision', cv=StratifiedKFold(5))
        # mean() instead of sum/5 so the figure stays right if the fold
        # count above is ever changed.
        print(f"{label} {scores.mean()}")
        pipeline.fit(X_sub, y_sub)
        return pipeline.predict(X_test[feature_names].values)

    # intrusion
    # NOTE(review): the first step is named 'rfe' but holds BorderlineSMOTE;
    # the name is kept so existing parameter paths do not change.
    y_pred_intrusion = fit_sub_model(
        "intrusion",
        Pipeline(steps=[
            ('rfe', BorderlineSMOTE()),
            ('classifier', XGBClassifier(n_estimators=100, reg_alpha=1))]),
        intrusion_features, "intrusion_cutoff")

    # avoidance
    y_pred_avoidance = fit_sub_model(
        "avoidance",
        Pipeline(steps=[
            ('classifier', XGBClassifier(n_estimators=100, scale_pos_weight=3, reg_alpha=1))]),
        avoidance_features, "avoidance_cutoff")

    # hypertension
    y_pred_hypertension = fit_sub_model(
        "hypertension",
        Pipeline(steps=[
            ('classifier', BalancedBaggingClassifier(n_estimators=100))]),
        hypertension_features, "hypertention_cutoff")

    ## combine three classifiers: positive only when all three agree
    y_pred = (y_pred_hypertension * y_pred_avoidance * y_pred_intrusion)
    # Was y_test["PCL_Strict3"]; y_test only carries that column when it is
    # self.target, so index by self.target directly.
    y_target = y_test[self.target].apply(int)

    acc = accuracy_score(y_target, y_pred)
    f1 = f1_score(y_target, y_pred)
    recall = recall_score(y_target, y_pred)
    precision = precision_score(y_target, y_pred)
    print("test scores")
    print(f"acc-{acc}, f1- {f1}, recall-{recall}, precision - {precision}")
开发者ID:nogur9,项目名称:PTSD,代码行数:60,代码来源:EDA_backend.py
示例4: test_score_memmap
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()

    # 16 bytes on disk: two float64 values when mapped without a shape,
    # a single 0-d value when mapped with shape=().
    with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tf:
        tf.write(b'Hello world!!!!!')

    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except OSError:
                # WindowsError is only defined on Windows (where it is an
                # alias of OSError); naming it elsewhere would turn a failed
                # unlink into a NameError.
                sleep(1.)
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:25,代码来源:test_validation.py
示例5: _cross_validation
def _cross_validation(self, sentences, labels, intent_features, spacy_nlp, max_ngrams):
    """Pick the number of ngrams for the bag-of-words vectors by cross validation.

    Up to eight candidate counts between 1 and ``max_ngrams`` are scored
    with a class-balanced logistic regression; the count with the best mean
    score is returned. When the smallest intent has fewer than three
    examples a warning is issued and ``max_ngrams`` is returned unchanged.
    """
    from sklearn import preprocessing
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score
    import numpy as np

    classifier = LogisticRegression(class_weight='balanced')
    y = preprocessing.LabelEncoder().fit(labels).transform(labels)

    # The fold count is capped by the size of the rarest class.
    if y.size > 0:
        cv_splits = min(10, np.min(np.bincount(y)))
    else:
        cv_splits = 0

    if not cv_splits >= 3:
        warnings.warn("Can't cross-validate ngram featurizer. There aren't enough examples per intent (at least 3)")
        return max_ngrams

    def mean_cv_score(ngram_count):
        """Mean cross-validation score of bow vectors with `ngram_count` ngrams."""
        bow = self._create_bow_vecs(intent_features, sentences, spacy_nlp, max_ngrams=ngram_count)
        return np.mean(cross_val_score(classifier, bow, y, cv=cv_splits))

    logger.debug("Started ngram cross-validation to find best number of ngrams to use...")
    candidates = np.unique([int(v) for v in np.floor(np.linspace(1, max_ngrams, 8))])
    baseline_score = mean_cv_score(0)

    scores = []
    for n in candidates:
        score = mean_cv_score(n)
        scores.append(score)
        logger.debug("Evaluating usage of {} ngrams. Score: {}".format(n, score))

    n_top = candidates[np.argmax(scores)]
    logger.debug("Score without ngrams: {}".format(baseline_score))
    logger.info("Best score with {} ngrams: {}".format(n_top, np.max(scores)))
    return n_top
开发者ID:dhpollack,项目名称:rasa_nlu,代码行数:35,代码来源:ngram_featurizer.py
示例6: main
def main():
    """Train, tune and apply a survival classifier on the Titanic CSV files.

    Reads ``data/train.csv`` and ``data/test.csv``, fills missing values,
    vectorizes the selected features, prints 5-fold scores for a random
    forest and an XGBoost classifier, grid-searches XGBoost parameters and
    writes the predictions of the best model to
    ``data/submission/titanic_submission.csv``.
    """
    # 1. Inspect the training and test data
    train_data = pandas.read_csv('data/train.csv')
    test_data = pandas.read_csv('data/test.csv')
    print(train_data.info())
    print(test_data.info())

    # 2. Hand-pick the features considered useful for prediction
    selected_features = ['Pclass', 'Sex', 'Age', 'Embarked', 'SibSp', 'Parch', 'Fare']
    # Explicit copies: the fills below must modify these frames themselves,
    # not a temporary view of train_data / test_data.
    x_train = train_data[selected_features].copy()
    x_test = test_data[selected_features].copy()
    y_train = train_data['Survived']

    # 3. Fill in missing values
    # Embarked is known to have missing values that need filling
    print(x_train['Embarked'].value_counts())
    print(x_test['Embarked'].value_counts())
    # For a categorical feature, filling with the most frequent value is one
    # way to limit the error introduced.
    # Assign the result back instead of fillna(inplace=True) on a selected
    # column, which is not guaranteed to update the frame.
    x_train['Embarked'] = x_train['Embarked'].fillna('S')
    x_test['Embarked'] = x_test['Embarked'].fillna('S')
    x_train['Age'] = x_train['Age'].fillna(x_train['Age'].mean())
    x_test['Age'] = x_test['Age'].fillna(x_test['Age'].mean())
    x_test['Fare'] = x_test['Fare'].fillna(x_test['Fare'].mean())
    print(x_train.info())
    print(x_test.info())

    # 4. Vectorize the features with DictVectorizer
    dict_vectorizer = DictVectorizer(sparse=False)
    # 'records' is the documented orient name; the abbreviation 'record' is
    # rejected by current pandas.
    x_train = dict_vectorizer.fit_transform(x_train.to_dict(orient='records'))
    print(dict_vectorizer.feature_names_)
    x_test = dict_vectorizer.transform(x_test.to_dict(orient='records'))

    # 5. Train the models
    forest_classifier = RandomForestClassifier()
    xgb_classifier = XGBClassifier()
    # Evaluate both with 5-fold cross validation
    forest_mean_score = cross_val_score(forest_classifier, x_train, y_train, cv=5).mean()
    print(forest_mean_score)
    xgb_mean_score = cross_val_score(xgb_classifier, x_train, y_train, cv=5).mean()
    print(xgb_mean_score)

    # 6. Look for a better hyper-parameter combination with a parallel grid search
    params = {
        'max_depth': range(2, 8),
        'n_estimators': range(100, 1200, 200),
        'learning_rate': [0.05, 0.1, 0.25, 0.5, 1.0],
    }
    xgbc_best = XGBClassifier()
    grid_search_cv = GridSearchCV(xgbc_best, params, n_jobs=-1, cv=5)
    grid_search_cv.fit(x_train, y_train)
    print(grid_search_cv.best_score_)
    print(grid_search_cv.best_params_)

    # 7. Predict and write the result to a file
    predict_result = grid_search_cv.predict(x_test)
    submission_data = pandas.DataFrame(
        {'PassengerId': test_data['PassengerId'], 'Survived': predict_result})
    submission_data.to_csv('data/submission/titanic_submission.csv', index=False)
开发者ID:ACEGuiPeng,项目名称:kaggle_demo_tests,代码行数:60,代码来源:example_titanic_pratice.py
示例7: test_cross_val_score_fit_params
def test_cross_val_score_fit_params():
    """Pass assorted ``fit_params`` through ``cross_val_score``.

    Dense arrays, sparse matrices and plain objects are supplied; the
    ``callback`` entry is a function that compares the non-array values
    found on the classifier with the ones supplied here.
    """
    clf = MockClassifier()
    sample_count = X.shape[0]
    class_count = len(np.unique(y))

    # A 10x1 sparse matrix with a single non-zero entry at row 1, column 0.
    values = np.array([1])
    rows, cols = np.array([1]), np.array([0])
    sparse_weights = coo_matrix((values, (rows, cols)), shape=(10, 1))
    sparse_param = coo_matrix(np.eye(5))

    dummy_int = 42
    dummy_str = '42'
    dummy_obj = object()

    def assert_fit_params(clf):
        # Function to test that the values are passed correctly to the
        # classifier arguments for non-array type
        assert_equal(clf.dummy_int, dummy_int)
        assert_equal(clf.dummy_str, dummy_str)
        assert_equal(clf.dummy_obj, dummy_obj)

    fit_params = dict(
        sample_weight=np.ones(sample_count),
        class_prior=np.ones(class_count) / class_count,
        sparse_sample_weight=sparse_weights,
        sparse_param=sparse_param,
        dummy_int=dummy_int,
        dummy_str=dummy_str,
        dummy_obj=dummy_obj,
        callback=assert_fit_params,
    )
    cross_val_score(clf, X, y, fit_params=fit_params)
开发者ID:447327642,项目名称:scikit-learn,代码行数:30,代码来源:test_validation.py
示例8: Random_forest
def Random_forest(features, target, test_size_percent=0.2, cv_split=3):
    """Fit a random-forest regressor and report cross-validated scores.

    features -> Pandas Dataframe with attributes as columns
    target -> Pandas Dataframe with target column for prediction
    test_size_percent -> value passed as ``test_size`` to train_test_split
    cv_split -> number of cross-validation folds

    Returns the RandomForestRegressor fitted on the training split.
    """
    # as_matrix() no longer exists in current pandas; to_numpy() is its
    # documented replacement.
    X_array = features.to_numpy()
    y_array = target.to_numpy()

    model_rdf = RandomForestRegressor()
    X_train, X_test, y_train, y_test = train_test_split(
        X_array, y_array.T.squeeze(),
        test_size=test_size_percent, random_state=4)
    model_rdf.fit(X_train, y_train)

    # Only the fold count is taken from TimeSeriesSplit: an integer is passed
    # as ``cv`` below, not the time-ordered splitter itself.
    tscv = TimeSeriesSplit(cv_split)
    n_folds = tscv.n_splits

    training_score = cross_val_score(model_rdf, X_train, y_train, cv=n_folds)
    print("Cross-val Training score:", training_score.mean())

    training_predictions = cross_val_predict(model_rdf, X_train, y_train, cv=n_folds)
    testing_predictions = cross_val_predict(model_rdf, X_test, y_test, cv=n_folds)

    test_accuracy = metrics.r2_score(y_test, testing_predictions)
    print("Test-predictions accuracy:", test_accuracy)

    plot_model(target, y_train, y_test, training_predictions, testing_predictions)
    return model_rdf
开发者ID:SOLIMAN68,项目名称:Data-driven_Building_simulation_Polimi_EETBS,代码行数:25,代码来源:master_1_4_eachBuilding_allModels.py
示例9: svm_regressor
def svm_regressor(features, target, test_size_percent=0.2, cv_split=5):
    """Fit an RBF-kernel SVR on min-max scaled data and report its scores.

    features -> Pandas Dataframe with attributes as columns
    target -> Pandas Dataframe with target column for prediction
    test_size_percent -> value passed as ``test_size`` to train_test_split
    cv_split -> number of cross-validation folds

    Returns the SVR fitted on the training split.
    """
    scale = preprocessing.MinMaxScaler()
    X_array = scale.fit_transform(features)
    # The same scaler object is refitted here, so afterwards it only holds
    # the scaling of the target.
    y_array = scale.fit_transform(target)

    X_train, X_test, y_train, y_test = train_test_split(
        X_array, y_array.T.squeeze(),
        test_size=test_size_percent, random_state=4)

    svr = SVR(kernel='rbf', C=10, gamma=1)
    svr.fit(X_train, y_train.ravel())

    # Only the fold count is taken from TimeSeriesSplit: an integer is passed
    # as ``cv`` below, not the time-ordered splitter itself.
    tscv = TimeSeriesSplit(cv_split)
    n_folds = tscv.n_splits

    training_score = cross_val_score(svr, X_train, y_train, cv=n_folds)
    print("Cross-val Training score:", training_score.mean())

    testing_predictions = cross_val_predict(svr, X_test, y_test, cv=n_folds)
    test_accuracy = metrics.r2_score(y_test, testing_predictions)
    print("Test-predictions accuracy:", test_accuracy)
    return svr
开发者ID:SOLIMAN68,项目名称:Data-driven_Building_simulation_Polimi_EETBS,代码行数:25,代码来源:master_1_4_eachBuilding_allModels.py
示例10: linear_regression
def linear_regression(features, target, test_size_percent=0.2, cv_split=5):
    """Fit an ordinary least squares model and report cross-validated scores.

    features -> Pandas Dataframe with attributes as columns
    target -> Pandas Dataframe with target column for prediction
    test_size_percent -> value passed as ``test_size`` to train_test_split
    cv_split -> number of cross-validation folds

    Returns the LinearRegression fitted on the training split.
    """
    # as_matrix() no longer exists in current pandas; to_numpy() is its
    # documented replacement.
    X_array = features.to_numpy()
    y_array = target.to_numpy()

    ols = linear_model.LinearRegression()
    X_train, X_test, y_train, y_test = train_test_split(
        X_array, y_array.T.squeeze(),
        test_size=test_size_percent, random_state=4)
    ols.fit(X_train, y_train)

    # Only the fold count is taken from TimeSeriesSplit: an integer is passed
    # as ``cv`` below, not the time-ordered splitter itself.
    tscv = TimeSeriesSplit(cv_split)
    n_folds = tscv.n_splits

    training_score = cross_val_score(ols, X_train, y_train, cv=n_folds)
    print("Cross-val Training score:", training_score.mean())

    training_predictions = cross_val_predict(ols, X_train, y_train, cv=n_folds)
    testing_predictions = cross_val_predict(ols, X_test, y_test, cv=n_folds)

    test_accuracy = metrics.r2_score(y_test, testing_predictions)
    print("Test-predictions accuracy:", test_accuracy)

    plot_model(target, y_train, y_test, training_predictions, testing_predictions)
    return ols
开发者ID:SOLIMAN68,项目名称:Data-driven_Building_simulation_Polimi_EETBS,代码行数:29,代码来源:master_1_4_eachBuilding_allModels.py
示例11: fit
def fit(self, X_train, y_train):
    """Fit three cut-off classifiers plus a regressor and score their AND.

    ``X_train`` must contain the columns in ``self.features`` as well as
    "intrusion_cutoff", "avoidance_cutoff", "hypertention_cutoff" and
    "PCL3", which serve as the sub-model targets. ``y_train`` is the overall
    target the combined prediction is compared with.

    The fitted pipelines are stored on ``self.pipe_intrusion``,
    ``self.pipe_avoidance``, ``self.pipe_hypertension`` and
    ``self.pipe_regression``. Scores are printed; nothing is returned.
    """
    X_features = X_train[self.features].values

    def fit_cutoff_model(label, cutoff_column, pos_weight):
        """Cross-validate and fit the RFE + XGBoost pipeline for one cut-off."""
        y_cutoff = X_train[cutoff_column].apply(int)
        pipeline = Pipeline(steps=[
            ('rfe', RFE(XGBClassifier(n_estimators=self.n_estimators, reg_alpha=1,
                                      scale_pos_weight=pos_weight), self.rfe)),
            ('classifier', XGBClassifier(n_estimators=self.n_estimators, reg_alpha=1,
                                         scale_pos_weight=pos_weight))])
        scores = cross_val_score(pipeline, X_features, y_cutoff,
                                 scoring='precision', cv=StratifiedKFold(5))
        # mean() instead of sum/5 so the figure stays right if the fold
        # count above is ever changed.
        print(f"{label} {scores.mean()}")
        # One fit is enough: the original also fitted before cross-validating,
        # and that fit was overwritten by this one.
        pipeline.fit(X_features, y_cutoff)
        return pipeline

    # intrusion
    self.pipe_intrusion = fit_cutoff_model("intrusion", "intrusion_cutoff", 3)
    # avoidance
    self.pipe_avoidance = fit_cutoff_model("avoidance", "avoidance_cutoff", 6)
    # hypertension
    self.pipe_hypertension = fit_cutoff_model("hypertension", "hypertention_cutoff", 4)

    # regression
    self.pipe_regression = Pipeline(steps=[
        ('classifier', Ridge())])
    self.pipe_regression.fit(X_features, X_train["PCL3"])

    # target: positive only when every sub-model says so
    y_pred_hypertension = self.pipe_hypertension.predict(X_features)
    y_pred_avoidance = self.pipe_avoidance.predict(X_features)
    y_pred_intrusion = self.pipe_intrusion.predict(X_features)
    y_pred_regression = self.pipe_regression.predict(X_features) >= self.cutoff
    # The regression term used to be ANDed in twice; once is equivalent.
    y_pred = (y_pred_hypertension & y_pred_avoidance & y_pred_intrusion & y_pred_regression)
    y_target = y_train

    acc = accuracy_score(y_target, y_pred)
    f1 = f1_score(y_target, y_pred)
    recall = recall_score(y_target, y_pred)
    precision = precision_score(y_target, y_pred)
    # NOTE(review): the label says "test" but these are computed on the
    # data passed to fit; message text left unchanged.
    print("test scores")
    print(f"acc-{acc}, f1- {f1}, recall-{recall}, precision - {precision}")
开发者ID:nogur9,项目名称:PTSD,代码行数:56,代码来源:ensembler_single_features.py
示例12: tune_spam
def tune_spam(X_train, y_train, alpha_list):
    """Return the SVC ``C`` value with the best 5-fold accuracy.

    X_train, y_train -> training data handed to cross_val_score
    alpha_list -> indexable sequence of candidate ``C`` values

    The mean accuracy of every candidate is printed as it is computed. When
    several candidates tie, the first one wins.

    Raises ValueError if ``alpha_list`` is empty.
    """
    # len() rather than truthiness so that numpy arrays are accepted too.
    if len(alpha_list) == 0:
        raise ValueError("alpha_list must contain at least one candidate C value")

    val_accuracy = []
    for alpha in alpha_list:
        model = SVC(C=alpha)
        # Computed once and reused; the original ran the whole
        # cross-validation a second time just to print it.
        score = np.mean(cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy'))
        val_accuracy.append(score)
        print([score])

    max_index = val_accuracy.index(max(val_accuracy))
    print("CV_val_error:", val_accuracy)
    print("Best C:", alpha_list[max_index])
    return alpha_list[max_index]
开发者ID:ZhenqiWangC,项目名称:models,代码行数:10,代码来源:hw1.py
示例13: test_cross_val_score_allow_nans
def test_cross_val_score_allow_nans():
    # Check that cross_val_score allows input data with NaNs
    X = np.arange(200, dtype=np.float64).reshape(10, -1)
    X[2, :] = np.nan
    # Floor division: a repeat count must be an integer, and "/" yields a
    # float under Python 3.
    y = np.repeat([0, 1], X.shape[0] // 2)
    # NOTE(review): Imputer with missing_values='NaN' is the older
    # scikit-learn spelling -- confirm it exists in the targeted version.
    p = Pipeline([
        ('imputer', Imputer(strategy='mean', missing_values='NaN')),
        ('classifier', MockClassifier()),
    ])
    cross_val_score(p, X, y, cv=5)
开发者ID:447327642,项目名称:scikit-learn,代码行数:10,代码来源:test_validation.py
示例14: test_k_fold_cv
def test_k_fold_cv():
    """Test OneHotEncoder with categorical_features='auto'."""
    boston = load_boston()

    # Encoder followed by a linear model, scored with shuffled 10-fold CV.
    encoder = OneHotEncoder(
        categorical_features='auto',
        sparse=False,
        minimum_fraction=0.05,
    )
    clf = make_pipeline(encoder, LinearRegression())
    folds = KFold(n_splits=10, shuffle=True)

    cross_val_score(clf, boston.data, boston.target, cv=folds)
开发者ID:EpistasisLab,项目名称:tpot,代码行数:13,代码来源:one_hot_encoder_tests.py
示例15: test_precomputed_cross_validation
def test_precomputed_cross_validation():
    # Ensure array is split correctly
    rng = np.random.RandomState(0)
    X = rng.rand(20, 2)
    y = rng.randint(3, size=20)
    # Pairwise distances of the same points, for the 'precomputed' metric.
    D = pairwise_distances(X, metric='euclidean')

    estimator_classes = (
        neighbors.KNeighborsClassifier,
        neighbors.RadiusNeighborsClassifier,
        neighbors.KNeighborsRegressor,
        neighbors.RadiusNeighborsRegressor,
    )
    for Est in estimator_classes:
        # Scores from raw features and from precomputed distances must match.
        from_features = cross_val_score(Est(), X, y)
        from_distances = cross_val_score(Est(metric='precomputed'), D, y)
        assert_array_equal(from_features, from_distances)
开发者ID:AlexandreAbraham,项目名称:scikit-learn,代码行数:13,代码来源:test_neighbors.py
示例16: get_results
def get_results(dataset):
    """Score a random forest on `dataset` under several missing-value setups.

    `dataset` must expose ``data`` and ``target``. Four setups are scored
    with ``neg_mean_squared_error``: the full data, the data with zeros
    written into one feature of 75% of the rows, and that zeroed data after
    mean imputation and after chained imputation (both treating 0 as the
    missing marker).

    Returns four ``(mean, std)`` tuples in that order. Relies on the
    module-level ``rng`` for choosing the rows and features.
    """
    X_full, y_full = dataset.data, dataset.target
    n_samples = X_full.shape[0]
    n_features = X_full.shape[1]

    def forest():
        """Fresh regressor with the fixed settings used by every setup."""
        return RandomForestRegressor(random_state=0, n_estimators=100)

    # Estimate the score on the entire dataset, with no missing values
    full_scores = cross_val_score(forest(), X_full, y_full,
                                  scoring='neg_mean_squared_error')

    # Add missing values in 75% of the lines
    missing_rate = 0.75
    n_missing_samples = int(np.floor(n_samples * missing_rate))
    # Builtin bool: the np.bool alias has been removed from NumPy.
    missing_samples = np.hstack((np.zeros(n_samples - n_missing_samples,
                                          dtype=bool),
                                 np.ones(n_missing_samples,
                                         dtype=bool)))
    rng.shuffle(missing_samples)
    missing_features = rng.randint(0, n_features, n_missing_samples)

    # Estimate the score after replacing missing values by 0.
    # The same zeroed copy feeds all three remaining setups; the original
    # rebuilt an identical copy before the mean-imputation step.
    X_missing = X_full.copy()
    X_missing[np.where(missing_samples)[0], missing_features] = 0
    y_missing = y_full.copy()
    zero_impute_scores = cross_val_score(forest(), X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after imputation (mean strategy) of the missing values
    estimator = Pipeline([("imputer", SimpleImputer(missing_values=0,
                                                    strategy="mean")),
                          ("forest", forest())])
    mean_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                         scoring='neg_mean_squared_error')

    # Estimate the score after chained imputation of the missing values
    estimator = Pipeline([("imputer", ChainedImputer(missing_values=0,
                                                     random_state=0)),
                          ("forest", forest())])
    chained_impute_scores = cross_val_score(estimator, X_missing, y_missing,
                                            scoring='neg_mean_squared_error')

    return ((full_scores.mean(), full_scores.std()),
            (zero_impute_scores.mean(), zero_impute_scores.std()),
            (mean_impute_scores.mean(), mean_impute_scores.std()),
            (chained_impute_scores.mean(), chained_impute_scores.std()))
开发者ID:SuryodayBasak,项目名称:scikit-learn,代码行数:51,代码来源:plot_missing_values.py
示例17: test_pairwise_cross_val_score
def test_pairwise_cross_val_score():
    """Precomputed-kernel and linear-kernel SVCs must score identically.

    Checked for both multiclass wrappers, using the Gram matrix ``X X^T``
    as the precomputed kernel.
    """
    X, y = iris.data, iris.target
    gram_matrix = np.dot(X, X.T)

    precomputed_svc = svm.SVC(kernel='precomputed')
    linear_svc = svm.SVC(kernel='linear')

    for MultiClassClassifier in [OneVsRestClassifier, OneVsOneClassifier]:
        wrapped_linear = MultiClassClassifier(linear_svc)
        wrapped_precomputed = MultiClassClassifier(precomputed_svc)
        score_precomputed = cross_val_score(wrapped_precomputed, gram_matrix, y)
        score_linear = cross_val_score(wrapped_linear, X, y)
        assert_array_equal(score_precomputed, score_linear)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:14,代码来源:test_multiclass.py
示例18: test_nested_cv
def test_nested_cv():
    # Test if nested cross validation works with different combinations of cv
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    rng = np.random.RandomState(0)
    labels = rng.randint(0, 5, 15)

    splitters = [
        LeaveOneLabelOut(),
        LeaveOneOut(),
        LabelKFold(),
        StratifiedKFold(),
        StratifiedShuffleSplit(n_iter=10, random_state=0),
    ]

    # Every unordered pair of splitters, including a splitter with itself.
    for inner_cv, outer_cv in combinations_with_replacement(splitters, 2):
        search = GridSearchCV(LinearSVC(random_state=0),
                              param_grid={'C': [1, 10]},
                              cv=inner_cv)
        cross_val_score(search, X=X, y=y, labels=labels, cv=outer_cv,
                        fit_params={'labels': labels})
开发者ID:absolutelyNoWarranty,项目名称:scikit-learn,代码行数:15,代码来源:test_split.py
示例19: test_cross_val_score_multilabel
def test_cross_val_score_multilabel():
    """Check 5-fold precision on a tiny multilabel problem.

    A 1-nearest-neighbour classifier is scored with micro, macro and
    samples averaging and compared with fixed per-fold values.
    """
    X = np.array([[-3, 4], [2, 4], [3, 3], [0, 2], [-3, 1],
                  [-2, 1], [0, 0], [-2, -1], [-1, -2], [1, -2]])
    y = np.array([[1, 1], [0, 1], [0, 1], [0, 1], [1, 1],
                  [0, 1], [1, 0], [1, 1], [1, 0], [0, 0]])
    clf = KNeighborsClassifier(n_neighbors=1)

    # Averaging mode -> expected precision for each of the five folds.
    expected_by_average = (
        ('micro', [1, 1 / 2, 3 / 4, 1 / 2, 1 / 3]),
        ('macro', [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4]),
        ('samples', [1, 1 / 2, 3 / 4, 1 / 2, 1 / 4]),
    )
    fold_scores = []
    for average, _ in expected_by_average:
        scorer = make_scorer(precision_score, average=average)
        fold_scores.append(cross_val_score(clf, X, y, scoring=scorer, cv=5))

    for actual, (_, expected) in zip(fold_scores, expected_by_average):
        assert_almost_equal(actual, expected)
开发者ID:447327642,项目名称:scikit-learn,代码行数:15,代码来源:test_validation.py
示例20: generate_binary_crime_label
def generate_binary_crime_label():
    """Binarize the 2013 crime counts at their median and try two classifiers.

    A count at or above the median becomes label 1, otherwise 0. An SVC and
    a decision tree are scored with 10-fold cross validation on ``F[1]`` of
    the generated features, the scores are printed, and the labels are
    pickled to the file "crime-label" in the working directory.

    Returns ``(y, label, F[1])``: the raw counts, the binary labels and the
    feature item used for classification.
    """
    y = retrieve_crime_count(2013)
    threshold = np.median(y)
    label = [1 if ele >= threshold else 0 for ele in y]
    F = generate_corina_features()

    from sklearn import svm, tree
    from sklearn.model_selection import cross_val_score

    clf1 = svm.SVC()
    scores1 = cross_val_score(clf1, F[1], label, cv=10)
    print(scores1.mean(), scores1)

    clf2 = tree.DecisionTreeClassifier()
    scores2 = cross_val_score(clf2, F[1], label, cv=10)
    print(scores2.mean(), scores2)

    # pickle writes bytes, so the file must be opened in binary mode; the
    # with-block also guarantees it is flushed and closed.
    with open("crime-label", 'wb') as label_file:
        pickle.dump(label, label_file)
    return y, label, F[1]
开发者ID:thekingofkings,项目名称:chicago-crime,代码行数:15,代码来源:FeatureUtils.py
注:本文中的sklearn.model_selection.cross_val_score函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论