This article collects typical usage examples of the sklearn.model_selection.KFold class in Python. If you have been wondering what exactly the KFold class does, how to use it, or where to find working examples, the curated class examples below should help.
The following shows 20 code examples of the KFold class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help our system recommend better Python code examples.
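Before the examples, here is a minimal, self-contained sketch of the KFold API itself; the data values are made up for illustration:

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)   # 10 samples, 2 features
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    # split() yields integer index arrays, not the data itself
    print('fold', fold, 'train:', train_index, 'test:', test_index)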
Example 1: calculate_val
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1 - mean, embeddings2 - mean, distance_metric)

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
Author: AzureWoods | Project: faceRecognition-yolo-facenet | Lines: 35 | Source: facenet.py
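The FAR-targeting step above hinges on scipy's interp1d, which inverts the measured threshold-to-FAR curve. A minimal sketch with made-up far_train values (not from the source):

import numpy as np
from scipy import interpolate

thresholds = np.array([0.0, 1.0, 2.0, 3.0])    # candidate distance thresholds
far_train = np.array([0.0, 0.01, 0.1, 0.5])    # FAR measured at each threshold
f = interpolate.interp1d(far_train, thresholds, kind='slinear')
print(f(0.05))  # the threshold expected to give roughly FAR = 0.05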
Example 2: model_train
def model_train(self, X_train, y_train, ignore_neutral=False):
    if ignore_neutral:
        X_train = X_train[y_train != 0]
        y_train = y_train[y_train != 0]
    self.ignore_neutral = ignore_neutral

    model = LinearSVC()
    classifier = model.fit(X_train, y_train)
    # pred = classifier.predict(X_train)
    # accu = np.mean(pred == y_train)
    # print('The accuracy of training data is {}'.format(accu))
    # print(confusion_matrix(y_train, pred))

    # k-fold cross-validation
    kfold = KFold(n_splits=5)
    for i, (train_index, test_index) in enumerate(kfold.split(X_train)):
        X_split_train = X_train[train_index]
        y_split_train = y_train[train_index]
        X_split_valid = X_train[test_index]
        y_split_valid = y_train[test_index]
        classifier = model.fit(X_split_train, y_split_train)
        pred = classifier.predict(X_split_valid)
        accu = np.mean(pred == y_split_valid)
        print('Fold {}: the accuracy of validation data is {}'.format(i + 1, accu))
    return classifier
Author: l11x0m7 | Project: NewsComment | Lines: 26 | Source: dict_method.py
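For comparison, the manual fold loop above can be collapsed with cross_val_score; a sketch on synthetic data (the array shapes are assumptions, not from the source):

import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.svm import LinearSVC

X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)
scores = cross_val_score(LinearSVC(), X, y, cv=KFold(n_splits=5))
print(scores, scores.mean())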
Example 3: calculate_roc
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1 - mean, embeddings2 - mean, distance_metric)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy
Author: AzureWoods | Project: faceRecognition-yolo-facenet | Lines: 32 | Source: facenet.py
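Both facenet helpers pass shuffle=False, so the folds are deterministic, contiguous index blocks; a quick demonstration:

import numpy as np
from sklearn.model_selection import KFold

for train_set, test_set in KFold(n_splits=3, shuffle=False).split(np.arange(9)):
    print(train_set, test_set)
# The test folds come out as the contiguous blocks [0 1 2], [3 4 5], [6 7 8],
# so repeated evaluations always pair the same folds with the same data.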
Example 4: validateseq2
def validateseq2(X_all, y, features, clf, score, v=False, esr=50, sk=5):
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('after delete: {}'.format(X.shape))

    temp_user = target_order[(target_order.o_day_series < 306) & (target_order.o_day_series >= 215)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 306
    print('before delete: {}'.format(X_all.shape))
    X2 = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('after delete: {}'.format(X2.shape))

    kf = KFold(n_splits=sk)
    print(len(features))
    X['Prob_x'] = 0
    # First pass: fit on the earlier window (X2) and stack the averaged
    # fold probabilities onto X as a new feature.
    # (.iloc replaces the long-deprecated .ix; indices are positional here.)
    for train_index, test_index in kf.split(X2):
        X_train, X_test = X2.iloc[train_index], X2.iloc[test_index]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X2.iloc[train_index].buy, X2.iloc[test_index].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        X['Prob_x'] = X['Prob_x'] + clf.predict_proba(X[features])[:, 1] / sk

    Performance = []
    features.append('Prob_x')
    # Second pass: cross-validate on X with the stacked feature included.
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X.iloc[train_index].buy, X.iloc[test_index].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:, 1]
        Performance.append(roc_auc_score(y_test, pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    return np.mean(Performance), clf
Author: ethanww | Project: JData-2018 | Lines: 31 | Source: Run_12.py
Example 5: Get_KFolds
def Get_KFolds(data, y_label, num_folds, scale):
    #Creates num_folds folds from the data, each with a separate training and test set
    folds = []
    kf = KFold(n_splits=num_folds)
    for train_index, test_index in kf.split(data):
        training = []
        test = []
        tempdf = Normalize_Scale(data, scale)
        train_x = tempdf.drop([y_label], axis=1).values
        train_y = tempdf[y_label].values

        #Creates a training set within the fold
        x = []
        y = []
        for index in train_index:
            x.append(train_x[index])
            y.append(train_y[index])
        training = [x, y]

        #Creates a test set within the fold
        x = []
        y = []
        for index in test_index:
            x.append(train_x[index])
            y.append(train_y[index])
        test = [x, y]

        folds.append([training, test])
    return folds
Author: Andymic | Project: Machine-Learning | Lines: 32 | Source: API.py
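The per-index append loops can be replaced with numpy fancy indexing, since KFold already yields index arrays; a sketch reusing the names above (Normalize_Scale is the author's helper, assumed available, and is hoisted out of the loop since it does not depend on the fold):

def Get_KFolds_vectorized(data, y_label, num_folds, scale):
    tempdf = Normalize_Scale(data, scale)  # author's helper, assumed
    train_x = tempdf.drop([y_label], axis=1).values
    train_y = tempdf[y_label].values
    folds = []
    for train_index, test_index in KFold(n_splits=num_folds).split(data):
        # numpy index arrays select all fold rows in one shot
        training = [train_x[train_index], train_y[train_index]]
        test = [train_x[test_index], train_y[test_index]]
        folds.append([training, test])
    return folds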
Example 6: hyperopt_obj
def hyperopt_obj(self, param, train_X, train_y):
    # 3-fold cross-validation error
    #ret = xgb.cv(param, dtrain, num_boost_round=param['num_round'])
    kf = KFold(n_splits=3)
    errors = []
    r2 = []
    int_params = ['max_depth', 'num_round']
    for item in int_params:
        param[item] = int(param[item])
    for train_ind, test_ind in kf.split(train_X):
        train_valid_x, train_valid_y = train_X[train_ind], train_y[train_ind]
        test_valid_x, test_valid_y = train_X[test_ind], train_y[test_ind]
        dtrain = xgb.DMatrix(train_valid_x, label=train_valid_y)
        dtest = xgb.DMatrix(test_valid_x)
        pred_model = xgb.train(param, dtrain, num_boost_round=int(param['num_round']))
        pred_test = pred_model.predict(dtest)
        errors.append(mean_squared_error(test_valid_y, pred_test))
        r2.append(r2_score(test_valid_y, pred_test))
    all_dtrain = xgb.DMatrix(train_X, label=train_y)
    print('training score:')
    pred_model = xgb.train(param, all_dtrain, num_boost_round=int(param['num_round']))
    all_dtest = xgb.DMatrix(train_X)
    pred_train = pred_model.predict(all_dtest)
    print(str(r2_score(train_y, pred_train)))
    print(np.mean(r2))
    print('\n')
    return {'loss': np.mean(errors), 'status': STATUS_OK}
Author: Matafight | Project: Kaggle | Lines: 27 | Source: stacking.py
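The commented-out xgb.cv line points at xgboost's built-in alternative to the manual fold loop; a hedged sketch (param, train_X, train_y as above; the 'rmse' metric is an assumption and should match param's objective):

dtrain_full = xgb.DMatrix(train_X, label=train_y)
cv_results = xgb.cv(param, dtrain_full,
                    num_boost_round=int(param['num_round']),
                    nfold=3, metrics='rmse')
print(cv_results['test-rmse-mean'].iloc[-1])  # CV error at the final round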
Example 7: predict
def predict(X_all, X_new, features, clf, score, v=False, esr=50, sk=3, fn='submission'):
    first_day = datetime.datetime.strptime('2017-08-31 00:00:00', '%Y-%m-%d %H:%M:%S')
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all, on=['user_id', 'CreateGroup'], how='left')
    print('after delete: {}'.format(X.shape))

    temp_user = target_order[(target_order.o_day_series < 366) &
                             (target_order.o_day_series >= 366 - 75)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 366
    print('before delete: {}'.format(X_new.shape))
    X_new = temp_user.merge(X_new, on=['user_id', 'CreateGroup'], how='left')
    print('Train: {}'.format(X_new.shape))

    kf = KFold(n_splits=sk)
    print(len(features))
    Performance = []
    X_new['Prob'] = 0
    # .iloc replaces the long-deprecated .ix; indices are positional here.
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X.iloc[train_index].buy, X.iloc[test_index].buy
        clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:, 1]
        X_new['Prob'] = X_new['Prob'] + clf.predict_proba(X_new[features])[:, 1] / sk
        Performance.append(roc_auc_score(y_test, pred))
    print("Mean Score: {}".format(np.mean(Performance)))

    X_new['Days'] = np.random.randint(15, size=len(X_new))
    X_new['pred_date'] = X_new['Days'].apply(lambda x: (datetime.timedelta(days=x) + first_day).strftime("%Y-%m-%d"))
    X_new.sort_values(by=['Prob'], ascending=False, inplace=True)
    X_new[['user_id', 'Prob']].to_csv('prob_{}.csv'.format(fn), index=None)
    X_new[['user_id', 'pred_date']][:50000].to_csv('{}.csv'.format(fn), index=None)
    return np.mean(Performance), clf
Author: ethanww | Project: JData-2018 | Lines: 32 | Source: Run_12.py
Example 8: computing_cv_accuracy_LDA
def computing_cv_accuracy_LDA(in_path=None, cv_n_fold=10):
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    data = export_data_set('iris.data') if in_path is None else pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    kf = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    mean_u65, mean_u80 = 0, 0
    for idx_train, idx_test in kf.split(y):
        print("---k-FOLD-new-executing--")
        X_cv_train, y_cv_train = X[idx_train], y[idx_train]
        X_cv_test, y_cv_test = X[idx_test], y[idx_test]
        lda.fit(X_cv_train, y_cv_train)
        n_test = len(idx_test)
        sum_u65, sum_u80 = 0, 0
        for i, test in enumerate(X_cv_test):
            evaluate = lda.predict([test])
            print("-----TESTING-----", i)
            if y_cv_test[i] in evaluate:
                sum_u65 += u65(len(evaluate))
                sum_u80 += u80(len(evaluate))
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    print("--->", mean_u65 / cv_n_fold, mean_u80 / cv_n_fold)
Author: sdestercke | Project: classifip | Lines: 32 | Source: qdatest.py
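The u65/u80 helpers implement the discounted-utility accuracies used in imprecise classification: a singleton prediction earns full credit, while larger prediction sets are discounted. A standalone sanity check (the helpers are redefined at top level here so the snippet runs on its own):

def u65(mod_Y):
    return 1.6 / mod_Y - 0.6 / mod_Y ** 2

def u80(mod_Y):
    return 2.2 / mod_Y - 1.2 / mod_Y ** 2

print(u65(1), u80(1))  # 1.0 1.0  - a singleton prediction gets full credit
print(u65(2), u80(2))  # 0.65 0.8 - a two-label prediction set is discounted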
Example 9: computing_cv_accuracy_imprecise
def computing_cv_accuracy_imprecise(in_path=None, ell_optimal=0.1, cv_n_fold=10):
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    data = export_data_set('iris.data') if in_path is None else pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    mean_u65, mean_u80 = 0, 0
    lqa = LinearDiscriminant(init_matlab=True)
    kf = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    for idx_train, idx_test in kf.split(y):
        X_cv_train, y_cv_train = X[idx_train], y[idx_train]
        X_cv_test, y_cv_test = X[idx_test], y[idx_test]
        lqa.learn(X_cv_train, y_cv_train, ell=ell_optimal)
        sum_u65, sum_u80 = 0, 0
        n_test, _ = X_cv_test.shape
        for i, test in enumerate(X_cv_test):
            print("--TESTING-----", i, ell_optimal)
            evaluate, _ = lqa.evaluate(test)
            print(evaluate, "-----", y_cv_test[i])
            if y_cv_test[i] in evaluate:
                sum_u65 += u65(len(evaluate))
                sum_u80 += u80(len(evaluate))
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    mean_u65 = mean_u65 / cv_n_fold
    mean_u80 = mean_u80 / cv_n_fold
    print("--ell-->", ell_optimal, "--->", mean_u65, mean_u80)
Author: sdestercke | Project: classifip | Lines: 32 | Source: qdatest.py
Example 10: split_data
def split_data(root_path, num_splits=4):
    mask_list = []
    for ext in ('*.mhd', '*.hdr', '*.nii'):
        mask_list.extend(sorted(glob(join(root_path, 'masks', ext))))

    assert len(mask_list) != 0, 'Unable to find any files in {}'.format(join(root_path, 'masks'))

    outdir = join(root_path, 'split_lists')
    try:
        mkdir(outdir)
    except:
        pass

    kf = KFold(n_splits=num_splits)
    n = 0
    for train_index, test_index in kf.split(mask_list):
        # On Python 3, csv files are opened in text mode with newline=''
        # (the original used the Python 2 'wb' mode)
        with open(join(outdir, 'train_split_' + str(n) + '.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in train_index:
                writer.writerow([basename(mask_list[i])])
        with open(join(outdir, 'test_split_' + str(n) + '.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in test_index:
                writer.writerow([basename(mask_list[i])])
        n += 1
Author: legendhua | Project: SegCaps | Lines: 25 | Source: load_3D_data.py
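Note that kf.split only needs the length of its argument, so a plain list of file names works directly; a minimal demonstration with made-up names:

from sklearn.model_selection import KFold

files = ['a.mhd', 'b.mhd', 'c.mhd', 'd.mhd']
for train_idx, test_idx in KFold(n_splits=4).split(files):
    print([files[i] for i in train_idx], '->', [files[i] for i in test_idx])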
Example 11: KFold_method
def KFold_method(self):
    kf = KFold(n_splits=10)
    for train_index, test_index in kf.split(self.FeatureSet):
        X_train = []
        X_test = []
        y_train = []
        y_test = []
        for trainid in train_index.tolist():
            X_train.append(self.FeatureSet[trainid])
            y_train.append(self.Label[trainid])
        for testid in test_index.tolist():
            X_test.append(self.FeatureSet[testid])
            y_test.append(self.Label[testid])
        tree = self.buildtree(X_train)
        #self.post_pruning(tree, 0.3)
        pre_labels = self.predict(X_test, tree)
        # Model evaluation
        ACC = metrics.accuracy_score(y_test, pre_labels)
        # MCC = metrics.matthews_corrcoef(y_test, pre_labels)
        SN = self.performance(y_test, pre_labels)
        # print(SP, SN)
        print(ACC, SN)
Example 12: learn_decision_tree
def learn_decision_tree(data_set, label):
    #Create candidate depths 1..13
    depths = list(range(1, 14))
    #Initialize the best model as [model, depth, accuracy]
    best_model = [None, 0, float("-inf")]
    #Create 13 folds, one per candidate depth
    kf = KFold(n_splits=13)
    track = []
    for (train, test), cdepth in zip(kf.split(data_set), depths):
        #Get training set
        train_set = [data_set[i] for i in train]
        train_label = [label[i] for i in train]
        #Get validation set
        valid_set = [data_set[i] for i in test]
        valid_label = [label[i] for i in test]
        #Learn the decision tree from data
        clf = tree.DecisionTreeClassifier(max_depth=cdepth)
        clf = clf.fit(train_set, train_label)
        #Get validation accuracy for the model
        accuracy = clf.score(valid_set, valid_label)
        #Compare accuracies
        track.append([cdepth, accuracy])
        if accuracy > best_model[2]:
            #Update the best model
            best_model = [clf, cdepth, accuracy]
    #Plot accuracy against depth
    fig = plt.figure()
    x = [t[0] for t in track]
    y = [t[1] for t in track]
    plt.xlabel('Depth')
    plt.ylabel('Accuracy')
    plt.title('Decision Tree')
    plt.plot(x, y)
    plt.savefig('decision_tree.png')
    return best_model
Author: x330930520 | Project: HW3 | Lines: 35 | Source: Machine+Learning.py
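Note that zipping kf.split with depths scores each depth on a single distinct fold, so fold difficulty and depth are confounded. A sketch of scoring every depth on every fold instead (same names as above; this is not the author's code):

for cdepth in depths:
    fold_scores = []
    for train, test in kf.split(data_set):
        clf = tree.DecisionTreeClassifier(max_depth=cdepth)
        clf.fit([data_set[i] for i in train], [label[i] for i in train])
        fold_scores.append(clf.score([data_set[i] for i in test],
                                     [label[i] for i in test]))
    # average over all folds before comparing depths
    mean_accuracy = sum(fold_scores) / len(fold_scores)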
Example 13: calculate_val
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
Author: kissthink | Project: facenet_regonistant | Lines: 32 | Source: facenet.py
Example 14: original_data
def original_data():
    for target in TARGETS:
        for algo_str in ALGORITHMS:
            algorithm = importlib.import_module('src.multi_class.' + algo_str)
            encoded_data = input_preproc.readFromDataset(
                INPUT_DIR + ORIGINAL_DATA_FILE,
                INPUT_COLS['original'],
                target
            )
            # Split into predictors and target
            X = np.array(encoded_data[encoded_data.columns.difference([target])])
            y = np.array(encoded_data[target])
            kf = KFold(n_splits=CROSS_VALIDATION_K, shuffle=True)
            f1s = []
            for train_index, test_index in kf.split(X):
                X_train, y_train = X[train_index], y[train_index]
                X_test, y_test = X[test_index], y[test_index]

                scaler = preprocessing.StandardScaler()
                X_train = pd.DataFrame(scaler.fit_transform(X_train))  # , columns=X_train.columns)
                X_test = scaler.transform(X_test)

                precision, recall, f1_score, accuracy = algorithm.runClassifier(X_train, X_test, y_train, y_test)
                f1s.append(f1_score)

            final_f1 = sum(f1s) / len(f1s)
            print("\n================================")
            print("%s, %s, F1 Score: %.6f" % (target, algo_str, final_f1))
            print("================================\n")
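Note the leakage-safe pattern here: StandardScaler is fit on the training fold only and then applied to the held-out fold, so test-fold statistics never influence the scaling.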
Example 15: cross_validation
def cross_validation(train_data, train_labels, k_range=np.arange(1, 16)):
    '''
    Perform 10-fold cross validation to find the best value for k.
    Note: Previously this function took knn as an argument instead of
    train_data, train_labels. The intention was for students to take the
    training data from the knn object - this should be clearer from the
    new function signature.
    '''
    folds = 10
    kf = KFold(n_splits=folds)
    best_k = 1
    average_accuracy_for_best_k = 0
    for k in k_range:
        accuracy_sum = 0
        for train_index, test_index in kf.split(train_data):
            X_train, X_test = train_data[train_index], train_data[test_index]
            y_train, y_test = train_labels[train_index], train_labels[test_index]
            knn = KNearestNeighbor(X_train, y_train)
            validation_accuracy = classification_accuracy(knn, k, X_test, y_test)
            accuracy_sum += validation_accuracy
        average_accuracy = accuracy_sum / folds
        if average_accuracy > average_accuracy_for_best_k:
            average_accuracy_for_best_k = average_accuracy
            best_k = k
    return best_k, average_accuracy_for_best_k
Example 16: test_multiclass_classification
def test_multiclass_classification():
    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        if output_margin:
            err = sum(1 for i in range(len(preds))
                      if preds[i].argmax() != labels[i]) / float(len(preds))
        else:
            err = sum(1 for i in range(len(preds))
                      if preds[i] != labels[i]) / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    y = iris['target']
    X = iris['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBClassifier().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False,
                                   ntree_limit=3)
        labels = y[test_index]

        check_pred(preds, labels, output_margin=False)
        check_pred(preds2, labels, output_margin=True)
        check_pred(preds3, labels, output_margin=True)
        check_pred(preds4, labels, output_margin=False)
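KFold ignores the y passed to split(); when class balance matters, as on iris, StratifiedKFold is the drop-in alternative. A sketch, not from the source:

from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=0)
for train_index, test_index in skf.split(X, y):
    pass  # same loop body as above, with class ratios preserved per fold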
Example 17: select
def select(self):
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Implement model selection using CV
    NB_SPLITS = 3
    mean_scores = []
    # Note: recent scikit-learn versions require shuffle=True whenever
    # random_state is set on KFold
    split_method = KFold(random_state=self.random_state, n_splits=NB_SPLITS)
    n_components = range(self.min_n_components, self.max_n_components + 1)
    try:
        for n_component in n_components:
            model = self.base_model(n_component)
            kfold_scores = []
            for _, test_idx in split_method.split(self.sequences):
                test_X, test_length = combine_sequences(test_idx, self.sequences)
                kfold_scores.append(model.score(test_X, test_length))
            mean_scores.append(np.mean(kfold_scores))
    except Exception as e:
        pass

    if len(mean_scores) > 0:
        states = n_components[np.argmax(mean_scores)]
    else:
        states = self.n_constant
    return self.base_model(states)
Author: blouargant | Project: UDACITY | Lines: 29 | Source: my_model_selectors.py
Example 18: test_regression_with_custom_objective
def test_regression_with_custom_objective():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.model_selection import KFold

    def objective_ls(y_true, y_pred):
        grad = (y_pred - y_true)
        hess = np.ones(len(y_true))
        return grad, hess

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
            X[train_index], y[train_index]
        )
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
        assert mean_squared_error(preds, labels) < 25

    # Test that the custom objective function is actually used
    class XGBCustomObjectiveException(Exception):
        pass

    def dummy_objective(y_true, y_pred):
        raise XGBCustomObjectiveException()

    xgb_model = xgb.XGBRegressor(objective=dummy_objective)
    np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
Example 19: test_boston_housing_regression
def test_boston_housing_regression():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.model_selection import KFold

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBRegressor().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False,
                                   ntree_limit=3)
        labels = y[test_index]

        assert mean_squared_error(preds, labels) < 25
        assert mean_squared_error(preds2, labels) < 350
        assert mean_squared_error(preds3, labels) < 25
        assert mean_squared_error(preds4, labels) < 350
Example 20: cross_validate
def cross_validate(self, values_labels, folds=10, processes=1):
    """
    Trains and tests the model against folds of labeled data.

    :Parameters:
        values_labels : [( `<feature_values>`, `<label>` )]
            an iterable of labeled data where `<feature_values>` is an
            ordered collection of predictive values that correspond to
            the `Feature` s provided to the constructor
        folds : `int`
            the number of cross-validation folds
        processes : `int`
            When set to 1, cross-validation will run in the parent thread.
            When set to 2 or greater, a :class:`multiprocessing.Pool` will
            be created.
    """
    folds_i = KFold(n_splits=folds, shuffle=True, random_state=0)
    if processes == 1:
        mapper = map
    else:
        pool = Pool(processes=processes or cpu_count())
        mapper = pool.map
    results = mapper(self._cross_score,
                     ((i, [values_labels[i] for i in train_i],
                       [values_labels[i] for i in test_i])
                      for i, (train_i, test_i) in enumerate(
                          folds_i.split(values_labels))))
    agg_score_labels = []
    for score_labels in results:
        agg_score_labels.extend(score_labels)

    self.info['statistics'].fit(agg_score_labels)

    return self.info['statistics']
Author: wiki-ai | Project: revscoring | Lines: 33 | Source: model.py
Note: The sklearn.model_selection.KFold class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. When sharing or using the code, please follow the corresponding project's license. Do not reproduce this article without permission.