
Python model_selection.KFold Class Code Examples


This article collects typical usage examples of the Python class sklearn.model_selection.KFold. If you have been wondering what exactly KFold does, how to use it, or what working KFold code looks like, the curated class examples below should help.



Below are 20 code examples of the KFold class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
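Before diving into the examples, here is a minimal, self-contained sketch of the basic KFold API, using toy data invented purely for illustration (none of it comes from the examples below): split() yields one (train_indices, test_indices) pair of positional index arrays per fold.

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)   # toy data: 10 samples, 2 features
y = np.arange(10)

kf = KFold(n_splits=5, shuffle=True, random_state=0)
for fold_idx, (train_index, test_index) in enumerate(kf.split(X)):
    # Each fold yields positional index arrays; slice the data with them
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    print('fold {}: {} train / {} test samples'.format(fold_idx, len(train_index), len(test_index)))

The same pattern (construct KFold, iterate over split(), index into the data) recurs throughout the examples that follow.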

Example 1: calculate_val

def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    
    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)
    
    indices = np.arange(nrof_pairs)
    
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
      
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train)>=far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0
    
        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
  
    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
Author: AzureWoods, Project: faceRecognition-yolo-facenet, Lines: 35, Source: facenet.py


Example 2: model_train

    def model_train(self, X_train, y_train, ignore_neutral=False):
        if ignore_neutral:
            X_train = X_train[y_train != 0]
            y_train = y_train[y_train != 0]
        self.ignore_neutral = ignore_neutral

        model = LinearSVC()
        classifier = model.fit(X_train, y_train)
        # pred = classifier.predict(X_train)
        # accu = np.mean(pred == y_train)
        # print('The accuracy of training data is {}'.format(accu))
        # print(confusion_matrix(y_train, pred))

        # k-fold
        kfold = KFold(n_splits=5)
        for i, (train_index, test_index) in enumerate(kfold.split(X_train)):
            X_split_train = X_train[train_index]
            y_split_train = y_train[train_index]
            X_split_valid = X_train[test_index]
            y_split_valid = y_train[test_index]
            classifier = model.fit(X_split_train, y_split_train)
            pred = classifier.predict(X_split_valid)
            accu = np.mean(pred == y_split_valid)
            print('Fold {}: the accuracy of validation data is {}'.format(i + 1, accu))

        return classifier
Author: l11x0m7, Project: NewsComment, Lines: 26, Source: dict_method.py


Example 3: calculate_roc

def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    
    tprs = np.zeros((nrof_folds,nrof_thresholds))
    fprs = np.zeros((nrof_folds,nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    
    indices = np.arange(nrof_pairs)
    
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
        
        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])
          
    tpr = np.mean(tprs,0)
    fpr = np.mean(fprs,0)
    return tpr, fpr, accuracy
Author: AzureWoods, Project: faceRecognition-yolo-facenet, Lines: 32, Source: facenet.py


Example 4: validateseq2

def validateseq2(X_all, y, features, clf, score, v = False, esr=50, sk=5):
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    print('after delete: {}'.format(X.shape))
    temp_user = target_order[(target_order.o_day_series < 306) & (target_order.o_day_series >= 215)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 306
    print('before delete: {}'.format(X_all.shape))
    X2 = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    print('after delete: {}'.format(X2.shape))
    kf = KFold(n_splits=sk)
    print(len(features))
    X['Prob_x'] = 0
    for train_index, test_index in kf.split(X2):
        X_train, X_test = X2.iloc[train_index, :], X2.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X2.iloc[train_index, :].buy, X2.iloc[test_index, :].buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        X['Prob_x'] = X['Prob_x'] + clf.predict_proba(X[features])[:,1]/sk
    Performance = []
    features.append('Prob_x')
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X.iloc[train_index, :].buy, X.iloc[test_index, :].buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:,1]
        Performance.append(roc_auc_score(y_test,pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    return np.mean(Performance),clf
Author: ethanww, Project: JData-2018, Lines: 31, Source: Run_12.py


Example 5: Get_KFolds

def Get_KFolds(data, y_label, num_folds, scale):
    #Creates num_folds folds, each with its own training and test set
    folds = []
    kf = KFold(n_splits=num_folds)

    #Normalize once up front; the scaled values are identical for every fold
    tempdf = Normalize_Scale(data, scale)
    train_x = tempdf.drop([y_label], axis=1).values
    train_y = tempdf[y_label].values

    for train_index, test_index in kf.split(data):
        #Creates a training set within the fold
        x = []
        y = []
        
        for index in train_index:
            x.append(train_x[index])
            y.append(train_y[index])
        training = [x,y]
        
        #Creates a test set within the fold
        x = []
        y = []
        for index in test_index:
            x.append(train_x[index])
            y.append(train_y[index])
        test = [x,y]

        folds.append([training,test])
    
    return folds
Author: Andymic, Project: Machine-Learning, Lines: 32, Source: API.py


Example 6: hyperopt_obj

 def hyperopt_obj(self,param,train_X,train_y):
     # 3-fold cross-validation error
     #ret = xgb.cv(param,dtrain,num_boost_round=param['num_round'])
     kf = KFold(n_splits=3)
     errors = []
     r2 = []
     int_params = ['max_depth','num_round']
     for item in int_params:
         param[item] = int(param[item])
     for train_ind,test_ind in kf.split(train_X):
         train_valid_x,train_valid_y = train_X[train_ind],train_y[train_ind]
         test_valid_x,test_valid_y = train_X[test_ind],train_y[test_ind]
         dtrain = xgb.DMatrix(train_valid_x,label = train_valid_y)
         dtest = xgb.DMatrix(test_valid_x)
         pred_model = xgb.train(param,dtrain,num_boost_round=int(param['num_round']))
         pred_test = pred_model.predict(dtest)
         errors.append(mean_squared_error(test_valid_y,pred_test))
         r2.append(r2_score(test_valid_y,pred_test))
     all_dtrain = xgb.DMatrix(train_X,label = train_y)
     print('training score:')
     pred_model = xgb.train(param,all_dtrain,num_boost_round= int(param['num_round']))
     all_dtest = xgb.DMatrix(train_X)
     pred_train = pred_model.predict(all_dtest)
     print(str(r2_score(train_y,pred_train)))
     print(np.mean(r2))
     print('\n')
     return {'loss':np.mean(errors),'status': STATUS_OK}
Author: Matafight, Project: Kaggle, Lines: 27, Source: stacking.py


Example 7: predict

def predict(X_all, X_new, features, clf, score, v = False, esr=50, sk=3, fn='submission'):
    first_day = datetime.datetime.strptime('2017-08-31 00:00:00', '%Y-%m-%d %H:%M:%S')
    temp_user = target_order[(target_order.o_day_series < 336) & (target_order.o_day_series >= 274)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 336
    print('before delete: {}'.format(X_all.shape))
    X = temp_user.merge(X_all,on=['user_id','CreateGroup'],how = 'left')
    print('after delete: {}'.format(X.shape))
    temp_user = target_order[(target_order.o_day_series < 366) & \
                             (target_order.o_day_series >= 366 - 75)][['user_id']].drop_duplicates().reset_index(drop=True)
    temp_user['CreateGroup'] = 366
    print('before delete: {}'.format(X_new.shape))
    X_new = temp_user.merge(X_new,on=['user_id','CreateGroup'],how = 'left')
    print('Train: {}'.format(X_new.shape))
    kf = KFold(n_splits=sk)
    print(len(features))
    Performance = []
    X_new['Prob'] = 0
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index, :], X.iloc[test_index, :]
        X_train, X_test = X_train[features], X_test[features]
        y_train, y_test = X.iloc[train_index, :].buy, X.iloc[test_index, :].buy
        clf.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_test, y_test)], eval_metric='auc', verbose=v, early_stopping_rounds=esr)
        pred = clf.predict_proba(X_test)[:,1]
        X_new['Prob'] = X_new['Prob'] + clf.predict_proba(X_new[features])[:,1]/sk
        Performance.append(roc_auc_score(y_test,pred))
    print("Mean Score: {}".format(np.mean(Performance)))
    X_new['Days'] = np.random.randint(15,size=len(X_new))
    X_new['pred_date'] = X_new['Days'].apply(lambda x: (datetime.timedelta(days=x) + first_day).strftime("%Y-%m-%d"))
    X_new.sort_values(by = ['Prob'], ascending = False, inplace = True)
    X_new[['user_id','Prob']].to_csv('prob_{}.csv'.format(fn), index = None)
    X_new[['user_id','pred_date']][:50000].to_csv('{}.csv'.format(fn), index = None)
    return np.mean(Performance),clf
Author: ethanww, Project: JData-2018, Lines: 32, Source: Run_12.py


Example 8: computing_cv_accuracy_LDA

def computing_cv_accuracy_LDA(in_path=None, cv_n_fold=10):
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    data = export_data_set('iris.data') if in_path is None else pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    kf = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    mean_u65, mean_u80 = 0, 0
    for idx_train, idx_test in kf.split(y):
        print("---k-FOLD-new-executing--")
        X_cv_train, y_cv_train = X[idx_train], y[idx_train]
        X_cv_test, y_cv_test = X[idx_test], y[idx_test]
        lda.fit(X_cv_train, y_cv_train)
        n_test = len(idx_test)
        sum_u65, sum_u80 = 0, 0
        for i, test in enumerate(X_cv_test):
            evaluate = lda.predict([test])
            print("-----TESTING-----", i)
            if y_cv_test[i] in evaluate:
                sum_u65 += u65(len(evaluate))
                sum_u80 += u80(len(evaluate))
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    print("--->", mean_u65 / cv_n_fold, mean_u80 / cv_n_fold)
Author: sdestercke, Project: classifip, Lines: 32, Source: qdatest.py


Example 9: computing_cv_accuracy_imprecise

def computing_cv_accuracy_imprecise(in_path=None, ell_optimal=0.1, cv_n_fold=10):
    def u65(mod_Y):
        return 1.6 / mod_Y - 0.6 / mod_Y ** 2

    def u80(mod_Y):
        return 2.2 / mod_Y - 1.2 / mod_Y ** 2

    data = export_data_set('iris.data') if in_path is None else pd.read_csv(in_path)
    print("-----DATA SET TRAINING---", in_path)
    X = data.iloc[:, :-1].values
    y = np.array(data.iloc[:, -1].tolist())
    mean_u65, mean_u80 = 0, 0
    lqa = LinearDiscriminant(init_matlab=True)
    kf = KFold(n_splits=cv_n_fold, random_state=None, shuffle=True)
    for idx_train, idx_test in kf.split(y):
        X_cv_train, y_cv_train = X[idx_train], y[idx_train]
        X_cv_test, y_cv_test = X[idx_test], y[idx_test]
        lqa.learn(X_cv_train, y_cv_train, ell=ell_optimal)
        sum_u65, sum_u80 = 0, 0
        n_test, _ = X_cv_test.shape
        for i, test in enumerate(X_cv_test):
            print("--TESTING-----", i, ell_optimal)
            evaluate, _ = lqa.evaluate(test)
            print(evaluate, "-----", y_cv_test[i])
            if y_cv_test[i] in evaluate:
                sum_u65 += u65(len(evaluate))
                sum_u80 += u80(len(evaluate))
        mean_u65 += sum_u65 / n_test
        mean_u80 += sum_u80 / n_test
    mean_u65 = mean_u65 / cv_n_fold
    mean_u80 = mean_u80 / cv_n_fold
    print("--ell-->", ell_optimal, "--->", mean_u65, mean_u80)
Author: sdestercke, Project: classifip, Lines: 32, Source: qdatest.py


Example 10: split_data

def split_data(root_path, num_splits=4):
    mask_list = []
    for ext in ('*.mhd', '*.hdr', '*.nii'):
        mask_list.extend(sorted(glob(join(root_path,'masks',ext))))

    assert len(mask_list) != 0, 'Unable to find any files in {}'.format(join(root_path,'masks'))

    outdir = join(root_path,'split_lists')
    try:
        mkdir(outdir)
    except FileExistsError:
        pass

    kf = KFold(n_splits=num_splits)
    n = 0
    for train_index, test_index in kf.split(mask_list):
        with open(join(outdir,'train_split_' + str(n) + '.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in train_index:
                writer.writerow([basename(mask_list[i])])
        with open(join(outdir,'test_split_' + str(n) + '.csv'), 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for i in test_index:
                writer.writerow([basename(mask_list[i])])
        n += 1
Author: legendhua, Project: SegCaps, Lines: 25, Source: load_3D_data.py


Example 11: KFold_method

    def KFold_method(self):
        kf = KFold(n_splits=10)
        for train_index, test_index in kf.split(self.FeatureSet):
            X_train = []
            X_test = []
            y_train = []
            y_test = []
            for trainid in train_index.tolist():
                X_train.append(self.FeatureSet[trainid])
                y_train.append(self.Label[trainid])

            for testid in test_index.tolist():
                X_test.append(self.FeatureSet[testid])
                y_test.append(self.Label[testid])

            tree = self.buildtree(X_train)
            #self.post_pruning(tree, 0.3)
            pre_labels = self.predict(X_test, tree)

            # Model evaluation
            ACC = metrics.accuracy_score(y_test, pre_labels)
        #    MCC = metrics.matthews_corrcoef(y_test, pre_labels)
            SN = self.performance(y_test, pre_labels)
        #    print(SP, SN)
            print(ACC, SN)
Author: wyl-hit, Project: job, Lines: 25, Source: DecisionTree_Hand.py


Example 12: learn_decision_tree

def learn_decision_tree(data_set, label):
	#Create depths 
	depths = list(range(1,14))
	#Initialize the best model
	best_model = [None, 0, float("-inf")]
	#Create 13-fold
	kf = KFold(n_splits=13)
	track = []
	#Note: zip pairs each depth with one distinct fold, so each depth is scored on a single validation fold
	for (train, test), cdepth in zip(kf.split(data_set), depths):
		#Get training set
		train_set = [data_set[i] for i in train]
		train_label = [label[i] for i in train]
		#Get validation set
		valid_set = [data_set[i] for i in test]
		valid_label = [label[i] for i in test]
		#Learn the decision tree from data
		clf = tree.DecisionTreeClassifier(max_depth=cdepth)
		clf = clf.fit(train_set, train_label)
		#Get accuracy from the model
		accuraclabel = clf.score(valid_set, valid_label)
		#Compare accuracies
		track.append([cdepth, accuraclabel])
		if accuraclabel > best_model[2]:
			#Update the best model
			best_model = [clf, cdepth, accuraclabel]
	#Plot the graph
	fig = plt.figure()
	x = [x[0] for x in track]
	y = [x[1] for x in track]
	plt.xlabel('Depth')
	plt.ylabel('Accuracy')
	plt.title('Decision Tree')
	plt.plot(x,y)
	plt.savefig('decision_tree.png')
	return best_model
Author: x330930520, Project: HW3, Lines: 35, Source: Machine+Learning.py


Example 13: calculate_val

def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    
    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)
    
    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff),1)
    indices = np.arange(nrof_pairs)
    
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
      
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train)>=far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0
    
        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])
  
    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
Author: kissthink, Project: facenet_regonistant, Lines: 32, Source: facenet.py


Example 14: original_data

def original_data():
    for target in TARGETS:
        for algo_str in ALGORITHMS:
            algorithm = importlib.import_module('src.multi_class.' + algo_str)
            encoded_data = input_preproc.readFromDataset(
                INPUT_DIR + ORIGINAL_DATA_FILE,
                INPUT_COLS['original'],
                target
            )
            # Split into predictors and target
            X = np.array(encoded_data[encoded_data.columns.difference([target])])
            y = np.array(encoded_data[target])
            kf = KFold(n_splits=CROSS_VALIDATION_K, shuffle=True)

            f1s = []

            for train_index, test_index in kf.split(X):
                X_train, y_train = X[train_index], y[train_index]
                X_test, y_test = X[test_index], y[test_index]

                scaler = preprocessing.StandardScaler()
                X_train = pd.DataFrame(scaler.fit_transform(X_train))  # , columns=X_train.columns)
                X_test = scaler.transform(X_test)

                precision, recall, f1_score, accuracy = algorithm.runClassifier(X_train, X_test, y_train, y_test)
                f1s.append(f1_score)

            final_f1 = sum(f1s) / len(f1s)
            print("\n================================")
            print("%s, %s, F1 Score: %.6f" % (target, algo_str, final_f1))
            print("================================\n")
Author: cassinius, Project: right-to-forget-data, Lines: 31, Source: iMLBatchProcessing.py


Example 15: cross_validation

def cross_validation(train_data, train_labels, k_range=np.arange(1,16)):
    '''
    Perform 10-fold cross validation to find the best value for k

    Note: Previously this function took knn as an argument instead of train_data,train_labels.
    The intention was for students to take the training data from the knn object - this should be clearer
    from the new function signature.
    '''
    folds = 10
    kf = KFold(n_splits=folds)
    best_k = 1
    average_accuracy_for_best_k = 0
    
    for k in k_range:
        accuracy_sum = 0
        for train_index, test_index in kf.split(train_data):
            X_train, X_test = train_data[train_index], train_data[test_index]
            y_train, y_test = train_labels[train_index], train_labels[test_index]
            
            knn = KNearestNeighbor(X_train, y_train)
            validation_accuracy = classification_accuracy(knn, k, X_test, y_test)
            accuracy_sum += validation_accuracy
        
        average_accuracy = accuracy_sum/folds
        if (average_accuracy > average_accuracy_for_best_k):
            average_accuracy_for_best_k = average_accuracy
            best_k = k 
            
    return best_k, average_accuracy_for_best_k
Author: mchenchen, Project: Course-Work, Lines: 29, Source: q2_1.py


Example 16: test_multiclass_classification

def test_multiclass_classification():
    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        if output_margin:
            err = sum(1 for i in range(len(preds))
                      if preds[i].argmax() != labels[i]) / float(len(preds))
        else:
            err = sum(1 for i in range(len(preds))
                      if preds[i] != labels[i]) / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    y = iris['target']
    X = iris['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
        preds = xgb_model.predict(X[test_index])
        # test other params in XGBClassifier().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False,
                                   ntree_limit=3)
        labels = y[test_index]

        check_pred(preds, labels, output_margin=False)
        check_pred(preds2, labels, output_margin=True)
        check_pred(preds3, labels, output_margin=True)
        check_pred(preds4, labels, output_margin=False)
Author: dmlc, Project: xgboost, Lines: 33, Source: test_with_sklearn.py


Example 17: select

    def select(self):  
           
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # Implement model selection using CV
        NB_SPLITS = 3   
        mean_scores = []
        split_method = KFold(random_state=self.random_state, n_splits=NB_SPLITS)
        n_components = range(self.min_n_components, self.max_n_components + 1)
        
        try:
            for n_component in n_components:
                model = self.base_model(n_component)
                kfold_scores = []
                for _, test_idx in split_method.split(self.sequences):
                    test_X, test_length = combine_sequences(test_idx, self.sequences)
                    kfold_scores.append(model.score(test_X, test_length))
                    
                mean_scores.append(np.mean(kfold_scores))
                
        except Exception as e:
            pass
        
        if len(mean_scores) > 0:
            states = n_components[np.argmax(mean_scores)]
        else:
            states = self.n_constant

        return self.base_model(states)
Author: blouargant, Project: UDACITY, Lines: 29, Source: my_model_selectors.py


Example 18: test_regression_with_custom_objective

def test_regression_with_custom_objective():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.model_selection import KFold

    def objective_ls(y_true, y_pred):
        grad = (y_pred - y_true)
        hess = np.ones(len(y_true))
        return grad, hess

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBRegressor(objective=objective_ls).fit(
            X[train_index], y[train_index]
        )
        preds = xgb_model.predict(X[test_index])
        labels = y[test_index]
    assert mean_squared_error(preds, labels) < 25

    # Test that the custom objective function is actually used
    class XGBCustomObjectiveException(Exception):
        pass

    def dummy_objective(y_true, y_pred):
        raise XGBCustomObjectiveException()

    xgb_model = xgb.XGBRegressor(objective=dummy_objective)
    np.testing.assert_raises(XGBCustomObjectiveException, xgb_model.fit, X, y)
Author: dmlc, Project: xgboost, Lines: 31, Source: test_with_sklearn.py


Example 19: test_boston_housing_regression

def test_boston_housing_regression():
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.model_selection import KFold

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    kf = KFold(n_splits=2, shuffle=True, random_state=rng)
    for train_index, test_index in kf.split(X, y):
        xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])

        preds = xgb_model.predict(X[test_index])
        # test other params in XGBRegressor().fit
        preds2 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=3)
        preds3 = xgb_model.predict(X[test_index], output_margin=True,
                                   ntree_limit=0)
        preds4 = xgb_model.predict(X[test_index], output_margin=False,
                                   ntree_limit=3)
        labels = y[test_index]

        assert mean_squared_error(preds, labels) < 25
        assert mean_squared_error(preds2, labels) < 350
        assert mean_squared_error(preds3, labels) < 25
        assert mean_squared_error(preds4, labels) < 350
Author: dmlc, Project: xgboost, Lines: 26, Source: test_with_sklearn.py


Example 20: cross_validate

    def cross_validate(self, values_labels, folds=10, processes=1):
        """
        Trains and tests the model against folds of labeled data.

        :Parameters:
            values_labels : [( `<feature_values>`, `<label>` )]
                an iterable of labeled data where `<feature_values>` is an
                ordered collection of predictive values that correspond to
                the `Feature` s provided to the constructor
            folds : `int`
                the number of folds to split the data into
            processes : `int`
                When set to 1, cross-validation will run in the parent thread.
                When set to 2 or greater, a :class:`multiprocessing.Pool` will
                be created.
        """
        folds_i = KFold(n_splits=folds, shuffle=True,
                        random_state=0)
        if processes == 1:
            mapper = map
        else:
            pool = Pool(processes=processes or cpu_count())
            mapper = pool.map
        results = mapper(self._cross_score,
                         ((i, [values_labels[i] for i in train_i],
                           [values_labels[i] for i in test_i])
                          for i, (train_i, test_i) in enumerate(
                              folds_i.split(values_labels))))
        agg_score_labels = []
        for score_labels in results:
            agg_score_labels.extend(score_labels)

        self.info['statistics'].fit(agg_score_labels)

        return self.info['statistics']
Author: wiki-ai, Project: revscoring, Lines: 33, Source: model.py



Note: The sklearn.model_selection.KFold class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The code snippets are selected from open-source projects contributed by many developers; copyright belongs to the original authors, and distribution and use should follow each project's license. Do not repost without permission.

