Python ensemble.GradientBoostingClassifier Class Code Examples


This article collects typical usage examples of the Python class sklearn.ensemble.GradientBoostingClassifier. If you have been wondering what GradientBoostingClassifier does, how to use it, or want to see it applied in real code, the curated class examples below should help.



The following presents 20 code examples of the GradientBoostingClassifier class, ordered by popularity.
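
Before the collected examples, here is a minimal, self-contained sketch of the typical fit/predict cycle. It uses synthetic data from make_classification, and every hyperparameter value is illustrative rather than tuned:

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# Synthetic binary classification data, purely for illustration
X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Illustrative hyperparameters; tune these for real data
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=0)
clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))      # accuracy on held-out data
print(clf.predict_proba(X_test[:5]))  # per-class probabilities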

Example 1: main

def main(args):
    global verbose
    verbose = args.verbose

    # Load files
    if verbose: logger.info('Loading {}'.format(args.train_file))
    train_X, train_y = load_file(args.train_file)
    if verbose: logger.info('Loading {}'.format(args.test_file))
    test_X, test_y = load_file(args.test_file)

    # # Codes for Grid Search
    # params = [
    #     {'n_estimators': [50000], 'learning_rate': [2**i for i in np.arange(-10, -9, .25)], 'max_features': ['log2',], 'max_depth': [7,]},
    # ]
    # method = GradientBoostingClassifier(random_state=1, verbose=1)
    # gscv = GridSearchCV(method, params, scoring='roc_auc', verbose=verbose, n_jobs=5)
    # gscv.fit(train_X.toarray(), train_y)
    # if verbose:
    #     for params, mean_score, all_scores in gscv.grid_scores_:
    #         logger.info('{:.6f} (+/- {:.6f}) for {}'.format(mean_score, all_scores.std() / 2, params))
    #     logger.info('params:{params}'.format(params=gscv.best_params_))
    #     logger.info('score:{params}'.format(params=gscv.best_score_))
    # pred = gscv.best_estimator_.predict_proba(test_X.toarray())

    # Best parameters for the competition data
    method = GradientBoostingClassifier(n_estimators=50000, learning_rate=2**(-9.5),
                                        max_features='log2', max_depth=7,
                                        random_state=1, verbose=1)
    method.fit(train_X.toarray(), train_y)
    pred = method.predict_proba(test_X.toarray())

    np.savetxt(args.output, pred[:, 1], fmt='%.6f')
    if verbose: logger.info('Wrote preds to {file}'.format(file=args.output))

    return 0
Author: rsingh2083 | Project: uob | Lines: 35 | Source: gbc.py
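
A side note on the commented-out grid search above: it reads gscv.grid_scores_, which was removed in scikit-learn 0.20. A sketch of the equivalent reporting loop against the current cv_results_ dict, assuming the same gscv object and logger as in the example:

# Assumes gscv = GridSearchCV(...).fit(...) as in the example above
results = gscv.cv_results_
for mean, std, params in zip(results['mean_test_score'],
                             results['std_test_score'],
                             results['params']):
    logger.info('{:.6f} (+/- {:.6f}) for {}'.format(mean, std, params))
logger.info('params:{}'.format(gscv.best_params_))
logger.info('score:{}'.format(gscv.best_score_))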


Example 2: test_partial_dependecy_input

def test_partial_dependecy_input():
    # Test input validation of partial dependence.
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)

    assert_raises(ValueError, partial_dependence,
                  clf, [0], grid=None, X=None)

    assert_raises(ValueError, partial_dependence,
                  clf, [0], grid=[0, 1], X=X)

    # first argument must be an instance of BaseGradientBoosting
    assert_raises(ValueError, partial_dependence,
                  {}, [0], X=X)

    # Gradient boosting estimator must be fit
    assert_raises(ValueError, partial_dependence,
                  GradientBoostingClassifier(), [0], X=X)

    assert_raises(ValueError, partial_dependence, clf, [-1], X=X)

    assert_raises(ValueError, partial_dependence, clf, [100], X=X)

    # wrong ndim for grid
    grid = np.random.rand(10, 2, 1)
    assert_raises(ValueError, partial_dependence, clf, [0], grid=grid)
Author: kevin-coder | Project: scikit-learn-fork | Lines: 26 | Source: test_partial_dependence.py
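
For contrast with the failure cases above, a minimal valid call. The legacy sklearn.ensemble.partial_dependence module exercised by this test was removed in scikit-learn 0.24; the sketch below uses the current sklearn.inspection.partial_dependence instead, so treat it as an assumption about the modern API rather than part of the original test:

from sklearn.inspection import partial_dependence

# clf is the fitted GradientBoostingClassifier from the test above
pd_result = partial_dependence(clf, X, features=[0], grid_resolution=20)
print(pd_result['average'])  # averaged model output over the grid for feature 0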


Example 3: test_gradient_boosting_early_stopping

def test_gradient_boosting_early_stopping():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=1000,
                                     n_iter_no_change=10,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)

    gbr = GradientBoostingRegressor(n_estimators=1000, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=42)
    # Check if early_stopping works as expected
    for est, tol, early_stop_n_estimators in ((gbc, 1e-1, 24), (gbr, 1e-1, 13),
                                              (gbc, 1e-3, 36),
                                              (gbr, 1e-3, 28)):
        est.set_params(tol=tol)
        est.fit(X_train, y_train)
        assert_equal(est.n_estimators_, early_stop_n_estimators)
        assert est.score(X_test, y_test) > 0.7

    # Without early stopping
    gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                     max_depth=3, random_state=42)
    gbc.fit(X, y)
    gbr = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1,
                                    max_depth=3, random_state=42)
    gbr.fit(X, y)

    assert gbc.n_estimators_ == 100
    assert gbr.n_estimators_ == 200
Author: amueller | Project: scikit-learn | Lines: 33 | Source: test_gradient_boosting.py
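
The built-in mechanism tested above holds out validation_fraction of the training data internally and stops once the score fails to improve by tol for n_iter_no_change iterations. A common manual alternative, sketched here under the same data setup, monitors staged predictions on an explicit held-out set and picks the best stage after the fact:

import numpy as np
from sklearn.metrics import log_loss

# gbc fitted with a generous n_estimators; choose the stage that
# minimizes held-out log loss instead of relying on the internal split
staged_losses = [log_loss(y_test, proba)
                 for proba in gbc.staged_predict_proba(X_test)]
best_n_estimators = int(np.argmin(staged_losses)) + 1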


Example 4: PlotFeaturesImportance

def PlotFeaturesImportance(X,y,featureNames,dataName):
    '''
    Plot the relative contribution/importance of the features.
    Best to reduce to top X features first - for interpretability
    Code example from:
    http://bugra.github.io/work/notes/2014-11-22/an-introduction-to-supervised-learning-scikit-learn/
    '''
    gbc = GradientBoostingClassifier(n_estimators=40)
    gbc.fit(X, y)
    # Get Feature Importance from the classifier
    feature_importance = gbc.feature_importances_
    # Normalize The Features
    feature_importance = 100 * (feature_importance / feature_importance.max())
    sorted_idx = numpy.argsort(feature_importance)
    pos = numpy.arange(sorted_idx.shape[0]) + 4.5
    # pos = numpy.arange(sorted_idx.shape[0])
    # plt.figure(figsize=(16, 12))
    plt.figure(figsize=(14, 9), dpi=250)
    plt.barh(pos, feature_importance[sorted_idx], align='center', color='#7A68A6')
    #plt.yticks(pos, numpy.asanyarray(df.columns.tolist())[sorted_idx]) #ORIG
    plt.yticks(pos, numpy.asanyarray(featureNames)[sorted_idx])

    plt.xlabel('Relative Importance')
    plt.title('%s: Top Features' %(dataName))
    plt.grid(False)
    plt.ion()
    plt.show()
    plt.savefig(str(dataName)+'TopFeatures.png',dpi=200)
Author: MichaelDoron | Project: ProFET | Lines: 28 | Source: VisualizeBestFeatures.py
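
Impurity-based feature_importances_, as plotted above, can overstate high-cardinality features. A hedged sketch of the permutation-importance alternative from sklearn.inspection (available since scikit-learn 0.22), reusing the fitted gbc from the example:

from sklearn.inspection import permutation_importance

# Prefer a held-out set here if one is available
result = permutation_importance(gbc, X, y, n_repeats=10, random_state=0)
for idx in result.importances_mean.argsort()[::-1][:10]:
    print(featureNames[idx], result.importances_mean[idx])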


Example 5: train_GBDT

 def train_GBDT(self):
     samples=self.trainset.values
     target=self.trainlabel.values
     classifier_GB=GradientBoostingClassifier(n_estimators=1000)
     classifier_GB.fit(samples,target)
     
     return classifier_GB
Author: NatureBlack | Project: Kaggle | Lines: 7 | Source: titanic.py


Example 6: train

def train():
    posi_result = {}
    train_feature, test_feature, train_id_list, test_id_list, train_tar_list = merge_feature(feature_str)
    tmp1 = [m < 32 for m in train_tar_list]
    tmp1 = np.array(tmp1)
    # keep only rows whose target passes the filter, so features and targets stay aligned
    train_feature = train_feature[tmp1]
    target_list = np.array(train_tar_list)
    target_list = target_list[tmp1]
    # train_id_list = np.array(train_id_list)
    # train_id_list = train_id_list[tmp1]
    c_feature = train_feature.columns[:]
    clf1 = RandomForestClassifier(n_estimators=200, min_samples_split=17)
    clf1.fit(train_feature[c_feature], target_list)
    # rf_preds = clf1.predict(test_feature)
    rf_prob = clf1.predict_proba(test_feature)
    gbdt1 = GradientBoostingClassifier(n_estimators=150, min_samples_split=17)
    gbdt1.fit(train_feature[c_feature], target_list)
    # gbdt_preds = gbdt1.predict(test_feature)
    gbdt_prob = gbdt1.predict_proba(test_feature)
    all_prob = rf_prob + gbdt_prob
    all_preds = []
    print all_prob.shape
    for k in range(all_prob.shape[0]):
        prob1 = list(all_prob[k, :])
        ind1 = prob1.index(max(prob1))
        all_preds.append(ind1)
    for j in range(len(all_preds)):
        all_pre_name = dl.get_num_position(all_preds[j])
        posi_result[test_id_list[j]] = all_pre_name
    return posi_result
Author: yinzhao0312 | Project: position-predict | Lines: 30 | Source: posi_predict.py


Example 7: main

def main():
	makeSub = True
	featureImportance = False
	cvfold = True
	df = pd.read_csv('../data/cprobTrain15NA.csv')

	X, y = np.array(pd.read_csv('../data/train.csv',usecols=range(1,9))), np.array(pd.read_csv('../data/train.csv').ACTION)
	X = np.hstack((X,np.array(df)))

	params = {'max_depth':4, 'subsample':0.5, 'verbose':0, 'random_state':1337,
		'min_samples_split':10, 'min_samples_leaf':10, 'max_features':10,
		'n_estimators': 350, 'learning_rate': 0.05}	

	clf = GradientBoostingClassifier(**params)
	prefix = 'lib/gbm350d4m10c15'
	if cvfold:
		c = classifier.Classifier(X,y)
		c.validate(clf,nFolds=10,out=prefix+'Train.csv')

	if makeSub:
		Xt = np.array(pd.read_csv('../data/test.csv',usecols=range(1,9)))
		Xt = np.hstack((Xt,np.array(pd.read_csv('../data/cprobTest15NA.csv'))))
		clf.fit(X,y)
		y_ = clf.predict_proba(Xt)[:,1]
		out = pd.read_csv('subs/nbBaseTest.csv')
		out.ACTION = y_
		out.to_csv(prefix+'Test.csv',index=False)

	if featureImportance:
		print "Feature ranking:"
		importances = clf.feature_importances_
		indices = np.argsort(importances)[::-1]
		np.savetxt('indices.txt',indices,delimiter=',')
		for f in xrange(df.shape[1]):
			print "%d. feature (%s,%f)" % (f + 1, df.columns[indices[f]], importances[indices[f]])
Author: nmkridler | Project: access | Lines: 35 | Source: predict.py


Example 8: partial_dependence

def partial_dependence(df, y):
    '''
    INPUT: df = feature DataFrame
           y = binary target variable with imbalanced classes
    OUTPUT: partial dependence plots for the six most important features

    Oversamples the minority class until each class makes up 50% of the
    training data, runs the feature-engineering pipeline, fits a gradient
    boosting classifier, and plots partial dependence of the top features.
    '''
    X_train, X_test, y_train, y_test = oversample_train_test(df, y)
    # X_train, X_test, y_train, y_test = train_test_split(df, y, random_state=42)

    feature_engineering = Pipeline([
        ('lists', ListSplitter()),
        ('race', RaceDummies()),
        ('crime_sentence', CrimeAndSentence()),
        ('feat_eng', FeatureEngineer()),
        ('columns', ColumnFilter(prejudice=False))
    ])

    X = feature_engineering.fit_transform(X_train.copy(), y_train)
    X_test = feature_engineering.fit_transform(X_test.copy(), y_test)

    gbc = GradientBoostingClassifier(n_estimators=850, learning_rate=.75)
    gbc.fit(X.copy(), y_train)
    most_imp = np.argsort(gbc.feature_importances_)[-6:]

    names = list(X_test.columns)
    feats = list(most_imp)
    fig, axs = plot_partial_dependence(gbc, X_test, feats, feature_names=names,
                                       n_jobs=3, grid_resolution=50)
Author: dannyprikaz | Project: megans_law_project | Lines: 32 | Source: functions.py
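
plot_partial_dependence as used in this example lived in the legacy sklearn.ensemble.partial_dependence module; in scikit-learn 1.0 and later the equivalent figure comes from PartialDependenceDisplay in sklearn.inspection. A sketch with the same fitted model and feature list (an assumption about the modern API, not the code the original author ran):

from sklearn.inspection import PartialDependenceDisplay

# gbc, X_test, feats and names as defined in the example above
display = PartialDependenceDisplay.from_estimator(
    gbc, X_test, features=feats, feature_names=names,
    n_jobs=3, grid_resolution=50)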


Example 9: trainModelComb4

 def trainModelComb4(self):
     ntrain = self.data_train.shape[0]
     self.xtra = 5
     est_prob = np.zeros([ntrain,self.xtra+1]) #for original data, essay and others, which would be fed to a second gb 
     
     self.mlmodel2 = [LogisticRegression() for i in range(self.xtra)]
     for i in range(self.xtra-1):  
         self.mlmodel2[i].fit(self.data_train,self.labels_train[:,i+1])
         set_result =  self.mlmodel2[i].predict_proba(self.data_train)
         est_prob[:,i] = set_result[:,1]
                 
     self.mlmodel2[self.xtra-1].fit(self.data_train_ess,self.labels_train[:,0])
     set_result2 = self.mlmodel2[self.xtra-1].predict_proba(self.data_train_ess)
     est_prob[:,self.xtra-1] = set_result2[:,1]
     
     #self.data_train = np.hstack((self.data_train,est_prob))
     
     #self.mlmodel = AdaBoostClassifier()
     self.mlmodel = GradientBoostingClassifier(learning_rate=0.2,subsample=0.4)
     #self.mlmodel = RandomForestClassifier(n_estimators = 200, n_jobs=3,verbose =1)    
     self.mlmodel.fit(self.data_train,self.labels_train[:,0])
     set_result3 = self.mlmodel.predict_proba(self.data_train)
     est_prob[:,self.xtra] = set_result3[:,1]
     
     #2nd layer GB
     self.mlmodel3 = GradientBoostingClassifier(learning_rate=0.1)
     self.mlmodel3.fit(est_prob,self.labels_train[:,0])
Author: thusithaC | Project: KDD2014 | Lines: 27 | Source: learnModel.py


Example 10: run_gradient_boosting_classifier

def run_gradient_boosting_classifier(data, _max_depth):
    (feature_train, feature_test, label_train, label_test) = train_test_split(data[:, 0:-1], data[:, -1].astype(int),
                                                                              test_size=0.25)
    # TODO: Vary Number of Estimators and Learning Rate
    gbc = GradientBoostingClassifier(learning_rate=0.1, n_estimators=50, max_depth=_max_depth, verbose = True)
    gbc.fit(feature_train, label_train)
    training_error = gbc.score(feature_train, label_train)
    #cross_validation_score = cross_val_score(gbc, feature_train, label_train, cv=10)
    testing_error = gbc.score(feature_test, label_test)

    print "Random Forest Results for Max Depth:", _max_depth
    print "Training Accuracy:", training_error
    #print "10-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (cross_validation_score.mean(), cross_validation_score.std() * 2)
    print "Testing Accuracy:", testing_error

    feature_importance = gbc.feature_importances_
    stddev = np.std([tree[0].feature_importances_ for tree in gbc.estimators_], axis=0)
    indices = np.argsort(feature_importance)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(len(feature_importance)):
        print("%d. feature %d (%f)" % (f + 1, indices[f], feature_importance[indices[f]]))

    plot_feature_importance(feature_importance, indices, stddev, "gradient-boosted-classifier-feature-importance-depth-" + str(_max_depth))
Author: BeifeiZhou | Project: social-network-recommendation | Lines: 25 | Source: runClassifier.py


Example 11: main

def main():
    print '[INFO, time: %s] Getting Data....' % (time.strftime('%H:%M:%S'))
    testing_file = file('test.p', 'rb')
    training_file = file('train.p', 'rb')

    train = pickle.load(training_file)
    test = pickle.load(testing_file)

    testing_file.close()
    training_file.close()
    
    trainX = train[:,:-1]
    trainy = train[:,-1]
    
    testX = test[:,:-1]
    testy = test[:,-1]

    print '[INFO, time: %s] Fitting %s ...' % (time.strftime('%H:%M:%S'), 'GradientBoostingClassifier(n_estimators=1000)')
    clf = GradientBoostingClassifier(n_estimators=1000)
    clf.fit(trainX, trainy)

    print '[INFO, time: %s] Making Predictions...' % (time.strftime('%H:%M:%S'))
    prediction = clf.predict(testX)
    print '[RESULT, time: %s] accuracy = %f' % (time.strftime('%H:%M:%S'),accuracy_score(testy, prediction))


    model_save_file = file('gradient_1000.p', 'wb')
    pickle.dump(clf, model_save_file)
    model_save_file.close()
    print 'All done'
Author: sanketrahul | Project: cs-412_ml_course_project | Lines: 30 | Source: gradientboosting_1000estimators.py


Example 12: predict

def predict(fea, df, t, t9):
    Un = df.columns == 'Blank'
    for f in fea:
        '''        
        try:
            df[(f+'_y')] = df[(f+'_x')] - df[(f+'_y')]
            print(1)
        except:
            pass
        '''
        Un = Un | (df.columns == f)
        Un = Un | (df.columns == (f+'_x'))
        Un = Un | (df.columns == (f+'_y'))
    Un = Un & (df.columns != 'New_y')    
    clf = GradientBoostingClassifier()
    y = df[t].label
    X = df[t].ix[:,Un]
    X_train, X_test, y_train, y_test=train_test_split(X, y, test_size = 0.9, random_state = 1)
    clf.fit(X_train, y_train)
    re = 'Testing AUC: \t' + str(roc_auc_score(y_test,clf.predict_proba(X_test)[:,1]))  
    print re
    re =  'September AUC: \t' + str(roc_auc_score(df[t9].label,clf.predict_proba(df[t9].ix[:,Un])[:,1]))
    print re
    print(X.columns)
    print(clf.feature_importances_)
    return Un, clf
Author: duxuhao | Project: wo_plus | Lines: 26 | Source: ThreeMonth2.py


Example 13: ctr_gbdt

def ctr_gbdt(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)

    prediction_model = GradientBoostingClassifier(
        loss='deviance',
        learning_rate=0.1,
        n_estimators=30,
        subsample=1.0,
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=5,
    )

    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)

    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)

    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)

        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)

    print 'loss_train: %s' % loss_train
    print 'loss_test: %s' % loss_test
Author: kazarinov | Project: hccf | Lines: 29 | Source: sklearn_experiments.py


Example 14: train_classifiers

def train_classifiers(X_data, y_data):
    ############ Linear SVM: 0.908 #############
    clf_LSVM = svm.SVC(kernel = 'linear')
    clf_LSVM.fit(X_data, y_data)
    
    ############ MultinomialNB: 0.875 #############
    clf_MNB = MultinomialNB()
    clf_MNB.fit(X_data, y_data)
    
    ############ Random Forest: 0.910 #############
    clf_RF = RandomForestClassifier(n_estimators=200, criterion='entropy')
    clf_RF.fit(X_data, y_data)
    
    ############ Extra Tree: 0.915 ##################
    clf_ETC = ExtraTreesClassifier(n_estimators=500, max_depth=None, min_samples_split=1, random_state=0)
    clf_ETC.fit(X_data, y_data)
    
    ############ AdaBoost: 0.88 ##################
    clf_Ada = AdaBoostClassifier()
    clf_Ada.fit(X_data, y_data)
    
    ############ rbf SVM: 0.895 #############
    clf_rbf = svm.SVC(C=200, gamma=0.06, kernel='rbf')
    clf_rbf.fit(X_data, y_data)
    
    ############ GradientBoosting: 0.88 #############
    clf_GBC = GradientBoostingClassifier()
    clf_GBC.fit(X_data, y_data)
    
    return clf_LSVM, clf_MNB, clf_RF, clf_ETC, clf_Ada, clf_rbf, clf_GBC    
Author: yiwuxie15 | Project: yiwuxie15-urlBootstrap | Lines: 30 | Source: urlAnalysis.py


Example 15: ada_boost

def ada_boost():
    savefile = open('traindata.pkl', 'rb')
    (x_train, y_train, t1) = cPickle.load(savefile)
    savefile.close()
    savefile = open('testdata.pkl', 'rb')
    (x_test, t1, name1) = cPickle.load(savefile)
    savefile.close()
    
#    X_train, X_valid, y_train, y_valid = cross_validation.train_test_split(
#    X, y, test_size=0.1, random_state=42)
    
    x_train = np.asarray(x_train,dtype=np.float32)
    y_train = np.asarray(y_train, dtype='int32')-1   
    
    nest = 190
    lr = .1
    md = 6
#    clf1 = DecisionTreeClassifier(max_depth=2)
#    clf = AdaBoostClassifier(clf1, n_estimators=200, learning_rate=.25)
    clf = GradientBoostingClassifier(n_estimators=nest, learning_rate=lr, max_depth=md, random_state=0)
#    clf = RandomForestClassifier(n_estimators=200) #.81
#    clf = ExtraTreesClassifier(n_estimators=1000, max_depth=None, min_samples_split=10, random_state=0,n_jobs=8) #.81
#    clf = KNeighborsClassifier(15)
    if 1:
        clf.fit(x_train, y_train)
        ypred = clf.predict_proba(x_test)
        y_str = ['Class_1','Class_2','Class_3','Class_4','Class_5','Class_6','Class_7','Class_8','Class_9']
        kcsv.print_csv(ypred, name1, y_str,indexname='id')
        print (nest, lr, md) 
    
    if 0:
        multiclass_log_loss = make_scorer(score_func=logloss_mc, greater_is_better=True, needs_proba=True)
        scores = cross_val_score(clf, x_train, y_train, n_jobs=8, cv=5,scoring=multiclass_log_loss)
        print scores
        print (nest, lr, md, scores.mean())  
Author: coreyhahn | Project: kaggle_otto | Lines: 35 | Source: ada_base00.py


Example 16: Blender

class Blender(BaseEstimator, ClassifierMixin):
    def __init__(self, trained_clfs):
        self.clfs = trained_clfs
        # self.classifier = make_pipeline(OneHotEncoder(), DenseTransformer(),
        #                                 GradientBoostingClassifier())
        self.classifier = GradientBoostingClassifier()
        # self.classifier = make_pipeline(
        #     OneHotEncoder(), LogisticRegression(class_weight='auto'))

    def fit(self, data, target):
        # self.enc = LabelEncoder().fit(target)
        probs = self.transform_input(data)
        # self.classifier.fit(predictions, target)
        self.classifier.fit(probs, target)

    def predict(self, data):
        predictions = self.transform_input(data)
        return self.classifier.predict(predictions)

    def transform_input(self, data):
        probabilities = [clf.predict_proba(data) for clf in self.clfs]

        probabilities = np.array(probabilities)
        # features, samples = probabilities.shape
        n_clfs, samples, features = probabilities.shape
        probabilities = np.reshape(probabilities, (samples, n_clfs * features))
        probabilities[np.isnan(probabilities)] = 0
        return probabilities
Author: Kyo91 | Project: recipe-learners | Lines: 28 | Source: blender.py
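
The hand-rolled blender above reshapes base-classifier probabilities into a feature matrix for a gradient boosting meta-learner. Since scikit-learn 0.22 the same pattern is available directly as StackingClassifier, which additionally fits the base estimators itself and uses cross-validated out-of-fold predictions to reduce leakage. A sketch with placeholder base estimators:

from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Placeholder base estimators; X_train and y_train assumed available
stack = StackingClassifier(
    estimators=[('lr', LogisticRegression()), ('svc', SVC(probability=True))],
    final_estimator=GradientBoostingClassifier(),
    stack_method='predict_proba')
stack.fit(X_train, y_train)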


Example 17: gradientboost_prediction

def gradientboost_prediction(features_train, labels_train, features_test, ids):

    class RandomForestClassifier_compability(RandomForestClassifier):
        def predict(self, X):
            return self.predict_proba(X)[:, 1][:,np.newaxis]

    base_estimator = RandomForestClassifier_compability()

    clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.1,
                             n_estimators=5, subsample=0.3,
                             min_samples_split=2,
                             min_samples_leaf=1,
                             max_depth=3,
                             init=base_estimator,
                             random_state=None,
                             max_features=None,
                             verbose=2)

    clf = clf.fit(features_train, labels_train)

    pred = clf.predict_proba(features_test)[:,1]

    # feature_importance = clf.feature_importances_
    #
    # print (feature_importance)

    predictions_file = open("data/rf_prediction.csv", "wb")
    predictions_file_object = csv.writer(predictions_file)
    predictions_file_object.writerow(["ID", "TARGET"])
    predictions_file_object.writerows(zip(ids, pred))
    predictions_file.close()
Author: canivel | Project: Kaggle-Santander | Lines: 32 | Source: regular_classifiers.py


Example 18: test_oob_improvement

def test_oob_improvement():
    """Test if oob improvement has correct shape and regression test. """
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5)
    clf.fit(X, y)
    assert clf.oob_improvement_.shape[0] == 100
    # hard-coded regression test - change if modification in OOB computation
    assert_array_almost_equal(clf.oob_improvement_[:5], np.array([0.19, 0.15, 0.12, -0.12, -0.11]), decimal=2)
Author: Anubhav27 | Project: scikit-learn | Lines: 7 | Source: test_gradient_boosting.py
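
With subsample=0.5 as above, oob_improvement_ records the out-of-bag improvement in loss at each boosting stage. A common follow-up, mirroring the scikit-learn OOB example, picks the iteration where the cumulative improvement peaks:

import numpy as np

# clf fitted with subsample < 1.0 as in the test above
cumulative_oob = np.cumsum(clf.oob_improvement_)
best_n_estimators = int(np.argmax(cumulative_oob)) + 1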


Example 19: final_run

def final_run(X,Y,Xtest,n_est):
    clf = GradientBoostingClassifier(n_estimators=n_est,random_state=n_est)
    clf = clf.fit(X,Y)
    #np.savetxt('gb_oob_improve_{}'.format(n_est),clf.oob_score_)
    #np.savetxt('gb_train_score_{}'.format(n_est),clf.train_score_)
    Ytest=clf.predict(Xtest)
    output(Ytest,'gradient_boost_{}.csv'.format(n_est))
Author: chrinide | Project: us | Lines: 7 | Source: gradient_boost.py


Example 20: gbdt_train

    def gbdt_train(self, data, task_id, window=DEFAULT_WINDOW):
        """
        Train a gbdt model.

        :param data: Training dataset.
        :param task_id: The id of the training task.
        :param window: the length of window
        """
        X_train = []
        y_train = []
        features = self.__calculate_features(data, window)
        if not features:
            return TSD_LACK_SAMPLE
        for index in features:
            X_train.append(index[0])
            y_train.append(index[1])
        X_train = np.array(X_train)
        y_train = np.array(y_train)
        try:
            grd = GradientBoostingClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth, learning_rate=self.learning_rate)
            grd.fit(X_train, y_train)
            model_name = MODEL_PATH + task_id + "_model"
            joblib.dump(grd, model_name)
        except Exception as ex:
            return TSD_TRAIN_ERR, str(ex)
        return TSD_OP_SUCCESS, ""
Author: lixuefeng123 | Project: Metis | Lines: 26 | Source: gbdt.py
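
The training path above persists the fitted model with joblib.dump; the matching inference path is symmetric. A minimal sketch, assuming the same MODEL_PATH naming convention and a feature matrix X_new produced by the same windowing code (X_new is a placeholder name, not part of the original class):

import joblib

# Load the model saved by gbdt_train and score new windows
model_name = MODEL_PATH + task_id + "_model"
grd = joblib.load(model_name)
anomaly_proba = grd.predict_proba(X_new)[:, 1]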



Note: The sklearn.ensemble.GradientBoostingClassifier class examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and use or redistribution is subject to each project's license. Please do not reproduce without permission.

