
Python ensemble.GradientBoostingRegressor Class Code Examples


This article collects typical usage examples of the Python class sklearn.ensemble.GradientBoostingRegressor. If you are wondering what GradientBoostingRegressor does, how to use it, or want to see it applied in practice, the curated class examples below should help.



The following presents 20 code examples of the GradientBoostingRegressor class, ordered by popularity by default.
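Before the examples, here is a minimal, self-contained sketch of typical GradientBoostingRegressor usage; the dataset is synthetic and the hyperparameter values are illustrative only:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

# Synthetic stand-in data for a real regression problem.
X, y = make_regression(n_samples=1000, n_features=20, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit a boosted-tree regressor and report held-out R^2.
reg = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0)
reg.fit(X_train, y_train)
print("Test R^2:", reg.score(X_test, y_test))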

Example 1: train_model

def train_model(features, label, params):
    # Preprocessing (scaling is disabled here; enable if the features need it)
    # scaled_features = preprocessing.scale(features)
    scaled_features = features

    total_rmse = 0.0
    count = 0

    # Pre-0.18 scikit-learn KFold API; see the modern sketch below.
    kf = KFold(len(scaled_features), n_folds=10)

    for train_index, validation_index in kf:
        X_train, X_validation = scaled_features[train_index], scaled_features[validation_index]
        Y_train, Y_validation = label[train_index], label[validation_index]

        # estimator = SVR(**params)
        # estimator = RandomForestRegressor(**params)
        estimator = GradientBoostingRegressor(**params)
        estimator.fit(X_train, Y_train)

        current_rmse = calculate_RMSE(estimator, X_validation, Y_validation)
        total_rmse += current_rmse
        count += 1

    # Average RMSE across the folds
    avg_current_rmse = total_rmse / float(count)
    print("Avg Current RMSE " + str(avg_current_rmse))

    return (params, avg_current_rmse)
Developer: Amortized | Project: Restaurant-Revenue-Predictor | Lines: 31 | Source: process.py
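As noted in the snippet, KFold(n, n_folds=...) is the pre-0.18 scikit-learn API. A minimal sketch of the same cross-validated loop against the modern model_selection API, on synthetic stand-in data (the example's calculate_RMSE helper is inlined):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

X, y = make_regression(n_samples=500, n_features=10, random_state=0)
params = {"n_estimators": 100, "max_depth": 3}

rmses = []
for train_idx, val_idx in KFold(n_splits=10).split(X):
    est = GradientBoostingRegressor(**params).fit(X[train_idx], y[train_idx])
    rmse = np.sqrt(mean_squared_error(y[val_idx], est.predict(X[val_idx])))
    rmses.append(rmse)

print("Avg RMSE:", np.mean(rmses))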


Example 2: test_gradient_boosting_validation_fraction

def test_gradient_boosting_validation_fraction():
    X, y = make_classification(n_samples=1000, random_state=0)

    gbc = GradientBoostingClassifier(n_estimators=100,
                                     n_iter_no_change=10,
                                     validation_fraction=0.1,
                                     learning_rate=0.1, max_depth=3,
                                     random_state=42)
    gbc2 = clone(gbc).set_params(validation_fraction=0.3)
    gbc3 = clone(gbc).set_params(n_iter_no_change=20)

    gbr = GradientBoostingRegressor(n_estimators=100, n_iter_no_change=10,
                                    learning_rate=0.1, max_depth=3,
                                    validation_fraction=0.1,
                                    random_state=42)
    gbr2 = clone(gbr).set_params(validation_fraction=0.3)
    gbr3 = clone(gbr).set_params(n_iter_no_change=20)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    # Check if validation_fraction has an effect
    gbc.fit(X_train, y_train)
    gbc2.fit(X_train, y_train)
    assert gbc.n_estimators_ != gbc2.n_estimators_

    gbr.fit(X_train, y_train)
    gbr2.fit(X_train, y_train)
    assert gbr.n_estimators_ != gbr2.n_estimators_

    # Check if n_estimators_ increase monotonically with n_iter_no_change
    # Set validation
    gbc3.fit(X_train, y_train)
    gbr3.fit(X_train, y_train)
    assert gbr.n_estimators_ < gbr3.n_estimators_
    assert gbc.n_estimators_ < gbc3.n_estimators_
Developer: amueller | Project: scikit-learn | Lines: 34 | Source: test_gradient_boosting.py


Example 3: test_feature_importance_regression

def test_feature_importance_regression():
    """Test that Gini importance is calculated correctly.

    This test follows the example from [1]_ (pg. 373).

    .. [1] Friedman, J., Hastie, T., & Tibshirani, R. (2001). The elements
       of statistical learning. New York: Springer series in statistics.
    """
    california = fetch_california_housing()
    X, y = california.data, california.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    reg = GradientBoostingRegressor(loss='huber', learning_rate=0.1,
                                    max_leaf_nodes=6, n_estimators=100,
                                    random_state=0)
    reg.fit(X_train, y_train)
    sorted_idx = np.argsort(reg.feature_importances_)[::-1]
    sorted_features = [california.feature_names[s] for s in sorted_idx]

    # The most important feature is the median income by far.
    assert sorted_features[0] == 'MedInc'

    # The three subsequent features are the following. Their relative ordering
    # might change a bit depending on the randomness of the trees and the
    # train / test split.
    assert set(sorted_features[1:4]) == {'Longitude', 'AveOccup', 'Latitude'}
Developer: amueller | Project: scikit-learn | Lines: 26 | Source: test_gradient_boosting.py
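Impurity-based importances such as feature_importances_ can be biased toward high-cardinality features; scikit-learn (0.22+) also offers permutation importance as a more robust check. A minimal sketch, assuming reg, X_test, y_test and california from the example above are in scope:

from sklearn.inspection import permutation_importance

# Permutation importance on held-out data; a larger mean score drop means more important.
result = permutation_importance(reg, X_test, y_test, n_repeats=10, random_state=0)
for idx in result.importances_mean.argsort()[::-1]:
    print(california.feature_names[idx], result.importances_mean[idx])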


Example 4: gbdt_model

def gbdt_model(trains):

    trains = np.array(trains)

    gbdt = GradientBoostingRegressor(
      loss='ls',
      learning_rate=0.1,
      n_estimators=100,
      subsample=1,
      min_samples_split=2,
      min_samples_leaf=1,
      max_depth=3,
      init=None,
      random_state=None,
      max_features=None,
      alpha=0.9,
      verbose=0,
      max_leaf_nodes=None,
      warm_start=False
    )

#     pdb.set_trace()
    train_set = trains[:, :-1]
    label_set = trains[:, -1]

    gbdt.fit(train_set, label_set)
    return gbdt
Developer: fengkaicnic | Project: pyml | Lines: 27 | Source: gbdt_model.py


Example 5: gbm_fit

def gbm_fit(params, cv_folds):
    gbm = GradientBoostingRegressor(**params)
    gbm.fit(x_train, y_train)

    # Check accuracy of model
    # No need for validation data because of cross validation
    # Training data is split up into cv_folds folds:
    # Model trained on (cv_folds - 1) of the folds; last fold is saved as validation set
    cv_scores_mse = cross_validation.cross_val_score(gbm, x_train, y_train, cv=cv_folds, scoring='mean_squared_error')
    print '\nModel Report'
    print ('MSE Score: Mean - %.7g | Std - %.7g | Min - %.7g | Max - %.7g' %
          (np.mean(cv_scores_mse), np.std(cv_scores_mse), np.min(cv_scores_mse), np.max(cv_scores_mse)))
    feat_imp = pd.Series(gbm.feature_importances_, features).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
    plt.show()

    # Check actual performance on test data
    final_predictions = gbm.predict(x_test)
    test['health_score_in_week'] = final_predictions
    test.to_csv(output_file, columns=['user_id', 'date', 'steps', 'total_sleep', 'resting_hr',
                                      'step_week_slope', 'sleep_week_slope', 'hr_week_slope',
                                      'curr_health_score', 'health_score_in_week'])

    # Save the model to file 'health_prediction.pkl'
    joblib.dump(gbm, 'health_prediction.pkl', compress=1)
Developer: Fitomo | Project: Prediction-Service | Lines: 26 | Source: predicted_health_algorithm.py
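Note that the sklearn.cross_validation module used above was removed in scikit-learn 0.20, and the 'mean_squared_error' scoring string was replaced by its negated counterpart. The equivalent call with the modern API (same gbm, x_train, y_train and cv_folds) would look roughly like:

from sklearn.model_selection import cross_val_score

# Scores come back as negated MSE by convention, so flip the sign before reporting.
cv_scores_mse = -cross_val_score(gbm, x_train, y_train, cv=cv_folds,
                                 scoring='neg_mean_squared_error')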


Example 6: GBRModel

def GBRModel(X_train,X_cv,y_train,y_cv):
	targets = get_target_array()
	#print len(train_features)
	#print train_features[0]

	#print len(test_features)
	n_estimators = [50, 100]#, 1500, 5000]
	max_depth = [3,8]
	

	best_GBR = None
	best_mse = float('inf')
	best_score = -float('inf')

	print "################# Performing Gradient Boosting Regression ####################### \n\n\n\n"
	for estm in n_estimators:
		for cur_depth in max_depth:
			#random_forest = RandomForestRegressor(n_estimators=estm)
			regr_GBR = GradientBoostingRegressor(n_estimators=estm, max_depth= cur_depth)
			predictor = regr_GBR.fit(X_train,y_train)
			score = regr_GBR.score(X_cv,y_cv)
			mse = np.mean((regr_GBR.predict(X_cv) - y_cv) **2)
			print "Number of estimators used: ",estm
			print "Tree depth used: ",cur_depth
			print "Residual sum of squares: %.2f "%mse
			print "Variance score: %.2f \n"%score
			if best_score <= score:
				if best_mse > mse:
					best_mse = mse
					best_score = score
					best_GBR = predictor	
	print "\nBest score: ",best_score
	print "Best mse: ",best_mse
	return best_GBR
Developer: SaarthakKhanna2104 | Project: Home-Depot-Product-Search-Relevance | Lines: 34 | Source: GBR.py


Example 7: gradient_boosting

def gradient_boosting(features_values_temp, rows_temp, columns_temp, prediction_values_temp, kernel, threshold):
    # kernel: linear, poly, rbf, sigmoid, precomputed

    rows = 0
    while rows_temp > 0:
        rows = rows + 1
        rows_temp = rows_temp - 1

    columns = 0
    while columns_temp > 0:
        columns = columns + 1
        columns_temp = columns_temp - 1

    features_values = [x for x in features_values_temp]
    prediction_values = [y for y in prediction_values_temp]

    rotated = convert_list_to_matrix(features_values, rows, columns)
    scores = np.array(prediction_values)

    threshold = float(threshold)

    estimator = SVR(kernel=kernel)  # try to change to the model for which the test is gonna run (lasso, ridge, etc.)

    # The remainder is a self-contained GradientBoostingRegressor demo on synthetic data
    # (note: it does not use the inputs prepared above).
    X, y = make_friedman1(n_samples=1200, random_state=0, noise=1.0)
    X_train, X_test = X[:200], X[200:]
    y_train, y_test = y[:200], y[200:]
    est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=1, random_state=0, loss='ls').fit(X_train, y_train)
    mean_squared_error(y_test, est.predict(X_test))
Developer: adityasubramanian | Project: kaggle_titanic | Lines: 30 | Source: feature_selection.py


Example 8: fit

def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # if test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        inputs_train, inputs_test, targets_train, targets_test = cross_validation.train_test_split(inputs, targets, test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare regression prediction with the true value and count the fraction of time it falls below
        # This should give the working point value
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print 'Testing regression with inputs', inputsname, 'and working point', workingpoint
        print '    Test efficiency =', float(list(compare).count(True))/float(len(compare))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
Developer: jbsauvan | Project: L1T-Utilities | Lines: 28 | Source: quantile_regression.py


Example 9: impute

def impute(df,imp_val,headers):
	if np.isnan(imp_val):
		imp_val = -500	
	log("imputing...",1)

	model = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, subsample=1.0, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, init=None, random_state=None, max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto')
	data = np.array(df[headers].get_values())
	data[np.isnan(data)] = -500

	for col in range(0,len(headers)):
		#print "Working on column: "+str(col)
		##for the current column, remove rows where the current (row,column) value is not equal to zero
		##this way we are only training on data with non-zero target values
		reduced_data = data[np.logical_not(data[:,col] == imp_val)] #remove row if row,col_num value is zero
		target_set = reduced_data[:,col]
		training_set = np.delete(reduced_data,col,1)
		model.fit(training_set,target_set)
		row_num=0
		for row in data:
			remaining = np.delete(row,col,0)
			if data[row_num,col] == imp_val:
				data[row_num,col] = model.predict(remaining)
			row_num+=1
	cntr=0
	for h in headers:
		df[h] = data[:,cntr];cntr+=1
	return df
Developer: reventropy | Project: maxquant_differential_analysis | Lines: 27 | Source: mq_diff_1.7.py
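Recent scikit-learn releases (0.21+) ship IterativeImputer, which implements essentially this round-robin model-based imputation scheme. A minimal sketch, assuming the same numeric df and headers as the example:

from sklearn.experimental import enable_iterative_imputer  # noqa: F401 — activates IterativeImputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import GradientBoostingRegressor

# Each column with missing values is regressed on the other columns, round-robin.
imputer = IterativeImputer(estimator=GradientBoostingRegressor(), random_state=0)
df[headers] = imputer.fit_transform(df[headers])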


Example 10: modelTheData

def modelTheData(data,target):

#    params = {'n_estimators': 400, 'max_depth': 4, 'min_samples_split': 2,
#          'subsample': 0.5,'min_samples_leaf': 2,
#          'learning_rate': 0.01, 'loss': 'ls'}


#beijing
    # learn_rate was a deprecated alias of learning_rate and has since been removed,
    # so only learning_rate is passed here.
    myMachine = GradientBoostingRegressor(alpha=0.9, init=None,
             learning_rate=0.05, loss='ls', max_depth=1, max_features=None,
             min_samples_leaf=2, min_samples_split=2, n_estimators=300,
             random_state=None, subsample=0.5, verbose=0)

#shanghai
#    myMachine = GradientBoostingRegressor(alpha=0.9, init=None, learn_rate=None,
#             learning_rate=0.05, loss='ls', max_depth=3, max_features=None,
#             min_samples_leaf=2, min_samples_split=2, n_estimators=500,
#             random_state=None, subsample=0.5, verbose=0)





#    myMachine = GradientBoostingRegressor(**params)
    myMachine.fit(data,target)

    return myMachine
Developer: wybert | Project: PMpredict | Lines: 27 | Source: modelSHData.py


Example 11: build_models

    def build_models(self):

        self.remove_columns(
            [
                "institute_latitude",
                "institute_longitude",
                "institute_state",
                "institute_country",
                "var10",
                "var11",
                "var12",
                "var13",
                "var14",
                "var15",
                "instructor_past_performance",
                "instructor_association_industry_expert",
                "secondary_area",
                "var24",
            ]
        )

        model1 = GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8)
        model2 = RandomForestRegressor(n_estimators=50)
        model3 = ExtraTreesRegressor(n_estimators=50)

        model1.fit(self.X, self.y)
        model2.fit(self.X, self.y)
        model3.fit(self.X, self.y)

        return [model1, model2, model3]
Developer: numb3r33 | Project: predict-grants | Lines: 30 | Source: model.py


Example 12: fit

    def fit(self,data_train,target):
        self.target_train = target
        self.catcol = data_train.filter(like='var').columns.tolist()
        #start_gbr_tr = time.clock()
        self.gbr = GradientBoostingRegressor(n_estimators =self.nest,max_depth=7)
        self.gbr.fit(data_train,self.target_train)
        self.transformed_train_gbr = self.gbr.transform(data_train,threshold="0.35*mean")
        self.gbr_tr_fit = GradientBoostingRegressor(n_estimators =self.nest,max_depth=7)
        self.gbr_tr_fit.fit(self.transformed_train_gbr,self.target_train)
        #end_gbr_tr = time.clock()
        #print >> log, "time_gbr_tr = ", end_gbr_tr-start_gbr_tr

        #start_xfr_tr = time.clock()
        self.xfr= ExtraTreesRegressor(n_estimators =self.nest,max_depth=7)
        self.xfr.fit(data_train,self.target_train)
        self.transformed_train_xfr = self.xfr.transform(data_train,threshold="0.35*mean")
        self.xfr_tr_fit = ExtraTreesRegressor(n_estimators =self.nest,max_depth=7)
        self.xfr_tr_fit.fit(self.transformed_train_xfr,self.target_train)
        #end_xfr_tr = time.clock()
        #print >> log, "time_xfr_tr = ", end_xfr_tr-start_xfr_tr

        #start_gbr_cat = time.clock()
        self.gbr_cat_fit = GradientBoostingRegressor(n_estimators =self.nest,max_depth=7)
        self.gbr_cat_fit.fit(data_train[self.catcol],self.target_train)
        #end_gbr_cat = time.clock()
        #print >> log, "time_gbr_cat = ", end_gbr_cat-start_gbr_cat

        #start_xfr_cat = time.clock()
        self.xfr_cat_fit = ExtraTreesRegressor(n_estimators =self.nest,max_depth=7)
        self.xfr_cat_fit.fit(data_train[self.catcol],self.target_train)
        #end_xfr_cat = time.clock()
        #print >> log, "time_xfr_cat = ", end_xfr_cat-start_xfr_cat
        return self
Developer: kirilligum | Project: cdips-fire | Lines: 33 | Source: cvbari.py
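The estimator.transform(X, threshold=...) feature-selection shortcut used above was deprecated in scikit-learn 0.17 and later removed; the modern equivalent wraps the fitted model in SelectFromModel. A sketch of the first selection step under that API, inside the same fit method (same fitted self.gbr and data_train):

from sklearn.feature_selection import SelectFromModel

# prefit=True reuses the already-fitted model; the threshold string mirrors "0.35*mean".
selector = SelectFromModel(self.gbr, threshold="0.35*mean", prefit=True)
self.transformed_train_gbr = selector.transform(data_train)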


Example 13: gbdrtrain

def gbdrtrain(x, y, pre_x):
    x, pre_x = datscater(x, pre_x)
    # Note: modern scikit-learn requires a float min_samples_leaf to lie in (0, 0.5];
    # a value of 0.8 raises ValueError there — use an integer count or a smaller fraction.
    clf = GradientBoostingRegressor(n_estimators=740, min_samples_leaf=0.8, min_samples_split=40,
                                    learning_rate=0.1, max_depth=7, random_state=400, loss='huber').fit(x, y)
    # clf = GradientBoostingRegressor(n_estimators=200, max_leaf_nodes=20, learning_rate=0.1, max_depth=6, random_state=400, loss='ls').fit(x, y)

    pred = clf.predict(pre_x)
    return pred
Developer: pthaike | Project: comp | Lines: 7 | Source: predict.py


Example 14: train

def train(targets, features, model_file, params):
    model = GradientBoostingRegressor(**params)
    print "Training hard..."
    model.fit(features, targets)
    print "Saving model..."
    pickle.dump(model, open(model_file, 'wb'))
    return model
Developer: DenXX | Project: irlab | Lines: 7 | Source: train.py


Example 15: add_new_weak_learner

    def add_new_weak_learner(self):
        '''
        Summary:
            Adds a new function, h, to self.weak_learners by solving for Eq. 1 using multiple additive regression trees:

            [Eq. 1] h = argmin_h (sum_i Q_A(s_i,a_i) + h(s_i, a_i) - (r_i + max_b Q_A(s'_i, b)))

        '''
        if len(self.most_recent_episode) == 0:
            # If this episode contains no data, don't do anything.
            return

        # Build up data sets of features and loss terms
        data = np.zeros((len(self.most_recent_episode), self.max_state_features + 1))
        total_loss = np.zeros(len(self.most_recent_episode))

        for i, experience in enumerate(self.most_recent_episode):
            # Grab the experience.
            s, a, r, s_prime = experience

            # Pad in case the state features are too short (as in Atari sometimes).
            features = self._pad_features_with_zeros(s, a)
            loss = (r + self.gamma * self.get_max_q_value(s_prime) - self.get_q_value(s, a))
            
            # Add to relevant lists.
            data[i] = features
            total_loss[i] = loss

        # Compute new regressor and add it to the weak learners.
        estimator = GradientBoostingRegressor(loss='ls', n_estimators=1, max_depth=self.max_depth)
        estimator.fit(data, total_loss)
        self.weak_learners.append(estimator)
Developer: david-abel | Project: simple_rl | Lines: 32 | Source: GradientBoostingAgentClass.py


Example 16: CaGBMModel

def CaGBMModel(X_train, Y_train, X_test, Y_test, cv_iterator):
    
    #===========================================================================
    # modelCV = GradientBoostingRegressor(subsample = 1, random_state = 42)
    # param_grid = {'loss':['ls'],
    #               'learning_rate':[0.1],
    #               'n_estimators':[100],
    #               'max_depth':[5, 50, 150],
    #               'min_samples_split':[2],
    #               'min_samples_leaf':[5, 15, 30],
    #               'max_features':["auto"]
    #               }
    # 
    # search = GridSearchCV(modelCV, param_grid, scoring="mean_squared_error", cv=cv_iterator, n_jobs = -1)
    # search.fit(X_train, Y_train["P"])
    # search.grid_scores_
    # model = search.best_estimator_
    # mse = search.best_score_
    # print (time.strftime("%H:%M:%S"))
    #===========================================================================
    gbm = GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, max_depth=50, min_samples_leaf=20, max_features=None, random_state=76)
    gbm.fit(X_train, Y_train["Ca"])
    
    yhat_gbm = gbm.predict(X_test)
    test_error = math.sqrt(mean_squared_error(Y_test["Ca"], yhat_gbm))

    return gbm, test_error
Developer: pkravik | Project: kaggle | Lines: 27 | Source: ca_models.py


Example 17: grid_search

def grid_search():
    results_list_of_tuples = list()
    num_folds = 3
    best_result = tuple()
    for item1 in gd_grid['learning_rate']:
        for item2 in gd_grid['max_depth']:
            for item3 in gd_grid['min_samples_leaf']:
                for item4 in gd_grid['n_estimators']:
                    for item5 in gd_grid['random_state']:
                        instance = ('LR {}, max_depth {}, min_samp_leaf {}, '
                                    'n_est {}, rs {}').format(item1, item2, item3, item4, item5)
                        print instance
                        gbrt = GradientBoostingRegressor(random_state=item5,
                                                         n_estimators=item4,
                                                         min_samples_leaf=item3,
                                                         max_depth=item2,
                                                         learning_rate=item1)
                        kf = KFold(X.shape[0], n_folds=num_folds)
                        mse_list = []
                        for train_index, test_index in kf:
                            X_train, X_test = X[train_index], X[test_index]
                            y_train, y_test = y[train_index], y[test_index]
                            w_train, w_test = weights[train_index], weights[test_index]
                            gbrt.fit(X_train, y_train, w_train)                 
                            y_pred = gbrt.predict(X_test)
                            mse = mean_squared_error(y_test, y_pred, sample_weight=w_test)
                            mse_list.append(mse)

                        kf_mse = np.mean(np.array(mse_list))
                        results_list_of_tuples.append((instance, kf_mse))
                        
    return results_list_of_tuples
Developer: jerrystsai | Project: project-upwork | Lines: 27 | Source: gbrt.py
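The same sweep can be written with GridSearchCV, which handles the fold loop itself; sample_weight can be forwarded to fit, though it then weights only the fitting, not the per-fold scoring as the manual loop above does. A minimal sketch assuming gd_grid, X, y and weights from the example:

from sklearn.model_selection import GridSearchCV

# Equivalent sweep; negated MSE is scikit-learn's convention for error scores.
search = GridSearchCV(GradientBoostingRegressor(), param_grid=gd_grid,
                      scoring='neg_mean_squared_error', cv=3)
search.fit(X, y, sample_weight=weights)
print(search.best_params_, -search.best_score_)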


Example 18: testingGBM

def testingGBM(X_train, Y_train, X_test, Y_test):
    params = {'verbose':2, 'n_estimators':100, 'max_depth':50, 'min_samples_leaf':20, 'learning_rate':0.1, 'loss':'ls', 'max_features':None}
    test_init = Ridge(alpha = 0.1, normalize = True, fit_intercept=True)
    gbm2 = GradientBoostingRegressor(**params)
    gbm2.fit(X_train, Y_train["Ca"])
    yhat_gbm = gbm2.predict(X_test)
    mean_squared_error(Y_test["Ca"], yhat_gbm)
    math.sqrt(mean_squared_error(Y_test["Ca"], yhat_gbm))
    
    test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
    
    # staged_decision_function was deprecated and later removed for regressors;
    # staged_predict yields the prediction after each boosting iteration.
    for i, y_pred in enumerate(gbm2.staged_predict(X_test)):
        test_score[i]=mean_squared_error(Y_test["Ca"], y_pred)
    
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.title('Deviance')
    plt.plot(np.arange(params['n_estimators']) + 1, gbm2.train_score_, 'b-',
             label='Training Set Deviance')
    plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
             label='Test Set Deviance')
    
    plt.legend(loc='upper right')
    plt.xlabel('Boosting Iterations')
    plt.ylabel('Deviance')
    plt.show()
Developer: pkravik | Project: kaggle | Lines: 26 | Source: ca_models.py


Example 19: pipeline

def pipeline():
        val = data[data.watch==0]
        val_a_b = val[['item_id','store_code','a','b']]
        val_x = val.drop(['label','watch','item_id','store_code','a','b'],axis=1)

        train = data[data.watch!=0]
        train_y = train.label

        
        a = list(train.a)
        b = list(train.b)
        train_weight = []
        for i in range(len(a)):
            train_weight.append(min(a[i],b[i]))
        train_weight = np.array(train_weight)

        train_x = train.drop(['label','watch','item_id','store_code','a','b'],axis=1)

        train_x.fillna(train_x.median(),inplace=True)
        val_x.fillna(val_x.median(),inplace=True)
        

        model = GradientBoostingRegressor(loss='lad',learning_rate=0.01,n_estimators=400,subsample=0.75,max_depth=6,random_state=1024, max_features=0.75)

        # train with per-sample weights
        model.fit(train_x, train_y, sample_weight=train_weight)

        # predict val set
        val_a_b['pred'] = model.predict(val_x)
        val_a_b.to_csv('gbrt_3.csv', index=None)
Developer: foxchopin | Project: CaiNiao-DemandForecast-StoragePlaning | Lines: 31 | Source: gbrt.py


Example 20: train_and_score

def train_and_score(i):
	global X_train
	global X_test 
	global Y_train
	global dist_train
	global dist_test
	
	# GBR performed best but we experimented with other models as well (see the paper)
	cl = GradientBoostingRegressor(n_estimators=100, loss='ls', learning_rate=0.1)

	# we add user distance from i-th branch (for which we do prediction) to train set
	dist_from_target_branch_train = dist_train[:,i].reshape((len(dist_train[:,i]),1))  # dist from i-th branch
	X_train = np.hstack((X_train, dist_from_target_branch_train))
	# we add mean user activity distance from i-th branch (for which we do prediction) to train set
	ab_dist_train = act_branch_dist_train[:,i].reshape((len(act_branch_dist_train[:,i]),1))  # dist from i-th branch
	X_train = np.hstack((X_train, ab_dist_train))

	# we also experimented with Standard Scaler, without much success
	# mmscaler_train = StandardScaler()
	# X_train = mmscaler_train.fit_transform(X_train)

	cl.fit(X_train,Y_train[:,i])

	# same as above for test set
	dist_from_target_branch_test = dist_test[:,i].reshape((len(dist_test[:,i]),1))  # dist from i-th branch
	X_test = np.hstack((X_test, dist_from_target_branch_test))
	ab_dist_test = act_branch_dist_test[:,i].reshape((len(act_branch_dist_test[:,i]),1))  # dist from i-th branch
	X_test = np.hstack((X_test, ab_dist_test))

	# mmscaler_test = StandardScaler()
	# X_test = mmscaler_test.fit_transform(X_test)

	return cl.predict(X_test)
Developer: SandraMNE | Project: ECMLChallenge2016 | Lines: 33 | Source: t1_mpp2.py



Note: The sklearn.ensemble.GradientBoostingRegressor class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and any redistribution or use should follow the corresponding project's License. Do not reproduce without permission.

