Python xgboost.train Function Code Examples


This article collects typical usage examples of the xgboost.train function in Python. If you have been wondering exactly how to use the train function, what it does, or what real-world calls to it look like, the hand-picked code samples below should help.



Twenty code examples of the train function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps our system recommend better Python code samples.
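
Before diving in, here is a minimal, self-contained sketch of the basic call pattern that all of the samples below build on. The synthetic data and parameter values here are illustrative assumptions, not taken from any of the projects:

import numpy as np
import xgboost as xgb

# Illustrative random data: 200 rows, 5 features, binary labels.
X = np.random.randn(200, 5)
y = (X[:, 0] > 0).astype(int)
dtrain = xgb.DMatrix(X, label=y)

params = {'objective': 'binary:logistic', 'max_depth': 3,
          'eta': 0.1, 'eval_metric': 'auc'}
# Train up to 50 rounds, reporting AUC on each (DMatrix, name) pair in evals;
# early_stopping_rounds halts training when the last metric stops improving.
bst = xgb.train(params, dtrain, num_boost_round=50,
                evals=[(dtrain, 'train')], early_stopping_rounds=10)
preds = bst.predict(dtrain)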

Example 1: xgbTuning

	def xgbTuning(self, pX, change = 3):
		w = self.getWeight(self.y)
		dm = xgb.DMatrix(pX, self.y, weight=w)
		best_auc = 0
		n = pX.shape[0]
		best_params = None
		for i in range(change):
			randp = np.random.random_sample(3)
			param = {
				'bst:eta': randp[0],
				'max_depth': int(3+6*randp[1]) , 
				'nthread':4, 
				'silent':1,
				'alpha':randp[2],
				'eval_metric':'auc',
				'objective': 'binary:logistic' 
			}
			m = xgb.cv(param, dm, metrics='auc', nfold=3, num_boost_round = 50,early_stopping_rounds=5)
			auc = m['test-auc-mean'].max()
			if auc > best_auc:
				print('xgb:' + str(auc))
				best_auc = auc
				best_params = param
		Xtrain, Xtest, ytrain, ytest = train_test_split(pX, self.y, test_size=.33)
		trainw = self.getWeight(ytrain)
		testw = self.getWeight(ytest)
		dtrain = xgb.DMatrix(Xtrain, label = ytrain, feature_names=Xtrain.columns, weight = trainw)
		dtest = xgb.DMatrix(Xtest, label = ytest, feature_names=Xtest.columns, weight = testw)
		evallist = [(dtrain, 'train'), (dtest, 'eval')]
		booster = xgb.train(best_params, dtrain, evals=evallist, num_boost_round=100,early_stopping_rounds=10)
		rounds = booster.attr("best_iteration")
		best_auc = booster.attr("best_score")
		return float(best_auc), xgb.train(best_params, dtrain, num_boost_round=int(rounds))
Developer: Gnostikoi, Project: orange, Lines: 33, Source: model_generator.py


Example 2: evalModelOHE

def evalModelOHE(train_data, eval_data, train_labels, eval_labels):
    params = {}
#    params["objective"] = "reg:linear"
#    params["eta"] = 0.05
#    params["min_child_weight"] = 8
#    params["subsample"] = 0.7
#    params["colsample_bytree"] = 0.7
#    params["scale_pos_weight"] = 1.0
#    params["silent"] = 1
#    params["max_depth"] = 8
#    params["max_delta_step"]=2
    params["objective"] = "reg:linear"
    params["eta"] = 0.013
    params["min_child_weight"] = 6
    params["subsample"] = 0.51
    params["colsample_bytree"] = 0.6
    params["scale_pos_weight"] = 1.0
    params["silent"] = 1
    params["max_depth"] = 10
    params["max_delta_step"]=1
    plst = list(params.items())
    
    xgtrain = xgb.DMatrix(train_data,label=train_labels)
    xgeval = xgb.DMatrix(eval_data,label=eval_labels)
    evallist  = [(xgeval,'eval'), (xgtrain,'train')]
    xgb.train(plst, xgtrain, num_boost_round=5000, evals=evallist,feval=evalerror)
Developer: shashankchaudhry, Project: caterpillar-tube-pricing, Lines: 26, Source: new_model_08-31_n_fold_CV_framework_OHE_mean.py


Example 3: test_fast_histmaker

    def test_fast_histmaker(self):
        variable_param = {'tree_method': ['hist'],
                          'max_depth': [2, 8],
                          'max_bin': [2, 256],
                          'grow_policy': ['depthwise', 'lossguide'],
                          'max_leaves': [64, 0],
                          'verbosity': [0]}
        for param in parameter_combinations(variable_param):
            result = run_suite(param)
            assert_results_non_increasing(result, 1e-2)

        # hist must be same as exact on all-categorical data
        dpath = 'demo/data/'
        ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
        ag_dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
        ag_param = {'max_depth': 2,
                    'tree_method': 'hist',
                    'eta': 1,
                    'verbosity': 0,
                    'objective': 'binary:logistic',
                    'eval_metric': 'auc'}
        hist_res = {}
        exact_res = {}

        xgb.train(ag_param, ag_dtrain, 10,
                  [(ag_dtrain, 'train'), (ag_dtest, 'test')],
                  evals_result=hist_res)
        ag_param["tree_method"] = "exact"
        xgb.train(ag_param, ag_dtrain, 10,
                  [(ag_dtrain, 'train'), (ag_dtest, 'test')],
                  evals_result=exact_res)
        assert hist_res['train']['auc'] == exact_res['train']['auc']
        assert hist_res['test']['auc'] == exact_res['test']['auc']
Developer: dmlc, Project: xgboost, Lines: 33, Source: test_updaters.py


Example 4: train_predict

	def train_predict(self,train_x,train_y,test_x):
		xgmat_train = xgb.DMatrix(train_x, label=train_y, missing=-9999)
		test_size = test_x.shape[0]
		params = {
			'booster':'gbtree',
			'objective':'binary:logistic',
			'silent':self.silent,
			'eta':self.eta,
			'gamma':self.gamma,
			'max_depth':self.max_depth,
			'min_child_weight':self.min_child_weight,
			'subsample':self.subsample,
			'lambda':self.lambda_,
			'scale_pos_weight':self.scale_pos_weight,
			"colsample_bytree": self.colsample_bytree,
			'eval_metric':'auc',
			'seed':2014,
			'nthread':self.threads
		}

		watchlist = [ (xgmat_train,'train') ]
		num_round = self.num_boost_round

		bst = xgb.train( params, xgmat_train, num_round, watchlist )
		xgmat_test = xgb.DMatrix(test_x,missing=-9999)

		if self.exist_prediction:
			tmp_train = bst.predict(xgmat_train, output_margin=True)
			tmp_test = bst.predict(xgmat_test, output_margin=True)
			xgmat_train.set_base_margin(tmp_train)
			xgmat_test.set_base_margin(tmp_test)
			bst = xgb.train(params, xgmat_train, self.exist_num_boost_round, watchlist )

		ypred = bst.predict(xgmat_test)
		return ypred
Developer: Sandy4321, Project: Xgboost_Datacastle_MoralQualityPrediction, Lines: 35, Source: xgb_class.py


Example 5: hyperopt_obj

 def hyperopt_obj(self,param,train_X,train_y):
     # 5-fold crossvalidation error
     #ret = xgb.cv(param,dtrain,num_boost_round=param['num_round'])
     kf = KFold(n_splits = 3)
     errors = []
     r2 = []
     int_params = ['max_depth','num_round']
     for item in int_params:
         param[item] = int(param[item])
     for train_ind,test_ind in kf.split(train_X):
         train_valid_x,train_valid_y = train_X[train_ind],train_y[train_ind]
         test_valid_x,test_valid_y = train_X[test_ind],train_y[test_ind]
         dtrain = xgb.DMatrix(train_valid_x,label = train_valid_y)
         dtest = xgb.DMatrix(test_valid_x)
         pred_model = xgb.train(param,dtrain,num_boost_round=int(param['num_round']))
         pred_test = pred_model.predict(dtest)
         errors.append(mean_squared_error(test_valid_y,pred_test))
         r2.append(r2_score(test_valid_y,pred_test))
     all_dtrain = xgb.DMatrix(train_X,label = train_y)
     print('training score:')
     pred_model = xgb.train(param,all_dtrain,num_boost_round= int(param['num_round']))
     all_dtest = xgb.DMatrix(train_X)
     pred_train = pred_model.predict(all_dtest)
     print(str(r2_score(train_y,pred_train)))
     print(np.mean(r2))
     print('\n')
     return {'loss':np.mean(errors),'status': STATUS_OK}
Developer: Matafight, Project: Kaggle, Lines: 27, Source: stacking.py


Example 6: test_predict

    def test_predict(self):
        iterations = 10
        np.random.seed(1)
        test_num_rows = [10, 1000, 5000]
        test_num_cols = [10, 50, 500]
        for num_rows in test_num_rows:
            for num_cols in test_num_cols:
                dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
                dval = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
                dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
                watchlist = [(dtrain, 'train'), (dval, 'validation')]
                res = {}
                param = {
                    "objective": "binary:logistic",
                    "predictor": "gpu_predictor",
                    'eval_metric': 'auc',
                }
                bst = xgb.train(param, dtrain, iterations, evals=watchlist, evals_result=res)
                assert self.non_decreasing(res["train"]["auc"])
                gpu_pred_train = bst.predict(dtrain, output_margin=True)
                gpu_pred_test = bst.predict(dtest, output_margin=True)
                gpu_pred_val = bst.predict(dval, output_margin=True)

                param["predictor"] = "cpu_predictor"
                bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist)
                cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
                cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
                cpu_pred_val = bst_cpu.predict(dval, output_margin=True)
                np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-5)
                np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-5)
                np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-5)
Developer: Ihaveadreammoonlighter, Project: xgboost, Lines: 31, Source: test_gpu_prediction.py


Example 7: XgbTrain

        def XgbTrain(self, submitfile):
              offset = 5000
              X_train, y_train = self.dataMat, self.labelMat
              X_test = self.testData
              xgtest = xgb.DMatrix(X_test)
              
              xgtrain_train = xgb.DMatrix(X_train[offset:,:], label=y_train[offset:])
              xgtrain_val = xgb.DMatrix(X_train[:offset,:], label=y_train[:offset])
              
                      
              watchlist = [(xgtrain_train, 'train'),(xgtrain_val, 'val')]
              model = xgb.train(self.params_best, xgtrain_train, self.num_rounds_best, watchlist,early_stopping_rounds=self.early_stopping_rounds_best)
              preds1 = model.predict(xgtest)
                      
              X_train = X_train[::-1,:]
              y_train = y_train[::-1]

              xgtrain_train = xgb.DMatrix(X_train[offset:,:], label=y_train[offset:])
              xgtrain_val = xgb.DMatrix(X_train[:offset,:], label=y_train[:offset])

              watchlist = [(xgtrain_train, 'train'),(xgtrain_val, 'val')]
              model = xgb.train(self.params_best, xgtrain_train, self.num_rounds_best, watchlist, early_stopping_rounds=self.early_stopping_rounds_best)
              preds2 = model.predict(xgtest)
                      
              preds = preds1 + preds2
              #preds = pd.DataFrame({"Id": self.testid, "Hazard": preds})
              if submitfile != '':
                writer = csv.writer(open(submitfile, 'w', newline=''))
                writer.writerow(['ID','Hazard'])
                for i in range(len(preds)):
                    line = [self.testid[i], preds[i]]
                    writer.writerow(line)
Developer: kevinmtian, Project: Kaggle, Lines: 32, Source: KgModelXgb.py


Example 8: run_benchmark

def run_benchmark(args, gpu_algorithm, cpu_algorithm):
    print("Generating dataset: {} rows * {} columns".format(args.rows,args.columns))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print ("DMatrix Start")
    # omp way
    dtrain = xgb.DMatrix(X, y, nthread=-1)
    # non-omp way
    #dtrain = xgb.DMatrix(X, y)
    print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))

    param = {'objective': 'binary:logistic',
             'max_depth': 6,
             'silent': 0,
             'n_gpus': 1,
             'gpu_id': 0,
             'eval_metric': 'auc'}

    param['tree_method'] = gpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations)
    print ("Train Time: %s seconds" % (str(time.time() - tmp)))

    param['silent'] = 1
    param['tree_method'] = cpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations)
    print ("Time: %s seconds" % (str(time.time() - tmp)))
Developer: ChangXiaodong, Project: xgboost-withcomments, Lines: 32, Source: benchmark.py


Example 9: test_multi_predict

    def test_multi_predict(self):
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {}
        params["tree_method"] = "gpu_hist"

        params['predictor'] = "gpu_predictor"
        bst_gpu_predict = xgb.train(params, dtrain)

        params['predictor'] = "cpu_predictor"
        bst_cpu_predict = xgb.train(params, dtrain)

        predict0 = bst_gpu_predict.predict(dtest)
        predict1 = bst_gpu_predict.predict(dtest)
        cpu_predict = bst_cpu_predict.predict(dtest)

        assert np.allclose(predict0, predict1)
        assert np.allclose(predict0, cpu_predict)
Developer: rfru, Project: xgboost, Lines: 26, Source: test_gpu_prediction.py


Example 10: run

def run(train_matrix,test_matrix):
    params = {'booster': 'gbtree',
              #'objective': 'multi:softmax',
              'objective': 'multi:softprob',
              'eval_metric': 'mlogloss',
              'gamma': 1,
              'min_child_weight': 1.5,
              'max_depth': 5,
              'lambda': 10,
              'subsample': 0.7,
              'colsample_bytree': 0.7,
              'colsample_bylevel': 0.7,
              'eta': 0.03,
              'tree_method': 'exact',
              'seed': 2017,
              'nthread': 12,
              "num_class":3
              }
    num_round = 10000
    early_stopping_rounds = 50
    watchlist = [(train_matrix, 'train'),
                 (test_matrix, 'eval')
                 ]
    if test_matrix:
        model = xgb.train(params, train_matrix, num_boost_round=num_round, evals=watchlist,
                      early_stopping_rounds=early_stopping_rounds
                      )
        pred_test_y = model.predict(test_matrix,ntree_limit=model.best_iteration)
        return pred_test_y, model
    else:
        model = xgb.train(params, train_matrix, num_boost_round=num_round
                      )
        return model
Developer: bifeng, Project: Rental-Listing-Inquiries, Lines: 33, Source: xgb.py


Example 11: train

    def train(self, X, Y, getApproxError=False):
        dtrain = xgb.DMatrix(X, label=Y)
        self.bst = xgb.train(self.param, dtrain, self.nRounds)

        if getApproxError:

            e = 0.0
            c = 0.0

            kf = KFold(Y.shape[0], n_folds=4)
            for train_index, test_index in kf:

                XTrain = X[train_index, :]
                XTest  = X[test_index, :]

                YTrain = Y[train_index]
                YTest  = Y[test_index]

                dtrain2 = xgb.DMatrix(XTrain, label=YTrain)
                bst = xgb.train(self.param, dtrain2, self.nRounds)

                dtest = xgb.DMatrix(XTest)
                probs = bst.predict(dtest)
                ypred = numpy.argmax(probs, axis=1)

                error = float(numpy.sum(ypred != YTest))
                e += error
                c += float(len(YTest))

            e /= c

            return e
Developer: DerThorsten, Project: nifty, Lines: 35, Source: tools.py


Example 12: xgboost_model

def xgboost_model(train, test, num_round, params):
    """
    Takes in: training set, test set, number of estimators, params is a list

    Returns: predictions in correct format
    """
    X = train.as_matrix(train.columns[:-1]).astype(float)
    y = train.as_matrix(["cost"])[:, 0].astype(float)
    ylog1p = np.log1p(y)
    X_test = test.as_matrix(test.columns[:-1]).astype(float)

    xgb_train = xgb.DMatrix(X, label=ylog1p)
    xgb_test = xgb.DMatrix(X_test)

    # Round 1
    bst1 = xgb.train(params, xgb_train, num_round)
    y_pred1 = bst1.predict(xgb_test)

    # Round 2
    # num_round2 = 2000
    # bst2 = xgb.train(params, xgb_train, 2000)
    # y_pred2 = bst2.predict(xgb_test)

    # Power Train
    ypower3 = np.power(y, 1 / 47.0)
    xgb_train3 = xgb.DMatrix(X, label=ypower3)
    xst3 = xgb.train(params, xgb_train3, num_round)
    y_predp3 = xst3.predict(xgb_test)

    p = 0.5
    y_pred = p * np.expm1(y_pred1) + (1 - p) * np.power(y_predp3, 47.0)

    return y_pred
Developer: evanslt, Project: CDIPS15_TeamCat, Lines: 33, Source: cv_run.py


Example 13: train_predict

    def train_predict(self,X_train,y_train,X_test,base_train_prediction,base_test_prediction):
        xgmat_train = xgb.DMatrix(X_train, label=y_train,missing=-999)
        test_size = X_test.shape[0]
        param = {}
        param['objective'] = 'binary:logistic'

        param['bst:eta'] = self.eta
        param['colsample_bytree']=1
        param['min_child_weight']=self.min_child_weight
        param['bst:max_depth'] = self.depth
        param['eval_metric'] = 'auc'
        param['silent'] = 1
        param['nthread'] = self.threads
        plst = list(param.items())

        watchlist = [ (xgmat_train,'train') ]
        num_round = self.num_round

        xgmat_test = xgb.DMatrix(X_test,missing=-999)
    
        if self.boost_from_exist_prediction:
            # train xgb with existing predictions; see more at
            # https://github.com/tqchen/xgboost/blob/master/demo/guide-python/boost_from_prediction.py
            xgmat_train.set_base_margin(base_train_prediction)
            xgmat_test.set_base_margin(base_test_prediction)
            bst = xgb.train(param, xgmat_train, self.exist_num_round, watchlist)
        else:
            bst = xgb.train( plst, xgmat_train, num_round, watchlist )
        ypred = bst.predict(xgmat_test)
        return ypred
Developer: thekannman, Project: kaggle, Lines: 31, Source: xgb_classifier.py
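
Examples 4 and 13 both rely on the same boost-from-prediction trick: predict raw margins with a first booster, feed them back in through DMatrix.set_base_margin, and continue boosting from that starting point (the pattern shown in the boost_from_prediction demo linked above). A minimal sketch on made-up data, not the original projects' code:

import numpy as np
import xgboost as xgb

X = np.random.randn(300, 4)
y = (X.sum(axis=1) > 0).astype(int)
dtrain = xgb.DMatrix(X, label=y)
params = {'objective': 'binary:logistic', 'max_depth': 3}

# Stage 1: train an initial booster.
bst1 = xgb.train(params, dtrain, num_boost_round=10)

# Stage 2: use stage 1's raw (pre-sigmoid) scores as the base margin
# and boost further from there.
dtrain.set_base_margin(bst1.predict(dtrain, output_margin=True))
bst2 = xgb.train(params, dtrain, num_boost_round=10)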


Example 14: run_benchmark

def run_benchmark(args):
    print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
    print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    if args.sparsity < 1.0:
       X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x for x in x_row] for x_row in X])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
    print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print ("DMatrix Start")
    dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
    dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
    print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))

    param = {'objective': 'binary:logistic'}
    if args.params != '':
        param.update(ast.literal_eval(args.params))

    param['tree_method'] = args.tree_method
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print ("Train Time: %s seconds" % (str(time.time() - tmp)))
Developer: wyj2046, Project: xgboost, Lines: 25, Source: benchmark.py


Example 15: fit

  def fit(self, X, y):    
    X = self.build_matrix(X, y)
    param = {
      'silent': 1 if self.silent else 0, 
      'use_buffer': int(self.use_buffer),
      'num_round': self.num_round,
      'ntree_limit': self.ntree_limit,
      'nthread': self.nthread,
      'booster': self.booster,
      'eta': self.eta,
      'gamma': self.gamma,
      'max_depth': self.max_depth,
      'min_child_weight': self.min_child_weight,
      'subsample': self.subsample,
      'colsample_bytree': self.colsample_bytree,
      'max_delta_step': self.max_delta_step,
      'l': self.l,
      'alpha': self.alpha,
      'lambda_bias': self.lambda_bias,
      'objective': self.objective,
      'eval_metric': self.eval_metric,
      'seed': self.seed          
    }
    if self.num_class is not None:
      param['num_class']= self.num_class

    watchlist  = [(X,'train')]    
    if self.early_stopping_rounds > 0:
      self.bst = xgb.train(param, X, self.num_round, watchlist, early_stopping_rounds=self.early_stopping_rounds)
    else:
      self.bst = xgb.train(param, X, self.num_round, watchlist)

    return self
Developer: eugeneyan, Project: py_ml_utils, Lines: 33, Source: XGBoostClassifier.py


Example 16: test_custom_objective

	def test_custom_objective(self):
		param = {'max_depth':2, 'eta':1, 'silent':1 }
		watchlist  = [(dtest,'eval'), (dtrain,'train')]
		num_round = 2
		def logregobj(preds, dtrain):
			# Custom objective for binary log-loss: with p = sigmoid(margin),
			# the gradient is p - label and the hessian is p * (1 - p).
			labels = dtrain.get_label()
			preds = 1.0 / (1.0 + np.exp(-preds))
			grad = preds - labels
			hess = preds * (1.0 - preds)
			return grad, hess
		def evalerror(preds, dtrain):
			# Custom metric: fraction of examples misclassified at margin 0.
			labels = dtrain.get_label()
			return 'error', float(sum(labels != (preds > 0.0))) / len(labels)
		
		# test custom_objective in training
		bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
		assert isinstance(bst, xgb.core.Booster)
		preds = bst.predict(dtest)
		labels = dtest.get_label()
		err = sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) / float(len(preds))
		assert err < 0.1

		# test custom_objective in cross-validation
		xgb.cv(param, dtrain, num_round, nfold = 5, seed = 0,
	       obj = logregobj, feval=evalerror)

		# test maximize parameter
		def neg_evalerror(preds, dtrain):
			labels = dtrain.get_label()
			return 'error', float(sum(labels == (preds > 0.0))) / len(labels)
		bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
		preds2 = bst2.predict(dtest)
		err2 = sum(1 for i in range(len(preds2)) if int(preds2[i]>0.5)!=labels[i]) / float(len(preds2))
		assert err == err2
Developer: ChrisBarker-NOAA, Project: xgboost, Lines: 34, Source: test_models.py


Example 17: xgb_features

def xgb_features(X,y,Xtest,params=None,random_state=0,n_folds=4,early_stop=20,eval_with_gini=False):
	try:
		if params['objective'] == 'reg:logistic':
			yt = MinMaxScaler().fit_transform(y*1.)		
		else:
			yt = y
		skf = StratifiedKFold(yt, n_folds=n_folds,shuffle=True,random_state=random_state)
		ypred_test = np.zeros(Xtest.shape[0])
		ypred_train =np.zeros(X.shape[0])
		seed = random_state;
		dtest = xgb.DMatrix(data=Xtest)
		for train_index, test_index in skf:
			X_train, X_test = X[train_index], X[test_index]
			y_train, y_test = yt[train_index], yt[test_index]
			dtrain = xgb.DMatrix(data=X_train,label=y_train)
			dvalid = xgb.DMatrix(data=X_test,label=y_test)
			evallist = [(dtrain,'train'),(dvalid,'valid')]
			num_round = 5000
			params['seed'] = seed+1
			seed+=1
			plst = params.items()
			if eval_with_gini:
				bst = xgb.train( plst, dtrain, num_round,evallist,early_stopping_rounds=early_stop,feval=evalerror)
			else :
				bst = xgb.train( plst, dtrain, num_round,evallist,early_stopping_rounds=early_stop)
			ypred = bst.predict(dtest,ntree_limit=bst.best_iteration)
			ypred_valid = bst.predict(dvalid)
			print ("\tcross validation gini score %s: %f"%(params['objective'],gini(y_test,ypred_valid)))
			ypred_test += ypred
			ypred_train[test_index] = ypred_valid
	except KeyboardInterrupt:
		ypred_test = np.zeros(Xtest.shape[0]);
		ypred_train = np.zeros(X.shape[0]);
		return ypred_train, ypred_test		
	return ypred_train, ypred_test*1./n_folds
Developer: hiendang, Project: kaggle-crowdflower-search, Lines: 35, Source: utility.py


Example 18: runXGB

def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, seed_val=0, num_rounds=1000):
    param = {}
    param['objective'] = 'multi:softprob'
    param['eta'] = 0.1
    param['max_depth'] = 6
    param['silent'] = 1
    param['num_class'] = 3
    param['eval_metric'] = "mlogloss"
    param['min_child_weight'] = 1
    param['subsample'] = 0.7
    param['colsample_bytree'] = 0.7
    param['seed'] = seed_val

    plst = list(param.items())
    xgtrain = xgb.DMatrix(train_X, label=train_y)

    if test_y is not None:
        xgtest = xgb.DMatrix(test_X, label=test_y)
        watchlist = [ (xgtrain,'train'), (xgtest, 'test') ]
        model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=20)
    else:
        xgtest = xgb.DMatrix(test_X)
        model = xgb.train(plst, xgtrain, num_rounds)

    pred_test_y = model.predict(xgtest)
    return pred_test_y, model
Developer: Paliking, Project: ML_examples, Lines: 27, Source: XGB_my.py


Example 19: run_benchmark

def run_benchmark(args, gpu_algorithm, cpu_algorithm):
    print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
    print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
    print ("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print ("DMatrix Start")
    # omp way
    dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
    dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
    print ("DMatrix Time: %s seconds" % (str(time.time() - tmp)))

    param = {'objective': 'binary:logistic',
             'max_depth': 6,
             'silent': 0,
             'n_gpus': 1,
             'gpu_id': 0,
             'eval_metric': 'error',
             'debug_verbose': 0,
             }

    param['tree_method'] = gpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print ("Train Time: %s seconds" % (str(time.time() - tmp)))

    param['silent'] = 1
    param['tree_method'] = cpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print ("Time: %s seconds" % (str(time.time() - tmp)))
Developer: felixdae, Project: xgboost, Lines: 35, Source: benchmark.py


Example 20: fit

    def fit(self, X, y):
        #data
        data = X
        label = y
        skf = cross_validation.StratifiedKFold(label, n_folds=10)
        for train_index, val_index in skf:
            dtrain = xgb.DMatrix(data[train_index], label=label[train_index])
            dval = xgb.DMatrix(data[val_index], label=label[val_index])
            break

        #set params
        plst = list(self.param.items())
        plst += [('eval_metric', 'merror')]
        evallist = [ (dtrain,'train'), (dval,'eval')]

        #train
        if self.bst is None:
            num_round = 100000
            self.bst = xgb.train(plst, dtrain, num_round, evals=evallist, early_stopping_rounds=100)
            self.best_score_ = 1-self.bst.best_score
            self.best_params_= self.bst.best_iteration

        #refit
        dtrain = xgb.DMatrix(data, label=label)
        num_round = self.best_params_
        self.bst = xgb.train(plst, dtrain, num_round, evals=evallist)
Developer: g0ulash, Project: pumpitup, Lines: 26, Source: XGBoostClassifier.py



Note: the xgboost.train examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Refer to the corresponding project's License before distributing or using the code, and do not republish without permission.

