This article collects typical usage examples of the xgboost.train function in Python. If you are wondering what the train function does, how to call it, or what real-world usage looks like, the curated code examples below may help.
A total of 20 code examples of the train function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
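Before the individual examples, here is a minimal, self-contained sketch of the typical xgb.train workflow (the synthetic data and parameter values are illustrative only, not taken from any of the projects below): wrap the data in a DMatrix, pass a parameter dict, train for a fixed number of boosting rounds, then predict.

import numpy as np
import xgboost as xgb

# Synthetic binary-classification data (illustrative only).
rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = rng.randint(0, 2, 200)

dtrain = xgb.DMatrix(X, label=y)            # wrap features/labels for xgboost
params = {'objective': 'binary:logistic',   # example parameter values
          'max_depth': 3,
          'eta': 0.1,
          'eval_metric': 'auc'}
bst = xgb.train(params, dtrain, num_boost_round=20,
                evals=[(dtrain, 'train')])
preds = bst.predict(dtrain)                 # predicted probabilities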
Example 1: xgbTuning
def xgbTuning(self, pX, change=3):
    w = self.getWeight(self.y)
    dm = xgb.DMatrix(pX, self.y, weight=w)
    best_auc = 0
    n = pX.shape[0]
    best_params = None
    for i in range(change):
        randp = np.random.random_sample(3)
        param = {
            'bst:eta': randp[0],
            'max_depth': int(3 + 6 * randp[1]),
            'nthread': 4,
            'silent': 1,
            'alpha': randp[2],
            'eval_metric': 'auc',
            'objective': 'binary:logistic'
        }
        m = xgb.cv(param, dm, metrics='auc', nfold=3, num_boost_round=50, early_stopping_rounds=5)
        auc = m['test-auc-mean'].max()
        if auc > best_auc:
            print('xgb:' + str(auc))
            best_auc = auc
            best_params = param
    Xtrain, Xtest, ytrain, ytest = train_test_split(pX, self.y, test_size=.33)
    trainw = self.getWeight(ytrain)
    testw = self.getWeight(ytest)
    dtrain = xgb.DMatrix(Xtrain, label=ytrain, feature_names=Xtrain.columns, weight=trainw)
    dtest = xgb.DMatrix(Xtest, label=ytest, feature_names=Xtest.columns, weight=testw)
    evallist = [(dtrain, 'train'), (dtest, 'eval')]
    booster = xgb.train(best_params, dtrain, evals=evallist, num_boost_round=100, early_stopping_rounds=10)
    rounds = booster.attr("best_iteration")
    best_auc = booster.attr("best_score")
    return float(best_auc), xgb.train(best_params, dtrain, num_boost_round=int(rounds))
Author: Gnostikoi | Project: orange | Lines of code: 33 | Source: model_generator.py
Example 2: evalModelOHE
def evalModelOHE(train_data, eval_data, train_labels, eval_labels):
    params = {}
    # params["objective"] = "reg:linear"
    # params["eta"] = 0.05
    # params["min_child_weight"] = 8
    # params["subsample"] = 0.7
    # params["colsample_bytree"] = 0.7
    # params["scale_pos_weight"] = 1.0
    # params["silent"] = 1
    # params["max_depth"] = 8
    # params["max_delta_step"] = 2
    params["objective"] = "reg:linear"
    params["eta"] = 0.013
    params["min_child_weight"] = 6
    params["subsample"] = 0.51
    params["colsample_bytree"] = 0.6
    params["scale_pos_weight"] = 1.0
    params["silent"] = 1
    params["max_depth"] = 10
    params["max_delta_step"] = 1
    plst = list(params.items())
    xgtrain = xgb.DMatrix(train_data, label=train_labels)
    xgeval = xgb.DMatrix(eval_data, label=eval_labels)
    evallist = [(xgeval, 'eval'), (xgtrain, 'train')]
    xgb.train(plst, xgtrain, num_boost_round=5000, evals=evallist, feval=evalerror)
Author: shashankchaudhry | Project: caterpillar-tube-pricing | Lines of code: 26 | Source: new_model_08-31_n_fold_CV_framework_OHE_mean.py
Example 3: test_fast_histmaker
def test_fast_histmaker(self):
    variable_param = {'tree_method': ['hist'],
                      'max_depth': [2, 8],
                      'max_bin': [2, 256],
                      'grow_policy': ['depthwise', 'lossguide'],
                      'max_leaves': [64, 0],
                      'verbosity': [0]}
    for param in parameter_combinations(variable_param):
        result = run_suite(param)
        assert_results_non_increasing(result, 1e-2)
    # hist must be the same as exact on all-categorical data
    dpath = 'demo/data/'
    ag_dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
    ag_dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
    ag_param = {'max_depth': 2,
                'tree_method': 'hist',
                'eta': 1,
                'verbosity': 0,
                'objective': 'binary:logistic',
                'eval_metric': 'auc'}
    hist_res = {}
    exact_res = {}
    xgb.train(ag_param, ag_dtrain, 10,
              [(ag_dtrain, 'train'), (ag_dtest, 'test')],
              evals_result=hist_res)
    ag_param["tree_method"] = "exact"
    xgb.train(ag_param, ag_dtrain, 10,
              [(ag_dtrain, 'train'), (ag_dtest, 'test')],
              evals_result=exact_res)
    assert hist_res['train']['auc'] == exact_res['train']['auc']
    assert hist_res['test']['auc'] == exact_res['test']['auc']
Author: dmlc | Project: xgboost | Lines of code: 33 | Source: test_updaters.py
Example 4: train_predict
def train_predict(self, train_x, train_y, test_x):
    xgmat_train = xgb.DMatrix(train_x, label=train_y, missing=-9999)
    test_size = test_x.shape[0]
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'silent': self.silent,
        'eta': self.eta,
        'gamma': self.gamma,
        'max_depth': self.max_depth,
        'min_child_weight': self.min_chile_weight,  # key corrected from misspelled 'min_chile_weitght'
        'subsample': self.subsample,
        'lambda': self.lambda_,
        'scale_pos_weight': self.scale_pos_weight,
        "colsample_bytree": self.colsample_bytree,
        'eval_metric': 'auc',  # key corrected from misspelled 'eval_metirc'
        'seed': 2014,
        'nthread': self.threads
    }
    watchlist = [(xgmat_train, 'train')]
    num_round = self.num_boost_round
    bst = xgb.train(params, xgmat_train, num_round, watchlist)
    xgmat_test = xgb.DMatrix(test_x, missing=-9999)
    if self.exist_prediction:
        # use the first model's predictions as base margin and continue boosting
        tmp_train = bst.predict(xgmat_train, output_margin=True)
        tmp_test = bst.predict(xgmat_test, output_margin=True)
        xgmat_train.set_base_margin(tmp_train)
        xgmat_test.set_base_margin(tmp_test)
        bst = xgb.train(params, xgmat_train, self.exist_num_boost_round, watchlist)
    ypred = bst.predict(xgmat_test)
    return ypred
Author: Sandy4321 | Project: Xgboost_Datacastle_MoralQualityPrediction | Lines of code: 35 | Source: xgb_class.py
Example 5: hyperopt_obj
def hyperopt_obj(self, param, train_X, train_y):
    # 3-fold cross-validation error
    # ret = xgb.cv(param, dtrain, num_boost_round=param['num_round'])
    kf = KFold(n_splits=3)
    errors = []
    r2 = []
    int_params = ['max_depth', 'num_round']
    for item in int_params:
        param[item] = int(param[item])
    for train_ind, test_ind in kf.split(train_X):
        train_valid_x, train_valid_y = train_X[train_ind], train_y[train_ind]
        test_valid_x, test_valid_y = train_X[test_ind], train_y[test_ind]
        dtrain = xgb.DMatrix(train_valid_x, label=train_valid_y)
        dtest = xgb.DMatrix(test_valid_x)
        pred_model = xgb.train(param, dtrain, num_boost_round=int(param['num_round']))
        pred_test = pred_model.predict(dtest)
        errors.append(mean_squared_error(test_valid_y, pred_test))
        r2.append(r2_score(test_valid_y, pred_test))
    all_dtrain = xgb.DMatrix(train_X, label=train_y)
    print('training score:')
    pred_model = xgb.train(param, all_dtrain, num_boost_round=int(param['num_round']))
    all_dtest = xgb.DMatrix(train_X)
    pred_train = pred_model.predict(all_dtest)
    print(str(r2_score(train_y, pred_train)))
    print(np.mean(r2))
    print('\n')
    return {'loss': np.mean(errors), 'status': STATUS_OK}
Author: Matafight | Project: Kaggle | Lines of code: 27 | Source: stacking.py
Example 6: test_predict
def test_predict(self):
    iterations = 10
    np.random.seed(1)
    test_num_rows = [10, 1000, 5000]
    test_num_cols = [10, 50, 500]
    for num_rows in test_num_rows:
        for num_cols in test_num_cols:
            dtrain = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
            dval = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
            dtest = xgb.DMatrix(np.random.randn(num_rows, num_cols), label=[0, 1] * int(num_rows / 2))
            watchlist = [(dtrain, 'train'), (dval, 'validation')]
            res = {}
            param = {
                "objective": "binary:logistic",
                "predictor": "gpu_predictor",
                'eval_metric': 'auc',
            }
            bst = xgb.train(param, dtrain, iterations, evals=watchlist, evals_result=res)
            assert self.non_decreasing(res["train"]["auc"])
            # GPU and CPU predictors must produce (nearly) identical margins
            gpu_pred_train = bst.predict(dtrain, output_margin=True)
            gpu_pred_test = bst.predict(dtest, output_margin=True)
            gpu_pred_val = bst.predict(dval, output_margin=True)
            param["predictor"] = "cpu_predictor"
            bst_cpu = xgb.train(param, dtrain, iterations, evals=watchlist)
            cpu_pred_train = bst_cpu.predict(dtrain, output_margin=True)
            cpu_pred_test = bst_cpu.predict(dtest, output_margin=True)
            cpu_pred_val = bst_cpu.predict(dval, output_margin=True)
            np.testing.assert_allclose(cpu_pred_train, gpu_pred_train, rtol=1e-5)
            np.testing.assert_allclose(cpu_pred_val, gpu_pred_val, rtol=1e-5)
            np.testing.assert_allclose(cpu_pred_test, gpu_pred_test, rtol=1e-5)
Author: Ihaveadreammoonlighter | Project: xgboost | Lines of code: 31 | Source: test_gpu_prediction.py
Example 7: XgbTrain
def XgbTrain(self, submitfile):
    offset = 5000
    X_train, y_train = self.dataMat, self.labelMat
    X_test = self.testData
    xgtest = xgb.DMatrix(X_test)
    xgtrain_train = xgb.DMatrix(X_train[offset:, :], label=y_train[offset:])
    xgtrain_val = xgb.DMatrix(X_train[:offset, :], label=y_train[:offset])
    watchlist = [(xgtrain_train, 'train'), (xgtrain_val, 'val')]
    model = xgb.train(self.params_best, xgtrain_train, self.num_rounds_best, watchlist,
                      early_stopping_rounds=self.early_stopping_rounds_best)
    preds1 = model.predict(xgtest)
    # train again on the reversed data and sum the two predictions
    X_train = X_train[::-1, :]
    y_train = y_train[::-1]
    xgtrain_train = xgb.DMatrix(X_train[offset:, :], label=y_train[offset:])
    xgtrain_val = xgb.DMatrix(X_train[:offset, :], label=y_train[:offset])
    watchlist = [(xgtrain_train, 'train'), (xgtrain_val, 'val')]
    model = xgb.train(self.params_best, xgtrain_train, self.num_rounds_best, watchlist,
                      early_stopping_rounds=self.early_stopping_rounds_best)
    preds2 = model.predict(xgtest)
    preds = preds1 + preds2
    # preds = pd.DataFrame({"Id": self.testid, "Hazard": preds})
    if submitfile != '':
        writer = csv.writer(open(submitfile, 'wb'))  # 'wb' mode is Python 2; on Python 3 use 'w' with newline=''
        writer.writerow(['ID', 'Hazard'])
        for i in range(len(preds)):
            line = [self.testid[i], preds[i]]
            writer.writerow(line)
Author: kevinmtian | Project: Kaggle | Lines of code: 32 | Source: KgModelXgb.py
Example 8: run_benchmark
def run_benchmark(args, gpu_algorithm, cpu_algorithm):
    print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    print("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print("DMatrix Start")
    # omp way
    dtrain = xgb.DMatrix(X, y, nthread=-1)
    # non-omp way
    # dtrain = xgb.DMatrix(X, y)
    print("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
    param = {'objective': 'binary:logistic',
             'max_depth': 6,
             'silent': 0,
             'n_gpus': 1,
             'gpu_id': 0,
             'eval_metric': 'auc'}
    param['tree_method'] = gpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations)
    print("Train Time: %s seconds" % (str(time.time() - tmp)))
    param['silent'] = 1
    param['tree_method'] = cpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations)
    print("Time: %s seconds" % (str(time.time() - tmp)))
Author: ChangXiaodong | Project: xgboost-withcomments | Lines of code: 32 | Source: benchmark.py
Example 9: test_multi_predict
def test_multi_predict(self):
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    n = 1000
    X, y = make_regression(n, random_state=rng)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=123)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test)
    params = {}
    params["tree_method"] = "gpu_hist"
    params['predictor'] = "gpu_predictor"
    bst_gpu_predict = xgb.train(params, dtrain)
    params['predictor'] = "cpu_predictor"
    bst_cpu_predict = xgb.train(params, dtrain)
    predict0 = bst_gpu_predict.predict(dtest)
    predict1 = bst_gpu_predict.predict(dtest)
    cpu_predict = bst_cpu_predict.predict(dtest)
    assert np.allclose(predict0, predict1)
    assert np.allclose(predict0, cpu_predict)
Author: rfru | Project: xgboost | Lines of code: 26 | Source: test_gpu_prediction.py
Example 10: run
def run(train_matrix, test_matrix):
    params = {'booster': 'gbtree',
              # 'objective': 'multi:softmax',
              'objective': 'multi:softprob',
              'eval_metric': 'mlogloss',
              'gamma': 1,
              'min_child_weight': 1.5,
              'max_depth': 5,
              'lambda': 10,
              'subsample': 0.7,
              'colsample_bytree': 0.7,
              'colsample_bylevel': 0.7,
              'eta': 0.03,
              'tree_method': 'exact',
              'seed': 2017,
              'nthread': 12,
              "num_class": 3
              }
    num_round = 10000
    early_stopping_rounds = 50
    watchlist = [(train_matrix, 'train'),
                 (test_matrix, 'eval')
                 ]
    if test_matrix:
        model = xgb.train(params, train_matrix, num_boost_round=num_round, evals=watchlist,
                          early_stopping_rounds=early_stopping_rounds)
        pred_test_y = model.predict(test_matrix, ntree_limit=model.best_iteration)
        return pred_test_y, model
    else:
        model = xgb.train(params, train_matrix, num_boost_round=num_round)
        return model
Author: bifeng | Project: Rental-Listing-Inquiries | Lines of code: 33 | Source: xgb.py
Example 11: train
def train(self, X, Y, getApproxError=False):
    dtrain = xgb.DMatrix(X, label=Y)
    self.bst = xgb.train(self.param, dtrain, self.nRounds)
    if getApproxError:
        e = 0.0
        c = 0.0
        kf = KFold(Y.shape[0], n_folds=4)
        for train_index, test_index in kf:
            XTrain = X[train_index, :]
            XTest = X[test_index, :]
            YTrain = Y[train_index]
            YTest = Y[test_index]
            dtrain2 = xgb.DMatrix(XTrain, label=YTrain)
            bst = xgb.train(self.param, dtrain2, self.nRounds)
            dtest = xgb.DMatrix(XTest)
            probs = bst.predict(dtest)
            ypred = numpy.argmax(probs, axis=1)
            error = float(numpy.sum(ypred != YTest))
            e += error
            c += float(len(YTest))
        e /= c
        return e
Author: DerThorsten | Project: nifty | Lines of code: 35 | Source: tools.py
Example 12: xgboost_model
def xgboost_model(train, test, num_round, params):
    """
    Takes in: training set, test set, number of estimators, params is a list
    Returns: predictions in correct format
    """
    X = train.as_matrix(train.columns[:-1]).astype(float)
    y = train.as_matrix(["cost"])[:, 0].astype(float)
    ylog1p = np.log1p(y)
    X_test = test.as_matrix(test.columns[:-1]).astype(float)
    xgb_train = xgb.DMatrix(X, label=ylog1p)
    xgb_test = xgb.DMatrix(X_test)
    # Round 1
    bst1 = xgb.train(params, xgb_train, num_round)
    y_pred1 = bst1.predict(xgb_test)
    # Round 2
    # num_round2 = 2000
    # bst2 = xgb.train(params, xgb_train, 2000)
    # y_pred2 = bst2.predict(xgb_test)
    # Power Train
    ypower3 = np.power(y, 1 / 47.0)
    xgb_train3 = xgb.DMatrix(X, label=ypower3)
    xst3 = xgb.train(params, xgb_train3, num_round)
    y_predp3 = xst3.predict(xgb_test)
    p = 0.5
    y_pred = p * np.expm1(y_pred1) + (1 - p) * np.power(y_predp3, 47.0)
    return y_pred
Author: evanslt | Project: CDIPS15_TeamCat | Lines of code: 33 | Source: cv_run.py
Example 13: train_predict
def train_predict(self, X_train, y_train, X_test, base_train_prediction, base_test_prediction):
    xgmat_train = xgb.DMatrix(X_train, label=y_train, missing=-999)
    test_size = X_test.shape[0]
    param = {}
    param['objective'] = 'binary:logistic'
    param['bst:eta'] = self.eta
    param['colsample_bytree'] = 1
    param['min_child_weight'] = self.min_child_weight
    param['bst:max_depth'] = self.depth
    param['eval_metric'] = 'auc'
    param['silent'] = 1
    param['nthread'] = self.threads
    plst = list(param.items())
    watchlist = [(xgmat_train, 'train')]
    num_round = self.num_round
    xgmat_test = xgb.DMatrix(X_test, missing=-999)
    if self.boost_from_exist_prediction:
        # train xgb with existing predictions
        # see more at https://github.com/tqchen/xgboost/blob/master/demo/guide-python/boost_from_prediction.py
        xgmat_train.set_base_margin(base_train_prediction)
        xgmat_test.set_base_margin(base_test_prediction)
        bst = xgb.train(param, xgmat_train, self.exist_num_round, watchlist)
    else:
        bst = xgb.train(plst, xgmat_train, num_round, watchlist)
    ypred = bst.predict(xgmat_test)
    return ypred
Author: thekannman | Project: kaggle | Lines of code: 31 | Source: xgb_classifier.py
Example 14: run_benchmark
def run_benchmark(args):
    print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
    print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    if args.sparsity < 1.0:
        X = np.array([[np.nan if rng.uniform(0, 1) < args.sparsity else x
                       for x in x_row] for x_row in X])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
    print("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print("DMatrix Start")
    dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
    dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
    print("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
    param = {'objective': 'binary:logistic'}
    if args.params != '':  # equality comparison instead of the original 'is not' identity check
        param.update(ast.literal_eval(args.params))
    param['tree_method'] = args.tree_method
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print("Train Time: %s seconds" % (str(time.time() - tmp)))
Author: wyj2046 | Project: xgboost | Lines of code: 25 | Source: benchmark.py
Example 15: fit
def fit(self, X, y):
    X = self.build_matrix(X, y)
    param = {
        'silent': 1 if self.silent else 0,
        'use_buffer': int(self.use_buffer),
        'num_round': self.num_round,
        'ntree_limit': self.ntree_limit,
        'nthread': self.nthread,
        'booster': self.booster,
        'eta': self.eta,
        'gamma': self.gamma,
        'max_depth': self.max_depth,
        'min_child_weight': self.min_child_weight,
        'subsample': self.subsample,
        'colsample_bytree': self.colsample_bytree,
        'max_delta_step': self.max_delta_step,
        'l': self.l,
        'alpha': self.alpha,
        'lambda_bias': self.lambda_bias,
        'objective': self.objective,
        'eval_metric': self.eval_metric,
        'seed': self.seed
    }
    if self.num_class is not None:
        param['num_class'] = self.num_class
    watchlist = [(X, 'train')]
    if self.early_stopping_rounds > 0:
        self.bst = xgb.train(param, X, self.num_round, watchlist,
                             early_stopping_rounds=self.early_stopping_rounds)
    else:
        self.bst = xgb.train(param, X, self.num_round, watchlist)
    return self
Author: eugeneyan | Project: py_ml_utils | Lines of code: 33 | Source: XGBoostClassifier.py
Example 16: test_custom_objective
def test_custom_objective(self):
    param = {'max_depth': 2, 'eta': 1, 'silent': 1}
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2

    def logregobj(preds, dtrain):
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))
        grad = preds - labels
        hess = preds * (1.0 - preds)
        return grad, hess

    def evalerror(preds, dtrain):
        labels = dtrain.get_label()
        return 'error', float(sum(labels != (preds > 0.0))) / len(labels)

    # test custom_objective in training
    bst = xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
    assert isinstance(bst, xgb.core.Booster)
    preds = bst.predict(dtest)
    labels = dtest.get_label()
    err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
    assert err < 0.1
    # test custom_objective in cross-validation
    xgb.cv(param, dtrain, num_round, nfold=5, seed=0,
           obj=logregobj, feval=evalerror)

    # test maximize parameter
    def neg_evalerror(preds, dtrain):
        labels = dtrain.get_label()
        return 'error', float(sum(labels == (preds > 0.0))) / len(labels)

    bst2 = xgb.train(param, dtrain, num_round, watchlist, logregobj, neg_evalerror, maximize=True)
    preds2 = bst2.predict(dtest)
    err2 = sum(1 for i in range(len(preds2)) if int(preds2[i] > 0.5) != labels[i]) / float(len(preds2))
    assert err == err2
Author: ChrisBarker-NOAA | Project: xgboost | Lines of code: 34 | Source: test_models.py
Example 17: xgb_features
def xgb_features(X, y, Xtest, params=None, random_state=0, n_folds=4, early_stop=20, eval_with_gini=False):
    try:
        if params['objective'] == 'reg:logistic':
            yt = MinMaxScaler().fit_transform(y * 1.)
        else:
            yt = y
        skf = StratifiedKFold(yt, n_folds=n_folds, shuffle=True, random_state=random_state)
        ypred_test = np.zeros(Xtest.shape[0])
        ypred_train = np.zeros(X.shape[0])
        seed = random_state
        dtest = xgb.DMatrix(data=Xtest)
        for train_index, test_index in skf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = yt[train_index], yt[test_index]
            dtrain = xgb.DMatrix(data=X_train, label=y_train)
            dvalid = xgb.DMatrix(data=X_test, label=y_test)
            evallist = [(dtrain, 'train'), (dvalid, 'valid')]
            num_round = 5000
            params['seed'] = seed + 1
            seed += 1
            plst = list(params.items())
            if eval_with_gini:
                bst = xgb.train(plst, dtrain, num_round, evallist, early_stopping_rounds=early_stop, feval=evalerror)
            else:
                bst = xgb.train(plst, dtrain, num_round, evallist, early_stopping_rounds=early_stop)
            ypred = bst.predict(dtest, ntree_limit=bst.best_iteration)
            ypred_valid = bst.predict(dvalid)
            print("\tcross validation gini score %s: %f" % (params['objective'], gini(y_test, ypred_valid)))
            ypred_test += ypred
            ypred_train[test_index] = ypred_valid
    except KeyboardInterrupt:
        ypred_test = np.zeros(Xtest.shape[0])
        ypred_train = np.zeros(X.shape[0])
        return ypred_train, ypred_test
    return ypred_train, ypred_test * 1. / n_folds
Author: hiendang | Project: kaggle-crowdflower-search | Lines of code: 35 | Source: utility.py
Example 18: runXGB
def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, seed_val=0, num_rounds=1000):
    param = {}
    param['objective'] = 'multi:softprob'
    param['eta'] = 0.1
    param['max_depth'] = 6
    param['silent'] = 1
    param['num_class'] = 3
    param['eval_metric'] = "mlogloss"
    param['min_child_weight'] = 1
    param['subsample'] = 0.7
    param['colsample_bytree'] = 0.7
    param['seed'] = seed_val
    num_rounds = num_rounds
    plst = list(param.items())
    xgtrain = xgb.DMatrix(train_X, label=train_y)
    if test_y is not None:
        xgtest = xgb.DMatrix(test_X, label=test_y)
        watchlist = [(xgtrain, 'train'), (xgtest, 'test')]
        model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=20)
    else:
        xgtest = xgb.DMatrix(test_X)
        model = xgb.train(plst, xgtrain, num_rounds)
    pred_test_y = model.predict(xgtest)
    return pred_test_y, model
Author: Paliking | Project: ML_examples | Lines of code: 27 | Source: XGB_my.py
Example 19: run_benchmark
def run_benchmark(args, gpu_algorithm, cpu_algorithm):
    print("Generating dataset: {} rows * {} columns".format(args.rows, args.columns))
    print("{}/{} test/train split".format(args.test_size, 1.0 - args.test_size))
    tmp = time.time()
    X, y = make_classification(args.rows, n_features=args.columns, random_state=7)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=args.test_size, random_state=7)
    print("Generate Time: %s seconds" % (str(time.time() - tmp)))
    tmp = time.time()
    print("DMatrix Start")
    # omp way
    dtrain = xgb.DMatrix(X_train, y_train, nthread=-1)
    dtest = xgb.DMatrix(X_test, y_test, nthread=-1)
    print("DMatrix Time: %s seconds" % (str(time.time() - tmp)))
    param = {'objective': 'binary:logistic',
             'max_depth': 6,
             'silent': 0,
             'n_gpus': 1,
             'gpu_id': 0,
             'eval_metric': 'error',
             'debug_verbose': 0,
             }
    param['tree_method'] = gpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print("Train Time: %s seconds" % (str(time.time() - tmp)))
    param['silent'] = 1
    param['tree_method'] = cpu_algorithm
    print("Training with '%s'" % param['tree_method'])
    tmp = time.time()
    xgb.train(param, dtrain, args.iterations, evals=[(dtest, "test")])
    print("Time: %s seconds" % (str(time.time() - tmp)))
Author: felixdae | Project: xgboost | Lines of code: 35 | Source: benchmark.py
Example 20: fit
def fit(self, X, y):
    # data
    data = X
    label = y
    skf = cross_validation.StratifiedKFold(label, n_folds=10)
    for train_index, val_index in skf:
        dtrain = xgb.DMatrix(data[train_index], label=label[train_index])
        dval = xgb.DMatrix(data[val_index], label=label[val_index])
        break
    # set params
    plst = list(self.param.items())
    plst += [('eval_metric', 'merror')]
    evallist = [(dtrain, 'train'), (dval, 'eval')]
    # train with early stopping to find the best iteration
    if self.bst == None:
        num_round = 100000
        self.bst = xgb.train(plst, dtrain, num_round, evals=evallist, early_stopping_rounds=100)
        self.best_score_ = 1 - self.bst.best_score
        self.best_params_ = self.bst.best_iteration
    # refit on the full data with the best number of rounds
    dtrain = xgb.DMatrix(data, label=label)
    num_round = self.best_params_
    self.bst = xgb.train(plst, dtrain, num_round, evals=evallist)
Author: g0ulash | Project: pumpitup | Lines of code: 26 | Source: XGBoostClassifier.py
Note: The xgboost.train examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets are selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For distribution and use, please refer to the corresponding project's license. Do not reproduce without permission.