This article collects typical usage examples of the Python class sklearn.calibration.CalibratedClassifierCV. If you are wondering what CalibratedClassifierCV does and how to use it in practice, the curated class examples below should help.
20 code examples of the CalibratedClassifierCV class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
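As a primer before the examples, here is a minimal sketch of the typical workflow: wrap a base classifier, fit, and read off calibrated probabilities. The synthetic data and LogisticRegression base model are placeholder choices, assuming a recent scikit-learn:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Synthetic binary data (placeholder for a real dataset)
X, y = make_classification(n_samples=1000, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Wrap a base classifier; cv=3 fits three calibrated copies internally
clf = CalibratedClassifierCV(LogisticRegression(max_iter=1000), method='sigmoid', cv=3)
clf.fit(X_train, y_train)
proba = clf.predict_proba(X_test)  # calibrated probabilities, shape (n_samples, 2)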
Example 1: train_model_rfc_calibrated
def train_model_rfc_calibrated(features, labels):
    # First, set aside some of the training set for calibration.
    # Use a stratified shuffle split so that class ratios are maintained after the split.
    splitter = StratifiedShuffleSplit(labels, n_iter=1, train_size=0.7, random_state=30)
    # Length is 1 in this case since we have a single fold for splitting
    print(len(splitter))
    for train_idx, calib_idx in splitter:
        features_train, features_calib = features[train_idx], features[calib_idx]
        labels_train, labels_calib = labels[train_idx], labels[calib_idx]
    print("features_train shape: ", features_train.shape)
    print("features_calib shape: ", features_calib.shape)
    print("labels_train shape: ", labels_train.shape)
    print("labels_calib shape: ", labels_calib.shape)
    print("Performing Grid Search ...")
    # params_dict = {'criterion': ['entropy'], 'n_estimators': [30, 35, 40, 45], 'max_depth': [5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10]}
    params_dict = {'criterion': ['entropy'], 'n_estimators': [60, 70, 80, 90], 'max_depth': [5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10], 'max_features': [6, 7, 8]}
    clf = GridSearchCV(rfc(random_state=30, n_jobs=4), params_dict, scoring='roc_auc', cv=5)
    clf.fit(features_train, labels_train)
    print("Best estimator: ", clf.best_estimator_)
    print("Best score: %.4f" % (clf.best_score_))
    # print("Best grid scores: ", clf.grid_scores_)
    # Perform calibration.
    # Use 'sigmoid' because sklearn cautions against using 'isotonic' for fewer than
    # 1000 calibration samples, as it can result in overfitting.
    print("Performing Calibration now ...")
    sigmoid = CalibratedClassifierCV(clf, cv='prefit', method='sigmoid')
    sigmoid.fit(features_calib, labels_calib)
    return sigmoid
Developer: sathishrvijay, Project: Kaggle-HumanVsRobot, Lines: 33, Source: classifier_exp.py
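The sigmoid-vs-isotonic choice in the comment above follows scikit-learn's guidance: isotonic regression can overfit when the calibration set is small (roughly under 1000 samples), while Platt's sigmoid is more conservative. A hedged sketch comparing the two methods on a held-out set via the Brier score (synthetic data; assumes a scikit-learn version that still accepts cv='prefit'):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=3000, random_state=0)
X_fit, X_rest, y_fit, y_rest = train_test_split(X, y, test_size=0.5, random_state=0)
X_calib, X_hold, y_calib, y_hold = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

base_clf = RandomForestClassifier(random_state=0).fit(X_fit, y_fit)
for method in ('sigmoid', 'isotonic'):
    cal = CalibratedClassifierCV(base_clf, cv='prefit', method=method).fit(X_calib, y_calib)
    p = cal.predict_proba(X_hold)[:, 1]
    print(method, 'Brier:', round(brier_score_loss(y_hold, p), 4))  # lower is better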
Example 2: setTrainDataAndMakeModel
def setTrainDataAndMakeModel(X_train, Y_train, X_test):
    clf = MultinomialNB(alpha=125535, class_prior=None, fit_prior=True)
    calibrated_clf = CalibratedClassifierCV(clf, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    ypreds = calibrated_clf.predict_proba(X_test)
    return ypreds
Developer: gokul180288, Project: Kaggle-1, Lines: 6, Source: MultinomialNB.py
Example 3: predict
def predict(self, X, thres=0.5, return_proba=True):
    """
    Predict class for X.

    The predicted class of an input sample is a vote by the trees in
    the forest, weighted by their probability estimates. That is,
    the predicted class is the one with the highest mean probability
    estimate across the trees.
    """
    if self._model == 'svc_lin':
        from sklearn.base import clone
        from sklearn.calibration import CalibratedClassifierCV
        clf = CalibratedClassifierCV(clone(self._estimator).set_params(
            **self._estimator.get_params()))
        train_y = self._Xtrain[[self._rate_column]].values.ravel().tolist()
        self._estimator = clf.fit(self._Xtrain, train_y)

    proba = np.array(self._estimator.predict_proba(X))
    if proba.shape[1] > 2:
        pred = (proba > thres).astype(int)
    else:
        pred = (proba[:, 1] > thres).astype(int)

    if return_proba:
        return proba, pred
    return pred
Developer: oesteban, Project: mriqc, Lines: 30, Source: helper.py
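The branch at the end of Example 3 simply thresholds calibrated probabilities into hard labels. A standalone illustration of the binary case, with made-up numbers:

import numpy as np

proba = np.array([[0.90, 0.10],
                  [0.30, 0.70],
                  [0.55, 0.45]])  # e.g. output of predict_proba
thres = 0.5
pred = (proba[:, 1] > thres).astype(int)  # compare positive-class column to threshold
print(pred)  # [0 1 0]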
Example 4: move_bias
def move_bias(self, data_matrix, estimator=None, nu=.5, cv=2):
    '''
    Move the bias until a nu fraction of data_matrix falls in the negative class,
    then use scikit-learn's calibration to calibrate self.estimator around the input.
    '''
    # move bias
    # l = [(estimator.decision_function(g)[0], g) for g in data_matrix]
    # l.sort(key=lambda x: x[0])
    # element = int(len(l) * nu)
    # estimator.intercept_ -= l[element][0]
    scores = [estimator.decision_function(sparse_vector)[0]
              for sparse_vector in data_matrix]
    scores_sorted = sorted(scores)
    pivot = scores_sorted[int(len(scores_sorted) * self.nu)]
    estimator.intercept_ -= pivot

    # calibrate
    if self.move_bias_recalibrate:
        # data_matrix_binary = vstack([a[1] for a in l])
        # data_y = numpy.asarray([0] * element + [1] * (len(l) - element))
        data_y = numpy.asarray([1 if score >= pivot else -1 for score in scores])
        self.testimator = SGDClassifier(loss='log')
        self.testimator.fit(data_matrix, data_y)
        # estimator = CalibratedClassifierCV(estimator, cv=cv, method='sigmoid')
        estimator = CalibratedClassifierCV(self.testimator, cv=cv, method='sigmoid')
        estimator.fit(data_matrix, data_y)
    return estimator
Developer: smautner, Project: GraphLearn, Lines: 28, Source: estimate.py
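The bias shift in Example 4 is a quantile trick: subtracting the nu-quantile of the decision scores from intercept_ leaves roughly a nu fraction of samples on the negative side of the hyperplane. A compact sketch of the same idea, with LinearSVC as a stand-in estimator:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)
est = LinearSVC(random_state=0).fit(X, y)

nu = 0.5
scores = est.decision_function(X)
pivot = np.sort(scores)[int(len(scores) * nu)]  # nu-quantile of the scores
est.intercept_ -= pivot                         # shift the decision boundary
print((est.decision_function(X) < 0).mean())    # roughly nu of samples now score negative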
Example 5: svm_boost_calib_scale
def svm_boost_calib_scale(x, y, x_test, seed):
    # normalize x and x_test together
    x_rows = x.shape[0]
    X = preprocessing.scale(np.vstack((x, x_test)))
    x = X[:x_rows, :]
    x_test = X[x_rows:, :]
    print(x.shape)
    print(x_test.shape)
    model = SVC(probability=True, class_weight='auto', random_state=seed,
                C=100, gamma=0.0)
    boosted = AdaBoostClassifier(model, random_state=seed)
    # average CV AUC
    cv = StratifiedKFold(y, n_folds=10, random_state=seed)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    calib = CalibratedClassifierCV(boosted, cv=10, method='isotonic')
    for i, (train, test) in enumerate(cv):
        probas_ = calib.fit(x[train], y[train]).predict_proba(x[test])
        # Compute the ROC curve and the area under it
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    print('Training set 10CV AUC:\n{}'.format(mean_auc))
    # return probabilities averaged over the calibrated fold models
    # model = SVC(probability=True, random_state=seed)
    # model = model.fit(x, y)
    probs = np.average([cls.predict_proba(x_test) for cls in calib.calibrated_classifiers_], axis=0)
    print(probs.shape)
    # print('Training set acc:\n{}'.format(model2.score(x, y)))
    # bids_test_probs = model2.predict_proba(x_test)
    return probs
Developer: raresct, Project: fb_hr, Lines: 35, Source: make_sub.py
Example 6: get_score
def get_score(self, params):
    params['n_estimators'] = int(params['n_estimators'])
    params['max_depth'] = int(params['max_depth'])
    params['min_samples_split'] = int(params['min_samples_split'])
    params['min_samples_leaf'] = int(params['min_samples_leaf'])
    print('Training with params:')
    print(params)
    # cross-validation here
    scores = []
    for train_ix, test_ix in makeKFold(5, self.y, 1):
        X_train, y_train = self.X[train_ix, :], self.y[train_ix]
        X_test, y_test = self.X[test_ix, :], self.y[test_ix]
        weight = y_train.shape[0] / (2 * np.bincount(y_train))
        sample_weight = np.array([weight[i] for i in y_train])
        clf = RandomForestClassifier(**params)
        cclf = CalibratedClassifierCV(base_estimator=clf,
                                      method='isotonic',
                                      cv=makeKFold(3, y_train, 1))
        cclf.fit(X_train, y_train, sample_weight)
        pred = cclf.predict(X_test)
        scores.append(f1_score(y_true=y_test, y_pred=pred))
    print(scores)
    score = np.mean(scores)
    print(score)
    return {'loss': -score, 'status': STATUS_OK}
Developer: jingxiang-li, Project: kaggle-yelp, Lines: 31, Source: level3_model_rf.py
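The manual weight formula above, y_train.shape[0] / (2 * np.bincount(y_train)), is exactly scikit-learn's 'balanced' scheme for two classes (n_samples / (n_classes * bincount)). A small check of the equivalence, assuming binary labels:

import numpy as np
from sklearn.utils.class_weight import compute_sample_weight

y = np.array([0, 0, 0, 1])                      # toy binary labels
weight = y.shape[0] / (2 * np.bincount(y))      # manual: [4/6, 4/2]
manual = np.array([weight[i] for i in y])       # one weight per sample
builtin = compute_sample_weight('balanced', y)  # sklearn's built-in scheme
print(np.allclose(manual, builtin))             # True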
Example 7: svm_calib
def svm_calib(x, y, x_test, seed):
    model = SVC(probability=True, random_state=seed)
    # average CV AUC
    cv = StratifiedKFold(y, n_folds=10, random_state=seed)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    calib = CalibratedClassifierCV(model, cv=10, method='isotonic')
    for i, (train, test) in enumerate(cv):
        probas_ = calib.fit(x[train], y[train]).predict_proba(x[test])
        # Compute the ROC curve and the area under it
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    print('Training set 10CV AUC:\n{}'.format(mean_auc))
    # return probabilities averaged over the calibrated fold models
    # model = SVC(probability=True, random_state=seed)
    # model = model.fit(x, y)
    probs = np.average([cls.predict_proba(x_test) for cls in calib.calibrated_classifiers_], axis=0)
    print(probs.shape)
    # print('Training set acc:\n{}'.format(model2.score(x, y)))
    # bids_test_probs = model2.predict_proba(x_test)
    return probs
Developer: raresct, Project: fb_hr, Lines: 25, Source: make_sub.py
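A side note on the pattern shared by Examples 5 and 7: CalibratedClassifierCV already averages its internal fold models inside predict_proba, so manually averaging over calibrated_classifiers_ reproduces the plain predict_proba output (and since calib is refit on every outer fold, the attribute only reflects the final fit). A minimal sketch of that equivalence, assuming a current scikit-learn:

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=500, random_state=0)
calib = CalibratedClassifierCV(LogisticRegression(max_iter=1000), cv=5, method='sigmoid')
calib.fit(X, y)

# Manual average over the per-fold calibrated models vs. the built-in prediction
manual = np.average([c.predict_proba(X) for c in calib.calibrated_classifiers_], axis=0)
direct = calib.predict_proba(X)
print(np.allclose(manual, direct))  # True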
Example 8: train
def train(model_id, train_x, train_y, valid_x, valid_y, test_x):
    train_x, train_y = shuffle(train_x, train_y)
    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))
    clf = RandomForestClassifier(bootstrap=False, class_weight=None,
                                 criterion='entropy', max_depth=29008, max_features=36,
                                 max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
                                 min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
                                 oob_score=False, random_state=979271, verbose=0,
                                 warm_start=False)
    clf.fit(train_x, train_y)
    ccv = CalibratedClassifierCV(base_estimator=clf, method="sigmoid", cv="prefit")
    ccv.fit(valid_x, valid_y)
    valid_predictions = ccv.predict_proba(valid_x)
    test_predictions = ccv.predict_proba(test_x)
    loss = test(valid_y, valid_predictions, True)
    if loss < 0.52:
        data.saveData(valid_predictions, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(test_predictions, "../results/results_" + str(model_id) + ".csv")
Developer: hujiewang, Project: otto, Lines: 26, Source: rf2.py
Example 9: svc_test2
def svc_test2():
    """
    Submission:
    E_val:
    E_in:
    E_out:
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.calibration import CalibratedClassifierCV

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    svc = SVC(kernel='linear', class_weight='auto', cache_size=10240)
    svc.fit(X_scaled, y)

    isotonic = CalibratedClassifierCV(svc, cv=StratifiedKFold(y, 5),
                                      method='isotonic')
    isotonic.fit(X_scaled, y)
    logger.debug('Got best isotonic CalibratedClassifier.')
    logger.debug('E_in (isotonic): %f', Util.auc_score(isotonic, X_scaled, y))
Developer: Divergent914, Project: yakddcup2015, Lines: 27, Source: modeling.py
Example 10: simple_model
def simple_model(data, test):
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)
    predictions = []
    print("\n\nTraining")
    # Sklearn random forest, wrapped in isotonic calibration
    clf = RandomForestClassifier(n_estimators=2500,
                                 max_depth=2,
                                 random_state=2015)
    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X, y)
    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated RFC")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])
    predictions = sum(predictions) / len(predictions)
    return predictions
Developer: leonardodaniel, Project: kaggle_mosco, Lines: 30, Source: rfc.py
Example 11: main
def main():
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    # Classifier setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)

    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate the classifier using the ground truth in X_valid, Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Now predict the test data with the calibrated classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)
    write_out_submission(sig_submission_probs, 'submission.csv')
Developer: Almclean, Project: otto-group, Lines: 31, Source: main.py
Example 12: internal_processing
def internal_processing(self, X, y, X_test):
    """
    """
    Xs = np.hsplit(X, 5)
    Xts = np.hsplit(X_test, 5)
    Xts_cal = []
    for i in range(len(Xs)):
        Xts_cal.append(calibrate(Xs[i], y, Xts[i]))
    XX_test = np.hstack(Xts_cal)

    ec = EC(n_preds=5)
    ec.fit(X, y)
    y_ens = ec.predict_proba(XX_test)
    # y_pred = ec.predict_proba(X_test)

    # validation
    yv = ec.predict_proba(X)
    print('Weights: %s' % (ec.w))
    print('Validation log-loss: %s' % (logloss_mc(y, yv)))

    cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5),
                                method='isotonic', cv=10)
    cc.fit(X, y)
    y_cal = cc.predict_proba(XX_test)

    y_pred = (y_ens + y_cal) / 2.
    return y_pred
Developer: chrinide, Project: kaggle_otto_group, Lines: 31, Source: ens_opt_cal.py
Example 13: setTrainTestDataAndCheckModel
def setTrainTestDataAndCheckModel(X_train, Y_train, X_test, Y_test):
    model = RandomForestClassifier(125)
    model.fit(X_train, Y_train)
    '''
    clf = GridSearchCV(model, {'n_estimators': [100, 125, 150]}, verbose=1)
    clf.fit(X_train, Y_train)
    print(clf.best_score_)
    print(clf.best_params_)

    output = model.predict(X_test)
    print "-------------------RFC-----------------------"
    #print accuracy_score(Y_test, output)
    #print "%.2f" % log_loss(Y_test, output, eps=1e-15, normalize=True)

    ypreds = model.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True)

    clfbag = BaggingClassifier(model, n_estimators=5)
    clfbag.fit(X_train, Y_train)
    ypreds = clfbag.predict(X_test)
    #print accuracy_score(Y_test, ypreds)
    ypreds = clfbag.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True)
    '''
    calibrated_clf = CalibratedClassifierCV(model, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    # ypreds = calibrated_clf.predict(X_test)
    # print accuracy_score(Y_test, ypreds)
    ypreds = calibrated_clf.predict_proba(X_test)
    print("%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True))
Developer: gokul180288, Project: Kaggle-1, Lines: 34, Source: randomforestclassifier.py
Example 14: calibrate_probs
def calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic', random_state=5968):
    """ Calling from R:
        suppressMessages(library("rPython"))  # Load rPython
        python.load("path/to/util_rpython.py")
        data.pred.calib <- python.call('calibrate_probs',
                                       y_val=y_val,          # Actual values from validation
                                       prob_val=pred_val,    # Predicted values from validation
                                       prob_test=pred_test)  # Predicted values from test
        # data.pred.calib is a list, so the calibrated predictions for each set are:
        calib_pred_val = data.pred.calib$val
        calib_pred_test = data.pred.calib$test
    """
    y_val = np.asarray(y_val, dtype=float)
    prob_val = np.asarray(prob_val, dtype=float).reshape((-1, 1))
    prob_test = np.asarray(prob_test, dtype=float).reshape((-1, 1))

    prob_clb_val = np.zeros(len(y_val))
    prob_clb_test = np.zeros(len(prob_test))

    kf_val_full = KFold(len(y_val), n_folds=n_folds, random_state=random_state)
    for ix_train, ix_test in kf_val_full:
        kf_val_inner = KFold(len(ix_train), n_folds=n_folds, random_state=random_state)
        clf = CalibratedClassifierCV(method=method, cv=kf_val_inner)
        clf.fit(prob_val[ix_train], y_val[ix_train])
        prob_clb_val[ix_test] = clf.predict_proba(prob_val[ix_test])[:, 1]
        prob_clb_test += clf.predict_proba(prob_test)[:, 1] / n_folds
    return {'val': list(prob_clb_val), 'test': list(prob_clb_test)}
Developer: ChenglongChen, Project: avito_context_click_2015, Lines: 34, Source: util_rpython.py
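For completeness, a hypothetical Python-side invocation of calibrate_probs with made-up arrays (the helper targets the legacy sklearn.cross_validation KFold API, so this assumes that older environment):

import numpy as np

rng = np.random.RandomState(0)
y_val = rng.randint(0, 2, size=200)            # validation labels (binary)
prob_val = 0.6 * y_val + 0.4 * rng.rand(200)   # uncalibrated validation scores
prob_test = rng.rand(100)                      # uncalibrated test scores

out = calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic')
calib_val, calib_test = out['val'], out['test']  # calibrated probability lists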
Example 15: test_sample_weight_warning
def test_sample_weight_warning():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        # LinearSVC does not currently support sample weights, but they
        # can still be used for the calibration step (with a warning)
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still
        # yield different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
Developer: 0664j35t3r, Project: scikit-learn, Lines: 28, Source: test_calibration.py
Example 16: calibrate
def calibrate(X_val, y_val, estimator):
    clf = CalibratedClassifierCV(base_estimator=estimator,
                                 method='isotonic', cv='prefit')
    clf.fit(X_val, y_val)
    return clf
Developer: fnd212, Project: ML2016_EDU, Lines: 7, Source: model_calibration.py
Example 17: prepare_model
def prepare_model(self, obj_fn=None, num_steps=None, model_params=None, batch_size: int = None):
    model = CalibratedClassifierCV(KNeighborsClassifier(**model_params), method="sigmoid")
    model_clf = model.fit(self.ds[self.data_groups["data_train_group"]].to_ndarray(),
                          self.ds[self.data_groups["target_train_group"]].to_ndarray())
    cal_model = CalibratedClassifierCV(model_clf, method="sigmoid", cv="prefit")
    cal_model.fit(self.ds[self.data_groups["data_validation_group"]].to_ndarray(),
                  self.ds[self.data_groups["target_validation_group"]].to_ndarray())
    return self.ml_model(cal_model)
Developer: elaeon, Project: ML, Lines: 8, Source: w_sklearn.py
Example 18: get_model
def get_model(params, X, y):
    clf = RandomForestClassifier(**params)
    cclf = CalibratedClassifierCV(base_estimator=clf,
                                  method='isotonic',
                                  cv=makeKFold(3, y, 1))
    weight = y.shape[0] / (2 * np.bincount(y))
    sample_weight = np.array([weight[i] for i in y])
    cclf.fit(X, y, sample_weight)
    return cclf
Developer: jingxiang-li, Project: kaggle-yelp, Lines: 9, Source: level3_model_rf.py
Example 19: train_test
def train_test(self, X, y, X_test):
    """
    """
    sss = StratifiedShuffleSplit(y, 1, test_size=0.5)
    for train_id, valid_id in sss:
        X0, X1 = X[train_id], X[valid_id]
        y0, y1 = y[train_id], y[valid_id]

    # First half
    w0 = np.zeros(len(y0))
    for i in range(len(w0)):
        w0[i] = self.w[int(y0[i])]
    xg0_train = DMatrix(X0, label=y0, weight=w0)
    xg0_test = DMatrix(X1, label=y1)
    xgt_test = DMatrix(X_test)
    bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
    y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
    yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)

    # Calibrated RF
    rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                class_weight='auto', max_features='auto')
    cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
    cal.fit(X0, y0)
    y0_cal = cal.predict_proba(X1)
    yt_cal = cal.predict_proba(X_test)

    # Second half
    ss = StandardScaler()
    y0_pred = ss.fit_transform(y0_pred)
    yt_pred = ss.fit_transform(yt_pred)
    y0_cal = ss.fit_transform(y0_cal)
    yt_cal = ss.fit_transform(yt_cal)
    X1 = np.hstack((X1, y0_pred, y0_cal))
    X_test = np.hstack((X_test, yt_pred, yt_cal))
    w1 = np.zeros(len(y1))
    # self.param['eta'] = 0.01
    self.num_round = 450
    for i in range(len(w1)):
        w1[i] = self.w[int(y1[i])]
    xg1_train = DMatrix(X1, label=y1, weight=w1)
    xg_test = DMatrix(X_test)
    bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
    y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
    return y_pred
Developer: chrinide, Project: kaggle_otto_group, Lines: 49, Source: clf_xgboost_split.py
Example 20: train_validate
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """
    """
    sss = StratifiedShuffleSplit(y_train, 1, test_size=0.5)
    for train_id, valid_id in sss:
        X0_train, X1_train = X_train[train_id], X_train[valid_id]
        y0_train, y1_train = y_train[train_id], y_train[valid_id]

    # First half
    w0_train = np.zeros(len(y0_train))
    for i in range(len(w0_train)):
        w0_train[i] = self.w[int(y0_train[i])]
    xg0_train = DMatrix(X0_train, label=y0_train, weight=w0_train)
    xg0_valid = DMatrix(X1_train, label=y1_train)
    xgv_valid = DMatrix(X_valid, label=y_valid)
    watchlist = [(xg0_train, 'train'), (xg0_valid, 'validation0')]
    # bst0 = train(self.param, xg0_train, self.num_round, watchlist)
    bst0 = my_train_xgboost(self.param, xg0_train, self.num_round, watchlist)
    y0_pred = bst0.predict(xg0_valid).reshape(X1_train.shape[0], 9)
    yv_pred = bst0.predict(xgv_valid).reshape(X_valid.shape[0], 9)

    # Calibrated RF
    rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                class_weight='auto', max_features='auto')
    cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
    cal.fit(X0_train, y0_train)
    y0_cal = cal.predict_proba(X1_train)
    yv_cal = cal.predict_proba(X_valid)

    # Second half
    ss = StandardScaler()
    y0_pred = ss.fit_transform(y0_pred)
    yv_pred = ss.fit_transform(yv_pred)
    y0_cal = ss.fit_transform(y0_cal)
    yv_cal = ss.fit_transform(yv_cal)
    X1_train = np.hstack((X1_train, y0_pred, y0_cal))
    X_valid = np.hstack((X_valid, yv_pred, yv_cal))
    w1_train = np.zeros(len(y1_train))
    # self.param['eta'] = 0.05
    self.num_round = 450
    for i in range(len(w1_train)):
        w1_train[i] = self.w[int(y1_train[i])]
    xg1_train = DMatrix(X1_train, label=y1_train, weight=w1_train)
    xg_valid = DMatrix(X_valid, label=y_valid)
    watchlist = [(xg1_train, 'train'), (xg_valid, 'validation')]
    # bst1 = train(self.param, xg1_train, self.num_round, watchlist)
    bst1 = my_train_xgboost(self.param, xg1_train, self.num_round, watchlist)
    y_pred = bst1.predict(xg_valid).reshape(X_valid.shape[0], 9)
    # pdb.set_trace()
    return y_pred
Developer: chrinide, Project: kaggle_otto_group, Lines: 56, Source: clf_xgboost_split.py
Note: The sklearn.calibration.CalibratedClassifierCV class examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.