Python calibration.CalibratedClassifierCV Class Code Examples


This article collects typical usage examples of the Python class sklearn.calibration.CalibratedClassifierCV. If you have been wondering what CalibratedClassifierCV is for, how to use it, or where to find working examples, the curated class examples below should help.



The 20 code examples of the CalibratedClassifierCV class below are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
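Before diving into the examples, here is a minimal sketch of the class's two common usage patterns: cross-validated calibration of an unfitted estimator, and calibration of an already-fitted estimator via cv='prefit'. The sketch is illustrative only (synthetic data, not drawn from the examples that follow) and assumes a scikit-learn version where cv='prefit' is still supported:

# Minimal illustrative sketch of both calibration patterns (synthetic data).
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_calib, y_train, y_calib = train_test_split(X, y, random_state=0)

# Pattern 1: let CalibratedClassifierCV fit and calibrate with internal 5-fold CV.
cal_cv = CalibratedClassifierCV(LinearSVC(), method='sigmoid', cv=5)
cal_cv.fit(X, y)

# Pattern 2: calibrate a pre-fitted estimator on a held-out split.
base = LinearSVC().fit(X_train, y_train)
cal_prefit = CalibratedClassifierCV(base, method='sigmoid', cv='prefit')
cal_prefit.fit(X_calib, y_calib)
probs = cal_prefit.predict_proba(X_calib)[:, 1]  # calibrated probabilities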

Example 1: train_model_rfc_calibrated

def train_model_rfc_calibrated(features, labels):
    # First, set aside some of the training set for calibration.
    # Use a stratified shuffle split so that class ratios are maintained after the split.
    splitter = StratifiedShuffleSplit(labels, n_iter=1, train_size=0.7, random_state=30)

    # Length is 1 in this case since we have a single fold for splitting
    print(len(splitter))

    for train_idx, calib_idx in splitter:
        features_train, features_calib = features[train_idx], features[calib_idx]
        labels_train, labels_calib = labels[train_idx], labels[calib_idx]

    print("features_train shape: ", features_train.shape)
    print("features_calib shape: ", features_calib.shape)
    print("labels_train shape: ", labels_train.shape)
    print("labels_calib shape: ", labels_calib.shape)

    print("Performing Grid Search ...")
    # params_dict = {'criterion': ['entropy'], 'n_estimators': [30, 35, 40, 45], 'max_depth': [5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10]}
    params_dict = {'criterion': ['entropy'], 'n_estimators': [60, 70, 80, 90], 'max_depth': [5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10], 'max_features': [6, 7, 8]}
    # rfc is assumed to alias sklearn.ensemble.RandomForestClassifier
    clf = GridSearchCV(rfc(random_state=30, n_jobs=4), params_dict, scoring='roc_auc', cv=5)
    clf.fit(features_train, labels_train)

    print("Best estimator: ", clf.best_estimator_)
    print("Best score: %.4f" % (clf.best_score_))
    # print("Best grid scores: ", clf.grid_scores_)

    # Perform calibration.
    # Use 'sigmoid' because sklearn cautions against 'isotonic' for fewer than
    # 1000 calibration samples, as it can result in overfitting.
    print("Performing Calibration now ...")
    sigmoid = CalibratedClassifierCV(clf, cv='prefit', method='sigmoid')
    sigmoid.fit(features_calib, labels_calib)
    return sigmoid
Developer: sathishrvijay | Project: Kaggle-HumanVsRobot | Lines: 33 | Source: classifier_exp.py
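A hedged invocation sketch for the function above, assuming the legacy imports the snippet relies on (sklearn.cross_validation.StratifiedShuffleSplit, sklearn.grid_search.GridSearchCV, and rfc aliasing RandomForestClassifier) are in scope; the dataset below is synthetic and purely illustrative:

# Hypothetical call, not from the original repo.
from sklearn.datasets import make_classification

features, labels = make_classification(n_samples=2000, n_features=10, random_state=30)
model = train_model_rfc_calibrated(features, labels)
calibrated_probs = model.predict_proba(features)[:, 1]  # calibrated P(class 1)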


Example 2: setTrainDataAndMakeModel

def setTrainDataAndMakeModel(X_train,Y_train,X_test):
    clf = MultinomialNB(alpha=125535, class_prior=None, fit_prior=True)
    calibrated_clf = CalibratedClassifierCV(clf, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    ypreds = calibrated_clf.predict_proba(X_test)
    return ypreds
Developer: gokul180288 | Project: Kaggle-1 | Lines: 6 | Source: MultinomialNB.py


Example 3: predict

    def predict(self, X, thres=0.5, return_proba=True):
        """
        Predict class for X.

        The predicted class of an input sample is a vote by the trees in
        the forest, weighted by their probability estimates. That is,
        the predicted class is the one with the highest mean probability
        estimate across the trees.
        """

        if self._model == 'svc_lin':
            from sklearn.base import clone
            from sklearn.calibration import CalibratedClassifierCV
            # sklearn's API uses the plural forms get_params/set_params
            clf = CalibratedClassifierCV(clone(self._estimator).set_params(
                **self._estimator.get_params()))
            train_y = self._Xtrain[[self._rate_column]].values.ravel().tolist()
            self._estimator = clf.fit(self._Xtrain, train_y)

        proba = np.array(self._estimator.predict_proba(X))

        if proba.shape[1] > 2:
            pred = (proba > thres).astype(int)
        else:
            pred = (proba[:, 1] > thres).astype(int)

        if return_proba:
            return proba, pred

        return pred
Developer: oesteban | Project: mriqc | Lines: 30 | Source: helper.py
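One observation about the multiclass branch above: (proba > thres).astype(int) yields a multi-hot indicator matrix, so a row can contain zero or several ones rather than a single class label; taking argmax over the calibrated probabilities would instead give one hard class per sample.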


Example 4: move_bias

    def move_bias(self, data_matrix, estimator=None, nu=.5, cv=2):
        '''
        Move the bias until a fraction nu of data_matrix falls in the negative
        class, then use scikit-learn's CalibratedClassifierCV to calibrate the
        estimator around the input.
        '''
        #  move bias
        # l = [(estimator.decision_function(g)[0], g) for g in data_matrix]
        # l.sort(key=lambda x: x[0])
        # element = int(len(l) * nu)
        # estimator.intercept_ -= l[element][0]

        scores = [estimator.decision_function(sparse_vector)[0]
                  for sparse_vector in data_matrix]
        scores_sorted = sorted(scores)
        pivot = scores_sorted[int(len(scores_sorted) * self.nu)]  # note: uses self.nu, not the nu argument
        estimator.intercept_ -= pivot

        # calibrate
        if self.move_bias_recalibrate:
            # data_matrix_binary = vstack([a[1] for a in l])
            # data_y = numpy.asarray([0] * element + [1] * (len(l) - element))
            data_y = numpy.asarray([1 if score >= pivot else -1 for score in scores])
            self.testimator = SGDClassifier(loss='log')
            self.testimator.fit(data_matrix, data_y)
            # estimator = CalibratedClassifierCV(estimator, cv=cv, method='sigmoid')
            estimator = CalibratedClassifierCV(self.testimator, cv=cv, method='sigmoid')
            estimator.fit(data_matrix, data_y)
        return estimator
Developer: smautner | Project: GraphLearn | Lines: 28 | Source: estimate.py


Example 5: svm_boost_calib_scale

def svm_boost_calib_scale(x, y, x_test, seed):
    # normalize x and x_test together
    x_rows = x.shape[0]
    X = preprocessing.scale(np.vstack((x, x_test)))
    x = X[:x_rows, :]
    x_test = X[x_rows:, :]
    print(x.shape)
    print(x_test.shape)

    # legacy scikit-learn options: class_weight='auto'; gamma=0.0 meant 'auto'
    model = SVC(probability=True, class_weight='auto', random_state=seed,
                C=100, gamma=0.0)
    boosted = AdaBoostClassifier(model, random_state=seed)
    # average 10-fold CV AUC
    cv = StratifiedKFold(y, n_folds=10, random_state=seed)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    calib = CalibratedClassifierCV(boosted, cv=10, method='isotonic')
    for i, (train, test) in enumerate(cv):
        probas_ = calib.fit(x[train], y[train]).predict_proba(x[test])
        # Compute the ROC curve and the area under it
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    print('Training set 10CV AUC:\n{}'.format(mean_auc))
    # return probs
    #model = SVC(probability=True, random_state=seed)
    #model = model.fit(x, y)
    probs = np.average([cls.predict_proba(x_test) for cls in calib.calibrated_classifiers_], axis=0)
    print(probs.shape)
    #print('Training set acc:\n{}'.format(model2.score(x, y)))
    #bids_test_probs = model2.predict_proba(x_test)
    return probs
Developer: raresct | Project: fb_hr | Lines: 35 | Source: make_sub.py


Example 6: get_score

    def get_score(self, params):
        params['n_estimators'] = int(params['n_estimators'])
        params['max_depth'] = int(params['max_depth'])
        params['min_samples_split'] = int(params['min_samples_split'])
        params['min_samples_leaf'] = int(params['min_samples_leaf'])

        print('Training with params:')
        print(params)

        # cross validation here
        scores = []
        for train_ix, test_ix in makeKFold(5, self.y, 1):
            X_train, y_train = self.X[train_ix, :], self.y[train_ix]
            X_test, y_test = self.X[test_ix, :], self.y[test_ix]
            weight = y_train.shape[0] / (2 * np.bincount(y_train))
            sample_weight = np.array([weight[i] for i in y_train])

            clf = RandomForestClassifier(**params)
            cclf = CalibratedClassifierCV(base_estimator=clf,
                                          method='isotonic',
                                          cv=makeKFold(3, y_train, 1))
            cclf.fit(X_train, y_train, sample_weight)
            pred = cclf.predict(X_test)
            scores.append(f1_score(y_true=y_test, y_pred=pred))

        print(scores)
        score = np.mean(scores)

        print(score)
        return {'loss': -score, 'status': STATUS_OK}
Developer: jingxiang-li | Project: kaggle-yelp | Lines: 31 | Source: level3_model_rf.py


Example 7: svm_calib

def svm_calib(x, y, x_test, seed):
    model = SVC(probability=True, random_state=seed)
    # average 10-fold CV AUC
    cv = StratifiedKFold(y, n_folds=10, random_state=seed)
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    calib = CalibratedClassifierCV(model, cv=10, method='isotonic')
    for i, (train, test) in enumerate(cv):
        probas_ = calib.fit(x[train], y[train]).predict_proba(x[test])
        # Compute the ROC curve and the area under it
        fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
    mean_tpr /= len(cv)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    print('Training set 10CV AUC:\n{}'.format(mean_auc))
    # return probs
    #model = SVC(probability=True, random_state=seed)
    #model = model.fit(x, y)
    probs = np.average([cls.predict_proba(x_test) for cls in calib.calibrated_classifiers_], axis=0)
    print(probs.shape)
    #print('Training set acc:\n{}'.format(model2.score(x, y)))
    #bids_test_probs = model2.predict_proba(x_test)
    return probs
Developer: raresct | Project: fb_hr | Lines: 25 | Source: make_sub.py
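Worth noting for Examples 5 and 7: CalibratedClassifierCV.predict_proba already returns the arithmetic mean of the per-fold calibrated classifiers' probabilities, so the manual average over calib.calibrated_classifiers_ above should be equivalent to the one-liner below (an observation about the sklearn API, not taken from the original repo):

probs = calib.predict_proba(x_test)  # internally averages over calibrated_classifiers_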


Example 8: train

def train(model_id, train_x, train_y, valid_x, valid_y, test_x):
    train_x, train_y = shuffle(train_x, train_y)

    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    clf = RandomForestClassifier(bootstrap=False, class_weight=None,
            criterion='entropy', max_depth=29008, max_features=36,
            max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
            oob_score=False, random_state=979271, verbose=0,
            warm_start=False)

    clf.fit(train_x, train_y)

    # calibrate the prefit forest on the held-out validation set
    ccv = CalibratedClassifierCV(base_estimator=clf, method="sigmoid", cv="prefit")
    ccv.fit(valid_x, valid_y)

    valid_predictions = ccv.predict_proba(valid_x)
    test_predictions = ccv.predict_proba(test_x)

    loss = test(valid_y, valid_predictions, True)
    if loss < 0.52:
        data.saveData(valid_predictions, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(test_predictions, "../results/results_" + str(model_id) + ".csv")
Developer: hujiewang | Project: otto | Lines: 26 | Source: rf2.py


Example 9: svc_test2

def svc_test2():
    """
    Submission:
    E_val:
    E_in:
    E_out:
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.calibration import CalibratedClassifierCV

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    svc = SVC(kernel='linear', class_weight='auto', cache_size=10240)
    svc.fit(X_scaled, y)

    isotonic = CalibratedClassifierCV(svc, cv=StratifiedKFold(y, 5),
                                      method='isotonic')
    isotonic.fit(X_scaled, y)

    logger.debug('Got best isotonic CalibratedClassifier.')
    logger.debug('E_in (isotonic): %f', Util.auc_score(isotonic, X_scaled, y))
Developer: Divergent914 | Project: yakddcup2015 | Lines: 27 | Source: modeling.py


Example 10: simple_model

def simple_model(data, test):
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)

    predictions = []

    print("\n\nTraining")
    # Sklearn calibrated random forest
    clf = RandomForestClassifier(n_estimators=2500,
                                 max_depth=2,
                                 random_state=2015)

    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X, y)

    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated RFC")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    predictions = sum(predictions) / len(predictions)

    return predictions
Developer: leonardodaniel | Project: kaggle_mosco | Lines: 30 | Source: rfc.py


Example 11: main

def main():
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    #Classifier Setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)

    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)
    log.info('Fitting ExtraTrees pipeline')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)
    # Calibrate Classifier using ground truth in X,Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Ok lets predict the test data with our funky new classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)

    write_out_submission(sig_submission_probs, 'submission.csv')
Developer: Almclean | Project: otto-group | Lines: 31 | Source: main.py


Example 12: internal_processing

    def internal_processing(self, X, y, X_test):
        """Calibrate each of the five prediction blocks, then average the
        ensemble prediction with an isotonic-calibrated ensemble."""
        Xs = np.hsplit(X, 5)
        Xts = np.hsplit(X_test, 5)
        Xts_cal = []

        for i in range(len(Xs)):
            Xts_cal.append(calibrate(Xs[i], y, Xts[i]))

        XX_test = np.hstack(Xts_cal)

        ec = EC(n_preds=5)
        ec.fit(X, y)
        y_ens = ec.predict_proba(XX_test)
#        y_pred = ec.predict_proba(X_test)

        # validation
        yv = ec.predict_proba(X)
        print('Weights: %s' % (ec.w))
        print('Validation log-loss: %s' % (logloss_mc(y, yv)))

        cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5),
                                    method='isotonic', cv=10)

        cc.fit(X, y)
        y_cal = cc.predict_proba(XX_test)

        y_pred = (y_ens + y_cal) / 2.

        return y_pred
Developer: chrinide | Project: kaggle_otto_group | Lines: 31 | Source: ens_opt_cal.py


Example 13: setTrainTestDataAndCheckModel

def setTrainTestDataAndCheckModel(X_train,Y_train,X_test,Y_test):
    model = RandomForestClassifier(125)
    model.fit(X_train,Y_train)
    '''
    clf = GridSearchCV(model,{'n_estimators':[100,125,150]},verbose=1)
    
    clf.fit(X_train,Y_train)
    print(clf.best_score_)
    print(clf.best_params_)    
    
    output = model.predict(X_test)
    print "-------------------RFC-----------------------"
    #print accuracy_score(Y_test,output)
    #print "%.2f" % log_loss(Y_test,output, eps=1e-15, normalize=True)
    
    ypreds = model.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)

    
    clfbag = BaggingClassifier(model, n_estimators=5)
    clfbag.fit(X_train, Y_train)
    ypreds = clfbag.predict(X_test)    
    #print accuracy_score(Y_test,ypreds)    
    
    ypreds = clfbag.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)
    '''
    calibrated_clf = CalibratedClassifierCV(model, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    #ypreds = calibrated_clf.predict(X_test)
    #print accuracy_score(Y_test,ypreds)
    
    ypreds = calibrated_clf.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True)
Developer: gokul180288 | Project: Kaggle-1 | Lines: 34 | Source: randomforestclassifier.py


Example 14: calibrate_probs

def calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic', random_state=5968):
    """ Calling from R:

        suppressMessages(library("rPython")) # Load RPython
        python.load("path/to/util_rpython.py")

        data.pred.calib <- python.call('calibrate_probs',
                                   y_val=y_val, # Actual values from validation
                                   prob_val=pred_val, # Predicted values from validation
                                   prob_test=pred_test) # Predicted values from test

        # data.pred.calib will be a list, so to get the calibrated predictions for each value we do:
        calib_pred_val = data.pred.calib$val
        calib_pred_test = data.pred.calib$test

    """

    y_val = np.asarray(y_val, dtype=float)
    prob_val = np.asarray(prob_val, dtype=float).reshape((-1, 1))
    prob_test = np.asarray(prob_test, dtype=float).reshape((-1, 1))

    prob_clb_val = np.zeros(len(y_val))
    prob_clb_test = np.zeros(len(prob_test))

    kf_val_full = KFold(len(y_val), n_folds=n_folds, random_state=random_state)

    for ix_train, ix_test in kf_val_full:
        kf_val_inner = KFold(len(ix_train), n_folds=n_folds, random_state=random_state)
        clf = CalibratedClassifierCV(method=method, cv=kf_val_inner)
        clf.fit(prob_val[ix_train], y_val[ix_train])
        prob_clb_val[ix_test] = clf.predict_proba(prob_val[ix_test])[:, 1]
        prob_clb_test += clf.predict_proba(prob_test)[:, 1]/n_folds

    return {'val': list(prob_clb_val), 'test': list(prob_clb_test)}
Developer: ChenglongChen | Project: avito_context_click_2015 | Lines: 34 | Source: util_rpython.py
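For reference outside R, a pure-Python invocation might look like the sketch below; it assumes the legacy sklearn.cross_validation.KFold API used above, and the arrays are synthetic stand-ins rather than data from the original project:

import numpy as np

rng = np.random.RandomState(5968)
y_val = (rng.rand(400) > 0.5).astype(float)                    # validation labels
pred_val = np.clip(0.6 * y_val + 0.4 * rng.rand(400), 0., 1.)  # noisy validation scores
pred_test = rng.rand(200)                                      # test scores to calibrate
out = calibrate_probs(y_val, pred_val, pred_test, n_folds=2, method='isotonic')
calib_val, calib_test = out['val'], out['test']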


Example 15: test_sample_weight_warning

def test_sample_weight_warning():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        # LinearSVC does not currently support sample weights but they
        # can still be used for the calibration step (with a warning)
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still yield
        # different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
Developer: 0664j35t3r | Project: scikit-learn | Lines: 28 | Source: test_calibration.py


Example 16: calibrate

def calibrate(X_val, y_val, estimator):

    clf = CalibratedClassifierCV(base_estimator=estimator, 
                                method='isotonic', cv='prefit')

    clf.fit(X_val, y_val)
    return clf
Developer: fnd212 | Project: ML2016_EDU | Lines: 7 | Source: model_calibration.py
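A hedged usage sketch for calibrate(): fit any estimator on a training split, then calibrate it on a held-out validation split via the prefit path. The estimator and splits below are illustrative assumptions, not taken from the original project:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1500, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=0)

rf = RandomForestClassifier(n_estimators=200, random_state=0).fit(X_train, y_train)
calibrated_rf = calibrate(X_val, y_val, rf)           # prefit isotonic calibration
val_probs = calibrated_rf.predict_proba(X_val)[:, 1]

As the comment in Example 1 notes, isotonic calibration can overfit with fewer than roughly 1000 calibration samples, so method='sigmoid' may be the safer choice for small validation splits.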


Example 17: prepare_model

    def prepare_model(self, obj_fn=None, num_steps=None, model_params=None, batch_size: int = None):
        # Double calibration: a sigmoid-calibrated KNN is fitted on the training
        # split, then recalibrated (prefit) on the validation split.
        model = CalibratedClassifierCV(KNeighborsClassifier(**model_params), method="sigmoid")
        model_clf = model.fit(self.ds[self.data_groups["data_train_group"]].to_ndarray(),
                              self.ds[self.data_groups["target_train_group"]].to_ndarray())
        cal_model = CalibratedClassifierCV(model_clf, method="sigmoid", cv="prefit")
        cal_model.fit(self.ds[self.data_groups["data_validation_group"]].to_ndarray(),
                      self.ds[self.data_groups["target_validation_group"]].to_ndarray())
        return self.ml_model(cal_model)
Developer: elaeon | Project: ML | Lines: 8 | Source: w_sklearn.py


Example 18: get_model

def get_model(params, X, y):
    clf = RandomForestClassifier(**params)
    cclf = CalibratedClassifierCV(base_estimator=clf,
                                  method='isotonic',
                                  cv=makeKFold(3, y, 1))
    weight = y.shape[0] / (2 * np.bincount(y))
    sample_weight = np.array([weight[i] for i in y])
    cclf.fit(X, y, sample_weight)
    return cclf
Developer: jingxiang-li | Project: kaggle-yelp | Lines: 9 | Source: level3_model_rf.py


Example 19: train_test

    def train_test(self, X, y, X_test):
        """
        """
        sss = StratifiedShuffleSplit(y, 1, test_size=0.5)    
        for train_id, valid_id in sss:
            X0, X1 = X[train_id], X[valid_id]
            y0, y1 = y[train_id], y[valid_id]  
            
        #First half
        
        w0 = np.zeros(len(y0))
        for i in range(len(w0)):
            w0[i] = self.w[int(y0[i])]
        xg0_train = DMatrix(X0, label=y0, weight=w0)  
        xg0_test = DMatrix(X1, label=y1)   
        xgt_test = DMatrix(X_test)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
        y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
        yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)
        
        #Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini', 
                class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0, y0)
        y0_cal = cal.predict_proba(X1)
        yt_cal = cal.predict_proba(X_test)
        
        #Second half
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yt_pred = ss.fit_transform(yt_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yt_cal = ss.fit_transform(yt_cal)
        X1 = np.hstack((X1, y0_pred, y0_cal))
        X_test = np.hstack((X_test, yt_pred, yt_cal))  
        w1 = np.zeros(len(y1))
        
#        self.param['eta'] = 0.01
        self.num_round = 450

        for i in range(len(w1)):
            w1[i] = self.w[int(y1[i])]
        xg1_train = DMatrix(X1, label=y1, weight=w1)    
        xg_test= DMatrix(X_test)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
        y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
        
        return y_pred
Developer: chrinide | Project: kaggle_otto_group | Lines: 49 | Source: clf_xgboost_split.py


Example 20: train_validate

    def train_validate(self, X_train, y_train, X_valid, y_valid):
        """
        """
        sss = StratifiedShuffleSplit(y_train, 1, test_size=0.5)    
        for train_id, valid_id in sss:
            X0_train, X1_train = X_train[train_id], X_train[valid_id]
            y0_train, y1_train = y_train[train_id], y_train[valid_id]  
            
        #First half
       
        w0_train = np.zeros(len(y0_train))
        for i in range(len(w0_train)):
            w0_train[i] = self.w[int(y0_train[i])]
        xg0_train = DMatrix(X0_train, label=y0_train, weight=w0_train)  
        xg0_valid = DMatrix(X1_train, label=y1_train)   
        xgv_valid = DMatrix(X_valid, label=y_valid)
        watchlist = [(xg0_train,'train'), (xg0_valid, 'validation0')]
        
#        bst0 = train(self.param, xg0_train, self.num_round, watchlist)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round, watchlist)
        y0_pred = bst0.predict(xg0_valid).reshape(X1_train.shape[0], 9)
        yv_pred = bst0.predict(xgv_valid).reshape(X_valid.shape[0], 9)
        
        #Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini', 
                                    class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)        
        cal.fit(X0_train, y0_train)
        y0_cal = cal.predict_proba(X1_train)
        yv_cal = cal.predict_proba(X_valid)
        
        #Second half
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yv_pred = ss.fit_transform(yv_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yv_cal = ss.fit_transform(yv_cal)
        X1_train = np.hstack((X1_train, y0_pred, y0_cal))
        X_valid = np.hstack((X_valid, yv_pred, yv_cal))        
        w1_train = np.zeros(len(y1_train))
        
#        self.param['eta'] = 0.05
        self.num_round = 450

        for i in range(len(w1_train)):
            w1_train[i] = self.w[int(y1_train[i])]
        xg1_train = DMatrix(X1_train, label=y1_train, weight=w1_train)    
        xg_valid = DMatrix(X_valid, label=y_valid)
        watchlist = [(xg1_train,'train'), (xg_valid, 'validation')]
        
#        bst1 = train(self.param, xg1_train, self.num_round, watchlist)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round, watchlist)
        y_pred = bst1.predict(xg_valid).reshape(X_valid.shape[0], 9)

#        pdb.set_trace()
        return y_pred
Developer: chrinide | Project: kaggle_otto_group | Lines: 56 | Source: clf_xgboost_split.py



Note: The sklearn.calibration.CalibratedClassifierCV class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and distribution and use should follow the corresponding project's license. Please do not reproduce without permission.

