
Python xgboost.plot_importance Function Code Examples


This article collects typical usage examples of the xgboost.plot_importance function in Python. If you are wondering how exactly plot_importance is used, how to call it in practice, or what real-world examples look like, the hand-picked code samples below should help.



A total of 20 plot_importance code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
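
Before the collected examples, here is a minimal, self-contained sketch of the typical call pattern. The random data, the binary:logistic objective, and the number of boosting rounds are illustrative assumptions, not values taken from any of the examples below.

import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb

# Illustrative data: 500 samples, 10 numeric features, binary labels.
X = np.random.rand(500, 10)
y = np.random.randint(0, 2, size=500)

dtrain = xgb.DMatrix(X, label=y)
bst = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=20)

# plot_importance returns a matplotlib Axes with one horizontal bar per feature
# that appears in at least one tree of the fitted booster.
ax = xgb.plot_importance(bst)
plt.show()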

Example 1: train_helper

def train_helper(X_train, X_test, y_train, y_test, model_name):
    xg_train = xgboost.DMatrix( X_train, label=y_train)
    xg_test = xgboost.DMatrix(X_test, label=y_test)

    le = load_label_encoder(model_name)

    param = {}
    # use softmax multi-class classification
    param['objective'] = 'multi:softprob'
    param['eta'] = 0.002
    param['max_depth'] = 7
    param['nthread'] = 7
    param['num_class'] = len(le.classes_)
    param['eval_metric'] = 'merror'

    evals = [ (xg_train, 'train'), (xg_test, 'eval') ]

    # Train xgboost
    print "Training classifier..."
    t1 = time.time()
    bst = xgboost.train(param, xg_train, 500, evals, early_stopping_rounds=10)
    xgboost.plot_importance(bst)
    t2 = time.time()
    print(t2-t1)
    bst.save_model(classifier_filename(model_name))
    return bst
Developer: smurching, Project: pokemon_ai, Lines: 26, Source: model_xgb_tiered.py


Example 2: test_importance_plot_lim

    def test_importance_plot_lim(self):
        np.random.seed(1)
        dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
        bst = xgb.train({}, dm)
        assert len(bst.get_fscore()) == 71
        ax = xgb.plot_importance(bst)
        assert ax.get_xlim() == (0., 11.)
        assert ax.get_ylim() == (-1., 71.)

        ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
        assert ax.get_xlim() == (0., 5.)
        assert ax.get_ylim() == (10., 71.)
Developer: BayronP, Project: xgboost, Lines: 12, Source: test_plotting.py


Example 3: run_xgb

def run_xgb(train, test, features, target, random_state=0):
    eta = 0.02
    max_depth = 5 
    subsample = 0.75
    colsample_bytree = 0.7
    start_time = time.time()

    print('XGBoost params. ETA: {}, MAX_DEPTH: {}, SUBSAMPLE: {}, COLSAMPLE_BY_TREE: {}'.format(eta, max_depth, subsample, colsample_bytree))
    params = {
        "objective": "multi:softprob",
        "num_class": 12,
        "booster" : "gbtree",
        "eval_metric": "mlogloss",
        "eta": eta,
        "max_depth": max_depth,
        "subsample": subsample,
        "colsample_bytree": colsample_bytree,
        "silent": 1,
        "seed": random_state,
    }
    num_boost_round = 500*2
    early_stopping_rounds = 50
    test_size = 0.3

    X_train, X_valid = train_test_split(train, test_size=test_size, random_state=random_state)
    print('Length train:', len(X_train.index))
    print('Length valid:', len(X_valid.index))
    y_train = X_train[target]
    y_valid = X_valid[target]
    dtrain = xgb.DMatrix(X_train[features], y_train)
    dvalid = xgb.DMatrix(X_valid[features], y_valid)

    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=early_stopping_rounds, verbose_eval=True)

    print "importance of feathure"
    xgb.plot_importance(gbm)
    show()


    #time.sleep(60*5)

    print("Validating...")
    check = gbm.predict(xgb.DMatrix(X_valid[features]), ntree_limit=gbm.best_iteration)
    score = log_loss(y_valid.tolist(), check)

    print("Predict test set...")
    test_prediction = gbm.predict(xgb.DMatrix(test[features]), ntree_limit=gbm.best_iteration)

    print('Training time: {} minutes'.format(round((time.time() - start_time)/60, 2)))
    return test_prediction.tolist(), score
Developer: worldwar2008, Project: kg, Lines: 51, Source: gq.py


Example 4: run_train_validation

 def run_train_validation(self):
     x_train, y_train,x_validation,y_validation = self.get_train_validationset()
     dtrain = xgb.DMatrix(x_train, label= y_train,feature_names=x_train.columns)
     dvalidation = xgb.DMatrix(x_validation, label= y_validation,feature_names=x_validation.columns)
     self.set_xgb_parameters()
     
     evals=[(dtrain,'train'),(dvalidation,'eval')]
     model = xgb.train(self.xgb_params, dtrain, evals=evals, **self.xgb_learning_params)
     xgb.plot_importance(model)
     plt.show()
      
     print "features used:\n {}".format(self.get_used_features())
      
     return
Developer: LevinJ, Project: Supply-demand-forecasting, Lines: 14, Source: xgbbasemodel.py


Example 5: test_sklearn_plotting

def test_sklearn_plotting():
    tm._skip_if_no_sklearn()
    from sklearn.datasets import load_iris

    iris = load_iris()

    classifier = xgb.XGBClassifier()
    classifier.fit(iris.data, iris.target)

    import matplotlib
    matplotlib.use('Agg')

    from matplotlib.axes import Axes
    from graphviz import Digraph

    ax = xgb.plot_importance(classifier)
    assert isinstance(ax, Axes)
    assert ax.get_title() == 'Feature importance'
    assert ax.get_xlabel() == 'F score'
    assert ax.get_ylabel() == 'Features'
    assert len(ax.patches) == 4

    g = xgb.to_graphviz(classifier, num_trees=0)
    assert isinstance(g, Digraph)

    ax = xgb.plot_tree(classifier, num_trees=0)
    assert isinstance(ax, Axes)
Developer: ChangXiaodong, Project: xgboost-withcomments, Lines: 27, Source: test_with_sklearn.py


Example 6: test_plotting

    def test_plotting(self):
        bst2 = xgb.Booster(model_file='xgb.model')
        # plotting

        import matplotlib
        matplotlib.use('Agg')

        from matplotlib.axes import Axes
        from graphviz import Digraph

        ax = xgb.plot_importance(bst2)
        assert isinstance(ax, Axes)
        assert ax.get_title() == 'Feature importance'
        assert ax.get_xlabel() == 'F score'
        assert ax.get_ylabel() == 'Features'
        assert len(ax.patches) == 4

        ax = xgb.plot_importance(bst2, color='r',
                                 title='t', xlabel='x', ylabel='y')
        assert isinstance(ax, Axes)
        assert ax.get_title() == 't'
        assert ax.get_xlabel() == 'x'
        assert ax.get_ylabel() == 'y'
        assert len(ax.patches) == 4
        for p in ax.patches:
            assert p.get_facecolor() == (1.0, 0, 0, 1.0) # red


        ax = xgb.plot_importance(bst2, color=['r', 'r', 'b', 'b'],
                                 title=None, xlabel=None, ylabel=None)
        assert isinstance(ax, Axes)
        assert ax.get_title() == ''
        assert ax.get_xlabel() == ''
        assert ax.get_ylabel() == ''
        assert len(ax.patches) == 4
        assert ax.patches[0].get_facecolor() == (1.0, 0, 0, 1.0) # red
        assert ax.patches[1].get_facecolor() == (1.0, 0, 0, 1.0) # red
        assert ax.patches[2].get_facecolor() == (0, 0, 1.0, 1.0) # blue
        assert ax.patches[3].get_facecolor() == (0, 0, 1.0, 1.0) # blue

        g = xgb.to_graphviz(bst2, num_trees=0)
        assert isinstance(g, Digraph)
        ax = xgb.plot_tree(bst2, num_trees=0)
        assert isinstance(ax, Axes)
Developer: ndingwall, Project: xgboost, Lines: 44, Source: test_basic.py


Example 7: save_topn_features

 def save_topn_features(self, fname="XGBRegressor_topn_features.txt", topn=-1):
     ax = xgb.plot_importance(self.model)
     yticklabels = ax.get_yticklabels()[::-1]
     if topn == -1:
         topn = len(yticklabels)
     else:
         topn = min(topn, len(yticklabels))
     with open(fname, "w") as f:
         for i in range(topn):
             f.write("%s\n"%yticklabels[i].get_text())
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines: 10, Source: xgb_utils.py


Example 8: plot_feat_importances

def plot_feat_importances():
    gbm = xgboost.XGBClassifier(silent=False, seed=8).fit(X_train, y_train)
    plot = xgboost.plot_importance(gbm)
    ticks = plot.set_yticklabels(df_xgb.columns)

    importances = rf.feature_importances_
    std = np.std([tree.feature_importances_ for tree in rf.estimators_],
                 axis=0)
    indices = np.argsort(importances)
    plt.barh(range(len(indices)), importances[indices], yerr=std[indices], color='lightblue')
    ticks = plt.yticks(range(len(indices)), df_xgb.columns)
Developer: Nathx, Project: ride_sharing_churn, Lines: 11, Source: churn.py


Example 9: plot_importance

    def plot_importance(self, ax=None, height=0.2,
                        xlim=None, title='Feature importance',
                        xlabel='F score', ylabel='Features',
                        grid=True, **kwargs):

        """Plot importance based on fitted trees.

        Parameters
        ----------
        ax : matplotlib Axes, default None
            Target axes instance. If None, new figure and axes will be created.
        height : float, default 0.2
            Bar height, passed to ax.barh()
        xlim : tuple, default None
            Tuple passed to axes.xlim()
        title : str, default "Feature importance"
            Axes title. To disable, pass None.
        xlabel : str, default "F score"
            X axis title label. To disable, pass None.
        ylabel : str, default "Features"
            Y axis title label. To disable, pass None.
        kwargs :
            Other keywords passed to ax.barh()

        Returns
        -------
        ax : matplotlib Axes
        """

        import xgboost as xgb

        if not isinstance(self._df.estimator, xgb.XGBModel):
            raise ValueError('estimator must be XGBRegressor or XGBClassifier')
        return xgb.plot_importance(self._df.estimator.booster(),
                                   ax=ax, height=height, xlim=xlim, title=title,
                                   xlabel=xlabel, ylabel=ylabel, grid=grid, **kwargs)
Developer: Sandy4321, Project: pandas-ml, Lines: 36, Source: base.py


Example 10: range

fscore_lo = np.percentile(fscore, 2.5, axis=0)
fscore_hi = np.percentile(fscore, 97.5, axis=0)
ind_sort = np.array(np.argsort(fscore_mean))
fscore_mean_sorted = fscore_mean[ind_sort]
# ci_sorted = fscore_ci[ind_sort]
fscore_lo_sorted = fscore_lo[ind_sort]
fscore_hi_sorted = fscore_hi[ind_sort]
feature_label_sorted = feature_label[ind_sort]
feature_label_short = []
for i in range(feature_label_sorted.size):
    feature_label_short.append(dic[feature_label_sorted[i]])
    
get_ipython().magic(u'matplotlib inline')
plt.figure(figsize=(4,12))
axes = plt.gca()
# plt.barh(np.arange(val_sorted.size), val_sorted, xerr=ci_sorted, height=.7, color=(.4,.4,.8), align='center', ecolor=(0,0,0))
plt.barh(np.arange(fscore_mean_sorted.size), fscore_mean_sorted,
         xerr=np.array([fscore_mean_sorted - fscore_lo_sorted,
                        fscore_hi_sorted - fscore_mean_sorted]),
         height=.7, color=(.4, .4, .8), align='center', ecolor=(0, 0, 0))
plt.yticks(np.arange(len(feature_label_short)), feature_label_short, fontsize=12, color=(0,0,0));
# axes.set_ylim([3.5, len(feature_label_short)-9.5])
# axes.set_xlim([0, 0.04])
plt.box(on=False)
plt.xlabel('Gini Importance',fontsize=14)
plt.grid()


# In[ ]:

np.percentile(fscore, 2.5, axis=0)
xgb.plot_importance()

Developer: sosata, Project: CS120DataAnalysis, Lines: 29, Source: show_importance.py


Example 11: print

    del xgb_train, xgb_val
    gc.collect()

    cv_scores.append(roc_auc_score(y_val, bst.predict(xgb.DMatrix(X_val), ntree_limit=bst.best_ntree_limit)))
    print(cv_scores)

    print('predicting...')
    if i == 0:
        pred = bst.predict(xgb.DMatrix(np.array(test_x)),
                               ntree_limit=bst.best_ntree_limit)
    else:
        pred += bst.predict(xgb.DMatrix(np.array(test_x)),
                                ntree_limit=bst.best_ntree_limit)

del train_x, train_y
gc.collect()

print('mean_score:', np.mean(cv_scores))

pred /= folds
df_test['is_churn'] = pred.clip(0.0000001, 0.999999)
df_test = df_test[['msno', 'is_churn']]

# df_test.to_csv(out_path + 'stack_submissions{}.csv'.format(datetime.now().strftime("%Y%m%d-%H%M%S")), index=False)
df_test = []

plt.rcParams['figure.figsize'] = (7.0, 7.0)
xgb.plot_importance(booster=bst)
plt.show()
# plt.savefig('./feature_importance.png', dpi=100)
Developer: zgcgreat, Project: WSDM, Lines: 30, Source: stacking_fit.py


Example 12: print

cv_xgb = xgb.cv(params = our_params, dtrain = xgdmat, num_boost_round = 3000, nfold = 5,
                metrics = ['error'], # Make sure you enter metrics inside a list or you may encounter issues!
                early_stopping_rounds = 100) # Look for early stopping that minimizes error

print('Tail:\n')
print(cv_xgb.tail(5))


our_params = {'eta': 0.1, 'seed':0, 'subsample': 0.8, 'colsample_bytree': 0.8,
             'objective': 'binary:logistic', 'max_depth':3, 'min_child_weight':1}

print('Final Train: \n')
final_gb = xgb.train(our_params, xgdmat, num_boost_round = 432)

xgb.plot_importance(final_gb)
plt.show()
#Predicting:

testdmat = xgb.DMatrix(X_pred)
y_pred = final_gb.predict(testdmat)

y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0

y_pred = y_pred.astype(np.int64)
#Submission

submission = pd.DataFrame({
        "PassengerId": test_df["PassengerId"],
        "Survived": y_pred
Developer: grixxy, Project: ml_python, Lines: 30, Source: TitanicXGBoost.py


Example 13: print

##xgboost.train(params, dtrain, num_boost_round=10, evals=(), obj=None, 
##              feval=None, maximize=False, early_stopping_rounds=None, 
##              evals_result=None, verbose_eval=True, learning_rates=None, 
##              xgb_model=None)
#
evallist  = [(dtest,'eval'), (dtrain,'train')]

watchlist = [ (xg_train,'train'), (xg_test, 'test') ]
evals_result = {}
num_round = 10
bst = xgb.train(param,xg_train, num_round, evals_result=evals_result)
pred = bst.predict(xg_test)

print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))

xgb.plot_importance(bst)
xgb.plot_tree(bst, num_trees=2)

#=============Logistic Regression==============================================================

#Define sigmoid function
def sigmoid(z):
    return 1 / (1 + e**(-z))

#Calcualte the cost to be minimized -- using the sigmoid function
def cost(theta, X, y, l):
    m = X.shape[0] #Number of rows in the data
    z = X.dot(theta)
    O = (-1 / m) * (log(sigmoid(z)).T.dot(y)  +  log(1-sigmoid(z)).T.dot((1-y)))
#    print(m)
#    print(theta)
Developer: tijohnso, Project: Usyd_masters, Lines: 31, Source: mlass1_9.py


Example 14: format

#test = []
pred2 = model.predict(dtest)

df2 = pd.DataFrame()
df2["Orginal"] = testDelay
df2["Predicted"] = pred2
df2.to_csv('compareDelay.csv', index = False)


import matplotlib.pyplot as plt
plt.style.use("ggplot")
mapper = {'f{0}'.format(i): v for i, v in enumerate(train.columns)}
mapped = {mapper[k]: v for k, v in model.get_fscore().items()}
import operator
mapped = sorted(mapped.items(), key=operator.itemgetter(1))
xgb.plot_importance(dict(mapped))
plt.show()
df = pd.DataFrame(mapped, columns=['feature', 'fscore'])
df['fscore'] = df['fscore'] / df['fscore'].sum()
df.plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(25, 15))
plt.title('XGBoost Feature Importance')
plt.xlabel('relative importance')
plt.gcf().savefig('feature_importance_xgb.png')


xx = np.linspace(-10,500)
yy = xx
h0 = plt.plot(xx, yy, 'k-', label="ideal Values")
plt.scatter(df2.Orginal, df2.Predicted, c = 'y')
plt.legend()
plt.show()
Developer: DEK11, Project: Predicting-EOB-delay, Lines: 31, Source: linear.py


Example 15: plot_importance

 def plot_importance(self):
     ax = xgb.plot_importance(self.model)
     self.save_topn_features()
     return ax
Developer: Anhmike, Project: Kaggle_HomeDepot, Lines: 4, Source: xgb_utils.py


Example 16: print

    model = xgb.train(params, dtrain, 200, watchlist, maximize=True, early_stopping_rounds = 25, verbose_eval=5)
    del dvalid
else:
    dtrain = xgb.DMatrix(train, y)
    del train, y
    gc.collect()
    watchlist = [(dtrain, 'train')]
    model = xgb.train(params, dtrain, 30, watchlist, maximize=True, verbose_eval=1)

del dtrain
gc.collect()

print('[{}] Finish XGBoost Training'.format(time.time() - start_time))

# Plot the feature importance from xgboost
plot_importance(model)
plt.gcf().savefig('feature_importance_xgb.png')

# Load the test for predict 
test = pd.read_csv(path+"test.csv", usecols=test_columns, dtype=dtypes)
test = pd.merge(test, ip_count, on='ip', how='left', sort=False)
del ip_count
gc.collect()

sub['click_id'] = test['click_id'].astype('int')

test['clicks_by_ip'] = test['clicks_by_ip'].astype('uint16')
test = timeFeatures(test)
test.drop(['click_id', 'ip'], axis=1, inplace=True)
dtest = xgb.DMatrix(test)
del test
Developer: ashukumar27, Project: Kaggle, Lines: 31, Source: 01_xgboost.py


Example 17: print

                              reg_alpha=0.05,
                              reg_lambda=2,
                              subsample=1.0,
                              colsample_bytree=1.0,
                              max_delta_step=1,
                              scale_pos_weight=1,
                              objective='multi:softprob',
                              nthread=8,
                              seed=0  # ,
                              # silent = False
                              )
    print('training...')
    xgb_model.fit(training, label)
    print('predicting...')
    predicted = xgb_model.predict_proba(testing)
    predicted = pandas.DataFrame(predicted)
    predicted.columns = xgb_model.classes_
    # Name index column.
    predicted.index.name = 'Id'
    # Write csv.
    print('Saving prediction...')
    predicted.to_csv('Prediction.csv')
    # feature importance
    feat_imp = pandas.Series(xgb_model.booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    matplotlib.pyplot.show()
    plot_importance(xgb_model, title='Feature importance')
    matplotlib.pyplot.show()
    plot_tree(xgb_model, num_trees=0)
    matplotlib.pyplot.show()
Developer: MichaelPluemacher, Project: San-Francisco-crimes, Lines: 30, Source: XGBoost_model.py


Example 18: train

def train(param, num_round=1000, early_stopping_rounds=20):
    exec_time = time.strftime("%Y%m%d%I%p%M", time.localtime())

    os.mkdir('{0}_{1}'.format(model_path, exec_time))
    os.mkdir('{0}_{1}'.format(submission_path, exec_time))

    train_params = param.copy()
    train_params['num_boost_round'] = num_round
    train_params['early_stopping_rounds'] = early_stopping_rounds
    json.dump(train_params, open('{0}_{1}{2}'.format(model_path, exec_time, model_params), 'wb+'))

    print('get training data')

    train_features = pd.read_csv(train_path + 'train_features.csv').astype(float)
    train_labels = pd.read_csv(train_path + 'labels.csv').astype(float)

    validate_features = pd.read_csv(validate_path + 'train_features.csv').astype(float)
    validate_labels = pd.read_csv(validate_path + 'labels.csv').astype(float)

    predict_features = pd.read_csv(predict_path + 'train_features.csv').astype(float)

    create_feature_map(train_features.columns.tolist(), '{0}_{1}{2}'.format(model_path, exec_time, model_fmap_file))

    train_matrix = xgboost.DMatrix(train_features.values, label=train_labels.values, feature_names=train_features.columns)
    val_matrix = xgboost.DMatrix(validate_features.values, label=validate_labels.values, feature_names=validate_features.columns)
    predict_matrix = xgboost.DMatrix(predict_features.values, feature_names=predict_features.columns)

    watchlist = [(train_matrix, 'train'), (val_matrix, 'eval')]

    print('model training')
    with open('{0}_{1}{2}'.format(model_path, exec_time, model_train_log), 'wb+') as outf:
        sys.stdout = outf
        model = xgboost.train(param, train_matrix, num_boost_round=num_round, evals=watchlist, early_stopping_rounds=early_stopping_rounds)

    sys.stdout = save_stdout
    print('model.best_score: {0}, model.best_iteration: {1}, model.best_ntree_limit: {2}'.format(model.best_score, model.best_iteration, model.best_ntree_limit))

    print('output offline model data')
    model.save_model('{0}_{1}{2}'.format(model_path, exec_time, model_file))
    model.dump_model('{0}_{1}{2}'.format(model_path, exec_time, model_dump_file))

    importance = model.get_fscore(fmap='{0}_{1}{2}'.format(model_path, exec_time, model_fmap_file))
    importance = sorted(importance.items(), key=operator.itemgetter(1))
    df = pd.DataFrame(importance, columns=['feature', 'fscore'])
    df['fscore'] = df['fscore'] / df['fscore'].sum()
    df.to_csv('{0}_{1}{2}'.format(model_path, exec_time, model_feature_importance_csv), index=False)

    xgboost.plot_importance(model)
    plt.gcf().set_size_inches(20, 16)
    plt.gcf().set_tight_layout(True)
    plt.gcf().savefig('{0}_{1}{2}'.format(model_path, exec_time, model_feature_importance_file))
    plt.close()

    train_pred_labels = model.predict(train_matrix, ntree_limit=model.best_ntree_limit)
    val_pred_labels = model.predict(val_matrix, ntree_limit=model.best_ntree_limit)

    train_pred_frame = pd.Series(train_pred_labels, index=train_features.index)
    train_pred_frame.name = probability_consumed_label
    val_pred_frame = pd.Series(val_pred_labels, index=validate_features.index)
    val_pred_frame.name = probability_consumed_label

    train_true_frame = pd.read_csv(train_path + 'labels.csv')['Label']
    val_true_frame = pd.read_csv(validate_path + 'labels.csv')['Label']
    train_coupons = pd.read_csv(train_path + 'dataset.csv')
    val_coupons = pd.read_csv(validate_path + 'dataset.csv')
    train_check_matrix = train_coupons[[coupon_label]].join(train_true_frame).join(train_pred_frame)
    val_check_matrix = val_coupons[[coupon_label]].join(val_true_frame).join(val_pred_frame)
    print('Average auc of train matrix: ', check_average_auc(train_check_matrix))
    print('Average auc of validate matrix: ', check_average_auc(val_check_matrix))

    val_coupons = val_coupons.join(val_pred_frame).join(val_pred_frame.map(lambda x: 0. if x < 0.5 else 1.).rename('map')).join(val_true_frame)
    val_coupons.to_csv('{0}_{1}{2}'.format(model_path, exec_time, val_diff_file), index=False)
    print(confusion_matrix(val_coupons['Label'], val_coupons['map']))

    labels = model.predict(predict_matrix, ntree_limit=model.best_ntree_limit)
    frame = pd.Series(labels, index=predict_features.index)
    frame.name = probability_consumed_label

    plt.figure()
    frame.hist(figsize=(10, 8))
    plt.title('results histogram')
    plt.xlabel('predict probability')
    plt.gcf().savefig('{0}_{1}{2}'.format(submission_path, exec_time, submission_hist_file))
    plt.close()

    submission = pd.read_csv(predict_path + 'dataset.csv')
    submission = submission[[user_label, coupon_label, date_received_label]].join(frame)
    submission.to_csv('{0}_{1}{2}'.format(submission_path, exec_time, submission_file), index=False)
Developer: alex19911222, Project: O2O-Coupon-Usage-Forecast, Lines: 88, Source: xgb.py


Example 19: plot_importance_matrix

 def plot_importance_matrix(self,vars_names):
     pdb.set_trace()
     xgb.plot_importance(self.clf)
Developer: jgpavez, Project: transfer_learning, Lines: 3, Source: xgboost_wrapper.py


Example 20: XGBoost_regressor2

def XGBoost_regressor2():
    """
    Train an XGBoost model with XGBoost lib.
    This method is mainly used to find relative importance of
    the features.
    """
    train = xgb.DMatrix('train_libSVM.dat')
    all_train = xgb.DMatrix('all_train_libSVM.dat')
    test = xgb.DMatrix('test_libSVM.dat')
    validation = xgb.DMatrix('validate_libSVM.dat')
    param = {'max_depth': 11, 'eta': 0.002, 'silent': 1,
             'objective': 'reg:linear', 'gamma': 2.2,
             'subsample': 0.8, 'colsample_bytree': 0.7,
             'scale_pos_weight': 0.55, 'min_child_weight': 5,
             'n_jobs': 4}
    # 0.03-> 900, 1600 without features of SVD similarity between
    #                  search term and other columns
    # eta    ntrees   error
    # 0.03-> 900 ->  0.2397 * 2 = 4795
    # 0.025 -> 1900 ->            4782
    # 0.06 -> 640 -> 0.2400 * 2 = 4801
    ############# common brand & SVD brand deleted ############
    # 0.03-> 900 ->  0.2397 * 2 = 4794
    ############ add KL distance ########
    # 0.03 -> 966 -> 0.2397
    # 0.03 -> 1102 -> 0.234 or so
    ##### add spell checking
    # round = 200
    # depth = 12 -> 0.235371
    # depth = 11 min_cw = 5 -> 0.235316   SELECTED
    # depth = 10 -> 0.235840
    # depth = 9 -> 0.235912
    # depth = 8 -> 0.236202
    # min_child_weight = 6 -> 0.235679
    # min_child_weight = 4 -> 0.235478

    ###### as of April 16
    # 3500, 0.01, 0.238908
    # 1500, 0.03, 0.238893
    # 750, 0.06, 0.239034
    watchlist = [(validation, 'eval'), (train, 'train')]
    # TODO: do data cleaning again.
    # add approximate matching
    # check KL distance
    # n = 1096
    num_round = 10000
    xgb_model = xgb.train(param, train, num_round, watchlist)
    # xgb_model = xgb.cv(param, all_train, num_round, nfold=5,
    #                    metrics={'error'})
    # print xgb_model.head()
    # xgb_model.info()

    prediction = xgb_model.predict(test)
    importance = xgb_model.get_fscore(fmap='xgb.fmap')
    print(importance)
    sorted_importance = sorted(importance.items(),
                               key=operator.itemgetter(1))
    print(sorted_importance)
    importance_of_feature_file\
        = open('importance_of_feature_file', 'wb')
    pickle.dump(sorted_importance, importance_of_feature_file)
    importance_of_feature_file.close()

    xgb.plot_importance(xgb_model)
    test_id = pd.read_pickle('id_test')
    prediction = prediction * 2 + 1
    prediction[prediction > 3] = 3
    prediction[prediction < 1] = 1
    clean_result(prediction)
    pd.DataFrame({"id": test_id.values, "relevance": prediction})\
        .to_csv('submission.csv', index=False)
Developer: versemonger, Project: Home-Depot-Product-Search-Relevance, Lines: 71, Source: para_tuning.py



Note: The xgboost.plot_importance examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and the copyright of the source code remains with the original authors; please consult the corresponding project's License before distributing or using it. Do not reproduce this article without permission.

