• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python ensemble.ExtraTreesRegressor类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.ensemble.ExtraTreesRegressor的典型用法代码示例。如果您正苦于以下问题：Python ExtraTreesRegressor类的具体用法？Python ExtraTreesRegressor怎么用？Python ExtraTreesRegressor使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了ExtraTreesRegressor类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: fit

 def fit(self, X, y, **kwargs):
     """Fit an ExtraTreesRegressor on ``(X, y)`` and keep it as ``self.model``.

     Keyword arguments whose names are already present in
     ``self.INITPARAMS`` overwrite the stored value before the model is
     built; any other keyword is silently ignored.  ``self.INITPARAMS`` is
     updated in place, so an override persists for later calls.
     """
     # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is 2-only.
     for key, value in kwargs.items():
         if key in self.INITPARAMS:
             self.INITPARAMS[key] = value
     model = ExtraTreesRegressor(**self.INITPARAMS)
     model.fit(X, y)
     self.model = model
开发者ID:DJRumble,项目名称:S2DS,代码行数:7,代码来源:estimator.py


示例2: do_etrees

def do_etrees(filename):
    """Fit an extra-trees regressor on the merged dataset and score 200 rows.

    The dataset and its target come from ``create_merged_dataset(filename)``.
    The ``driver`` and ``trip`` columns are excluded from the features.

    Returns a DataFrame with the ``driver`` and ``trip`` values of the first
    200 rows and the model's prediction for each of them (``probs``).
    """
    df, Y = create_merged_dataset(filename)
    etree = ExtraTreesRegressor(n_estimators=200, n_jobs=-1, min_samples_leaf=5, random_state=SEED)
    # ``axis`` must be passed by keyword: pandas 2.x no longer accepts it
    # positionally in ``DataFrame.drop``.
    X = df.drop(['driver', 'trip'], axis=1)
    etree.fit(X, Y)
    # The scored rows are a subset of the rows the model was just fitted on.
    probs = etree.predict(X[:200])
    return pd.DataFrame({'driver': df['driver'][:200], 'trip': df['trip'][:200], 'probs': probs})
开发者ID:fabiogm,项目名称:kaggle-driver-telematics,代码行数:7,代码来源:main.py


示例3: main

def main():
    """Cross-validate an ExtraTreesRegressor on each of the 15 ACT training sets.

    For every dataset index 1..15 the training CSV is read with
    ``read_data``, element 0 of each row is used as the target, the feature
    columns are reduced by ``filter_cols`` using the matching
    ``../selected/selected_<ind>.txt`` file, and a shuffled 10-fold cross
    validation is run.  The per-fold score and the mean score are printed.
    """
    for ind in range(1, 15+1):
        print("TrainingSet/ACT%d_competition_training.csv" % ind)
        # Read in data, parse into training and target sets.  The target must
        # be extracted before ``filter_cols`` replaces ``train``.
        cols, train = read_data("../TrainingSet/ACT%d_competition_training.csv" % ind)
        target = np.array( [x[0] for x in train] )

        train = filter_cols(train, cols, "../selected/selected_%d.txt" % ind)
        train = np.array( train )

        # Extra-trees regressor; each split considers a third of the features.
        cfr = ExtraTreesRegressor(n_estimators=1000, max_features=(len(train[0])//3), n_jobs=8, random_state=1279)

        # Simple K-Fold cross validation. 10 folds.
        # NOTE(review): the ``k=`` / ``indices=`` keywords look like a legacy
        # scikit-learn KFold signature -- confirm against the installed version.
        cv = cross_validation.KFold(len(train), k=10, indices=False, shuffle=True)

        # Iterate through the training and test cross validation segments and
        # score the regressor on each one, aggregating the results into a list.
        results = []
        for traincv, testcv in cv:
            ft = cfr.fit(train[traincv], target[traincv])
            score = ft.score(train[testcv], target[testcv])
            results.append(score)
            print("\tFold %d: %f" % (len(results), score))

        # Print out the mean of the cross-validated results.
        print("Results: " + str( np.array(results).mean() ))
开发者ID:ashispapu,项目名称:kaggle-1,代码行数:29,代码来源:validation.py


示例4: mul_dtree

def mul_dtree(X, Y2):
    """Fit a 5-tree extra-trees model on the first 200 rows and print ``Y2[200:]``.

    The model is fitted on ``X[:200]`` / ``Y2[:200]`` and asked to predict
    ``X[200:]``; the prediction itself is discarded and only the held-out
    targets are printed.  Nothing is returned.
    """
    # ``compute_importances`` was dropped from scikit-learn's forest
    # estimators and this function never reads the importances, so the
    # argument is omitted to keep the constructor call valid.
    forest = ExtraTreesRegressor(n_estimators=5,
                                 random_state=0)
    forest.fit(X[:200], Y2[:200])
    forest.predict(X[200:])
    print(Y2[200:])
开发者ID:Catentropy,项目名称:mylab,代码行数:7,代码来源:t_dtree.py


示例5: predict_with_one

def predict_with_one(X, out_file_name):
    """Score how well each single feature of ``X`` predicts every feature.

    Over 3 shuffled train/test splits (20% test), a 5-tree extra-trees model
    is fitted for every ordered pair ``(i, j)`` of columns, using column
    ``i`` as the only input and column ``j`` as the target.  The mean
    absolute error on the test part is accumulated in ``score_matrix[i, j]``
    and the per-split average is written as CSV (3 decimals) to
    ``CODE_PATH/out_file_name``.  Progress is printed for every pair.
    """
    n_samples, n_features = X.shape
    iter_num = 3
    div = ShuffleSplit(n_samples, n_iter=iter_num, test_size=0.2, random_state=0)
    model = ExtraTreesRegressor(n_estimators=5)
    score_matrix = np.zeros((n_features, n_features))

    t = time()
    round_num = 0
    for train, test in div:
        round_num += 1
        train_samples = X[np.array(train)]
        test_samples = X[np.array(test)]
        for i in range(n_features):
            # The input column depends only on ``i``; slice it once per ``i``
            # (``i:i+1`` keeps it two-dimensional) instead of once per pair.
            X_train = train_samples[:, i:i+1]
            X_test = test_samples[:, i:i+1]
            for j in range(n_features):
                y_train = train_samples[:, j]
                y_test = test_samples[:, j]
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                mae = mean_absolute_error(y_test, y_pred)
                score_matrix[i, j] += mae
                print('Round', round_num, '|', i, j, mae, time()-t)
    np.savetxt(os.path.join(CODE_PATH, out_file_name),
               score_matrix/iter_num, fmt='%.3f', delimiter=',')
开发者ID:Jewelryland,项目名称:YelpRecSys,代码行数:34,代码来源:feature_covariance.py


示例6: cal_important_features

def cal_important_features(batch=10, threshold=1e-4):
  """Refit an extra-trees model ``batch`` times and count reported features.

  Each round fits a fresh ExtraTreesRegressor on the prepared samples and
  passes its ``feature_importances_`` with ``threshold`` to
  ``show_important_features``.  For every attribute in ``ATRS[5:]`` the
  number of rounds in which each returned key appeared is accumulated.

  Returns a dict mapping attribute -> {key: number of rounds it appeared in}.
  Progress and a per-attribute summary are printed along the way.
  """
  X_samples, Y_samples, _scaler = dat.data_prepare('ocpm', 'lifetime_ecpm', outlier=0.05)
  tot_good_atrs = {a: {} for a in ATRS[5:]}
  for i in np.arange(1,batch+1):
    Ts = timeit.default_timer()
    model = ExtraTreesRegressor(n_jobs=6)
    model.fit(X_samples, Y_samples)
    print("Totally %i features." % len(model.feature_importances_))
    print("[Labels] %i categories, %i interests, %i client_names, %i auto_tags" % (num.categories_len, num.interests_len, num.client_names_len, num.auto_tags_len))
    good_atrs = show_important_features(model.feature_importances_, threshold)
    for a in reversed(ATRS[5:]):
      counts = tot_good_atrs[a]
      for b in good_atrs[a]:
        counts[b] = counts.get(b, 0) + 1
    print("%i batch finished in %.1f secs." % (i, (timeit.default_timer() - Ts)))
    print("------------------------------------------------")
  # show performances
  for atr in reversed(ATRS[5:]):
    print("-------[%s]-----------------------" % atr)
    for j in np.arange(1,batch+1):
      # The test is ``>=`` although the printed message says ``>``.
      good_keys = [k for k, v in tot_good_atrs[atr].items() if v >= j]
      print("%i keys occurs > %i times." % (len(good_keys), j))
  return tot_good_atrs
开发者ID:Marsan-Ma,项目名称:adminer,代码行数:26,代码来源:data_probe.py


示例7: fit

    def fit(self, X, y, weights = None, **kwargs):
        """Fit the model from extra-trees importances; return ``self``.

        Parameters
        ----------
        X, y : arrays; ``y`` is reshaped to a column and stacked in front of
            ``X``, so column 0 of the working matrix is the response.
        weights : per-row weights; defaults to all ones.
        **kwargs : forwarded unchanged to ``ExtraTreesRegressor``.

        Sets ``self.importances``, ``self.coef_`` and, when
        ``self.fit_intercept`` is true, ``self.intercept_``.
        """
        if weights is None: weights = np.ones(y.shape[0])
        data = np.hstack((y.reshape(y.shape[0],1),X))

        # presumably weighted covariance / correlation of [y | X] -- the
        # helpers are defined elsewhere; verify there.
        S = wcov(data, weights)
        corr = wcorr(data, weights)
        wsd = np.sqrt(S.diagonal())

        ExtraTrees = ExtraTreesRegressor(**kwargs)
        ExtraTrees.fit(X,y, sample_weight=weights)

        # S_yx * inv(S_xx) * S_xy / S_yy, using the blocks of S.
        Rsquare = ( S[0,1:].dot(np.linalg.inv(S[1:,1:]).dot(S[1:,0])) )/S[0,0]

        # assign proportion of Rsquare to each covariate dep. on importance
        self.importances = ExtraTrees.feature_importances_ * Rsquare
        model = self.constrained_optimization( corr )

        if self.fit_intercept:
            # Weighted column means as one vector-matrix product.  This is the
            # same quantity as summing diag(w).dot(data) over rows, without
            # materialising an n-by-n diagonal matrix.
            wmean = np.ravel(weights/np.sum(weights)).dot(data)
            self.intercept_ = wmean[0] - wsd[0]*np.sum(wmean[1:]*model.x/wsd[1:])

        self.coef_ = wsd[0]*model.x/wsd[1:]

        return self
开发者ID:naxion-analytics,项目名称:MRpy,代码行数:25,代码来源:RandomForest.py


示例8: main

def main():
    """Train on each ACT dataset and write train and test predictions.

    For every dataset index 1..15: read the training CSV, use element 0 of
    each row as the target, keep the columns listed in the matching
    ``../selected/cor9/selected_<ind>.txt`` file, fit a 2000-tree
    ExtraTreesRegressor, and write its predictions for both the training
    rows and the test rows via ``write_file``.
    """
    for ind in range(1, 15+1):
        print("TrainingSet/ACT%d_competition_training.csv" % ind)
        selected_path = "../selected/cor9/selected_%d.txt" % ind

        # Read in data, parse into training and target sets.  The target must
        # be extracted before ``filter_cols`` replaces ``train``.
        cols, molecules1, train = read_data("../TrainingSet/ACT%d_competition_training.csv" % ind)
        target = np.array( [x[0] for x in train] )

        # load train
        train = filter_cols(train, cols, selected_path)
        train = np.array(train)

        # seeds used: orig=1279, cor8=1278, cor9=1277
        cfr = ExtraTreesRegressor(n_estimators=2000, max_features=(len(train[0])//3), n_jobs=8, random_state=1277)
        rf = cfr.fit(train, target)

        # predict train
        pred = rf.predict(train)
        write_file("erStacking/cor9/er_stacking_%d.csv" % ind, molecules1, pred)

        # load test
        cols, molecules2, test = read_data("../TestSet/ACT%d_competition_test.csv" % ind)
        test = filter_cols(test, cols, selected_path)
        test = np.array(test)

        # predict test
        pred = rf.predict(test)
        write_file("erStacking/test/cor9/er_submission_%d.csv" % ind, molecules2, pred)
开发者ID:ashispapu,项目名称:kaggle-1,代码行数:29,代码来源:testing_er.py


示例9: fit

    def fit(self,data_train,target):
        """Fit the six sub-models on ``data_train`` / ``target``; return ``self``.

        Two estimator families are used (gradient boosting and extra trees),
        each with ``self.nest`` estimators and ``max_depth=7``.  Per family:
        one model on all columns, one model on the output of that model's
        ``transform(..., threshold="0.35*mean")``, and one model on the
        columns whose name contains ``'var'``.
        """
        self.target_train = target
        self.catcol = data_train.filter(like='var').columns.tolist()

        tree_kwargs = dict(n_estimators=self.nest, max_depth=7)
        cutoff = "0.35*mean"
        y_train = self.target_train

        # Gradient boosting: all columns, then the transformed columns.
        self.gbr = GradientBoostingRegressor(**tree_kwargs)
        self.gbr.fit(data_train, y_train)
        self.transformed_train_gbr = self.gbr.transform(data_train, threshold=cutoff)
        self.gbr_tr_fit = GradientBoostingRegressor(**tree_kwargs)
        self.gbr_tr_fit.fit(self.transformed_train_gbr, y_train)

        # Extra trees: all columns, then the transformed columns.
        self.xfr = ExtraTreesRegressor(**tree_kwargs)
        self.xfr.fit(data_train, y_train)
        self.transformed_train_xfr = self.xfr.transform(data_train, threshold=cutoff)
        self.xfr_tr_fit = ExtraTreesRegressor(**tree_kwargs)
        self.xfr_tr_fit.fit(self.transformed_train_xfr, y_train)

        # Gradient boosting on the 'var' columns only.
        self.gbr_cat_fit = GradientBoostingRegressor(**tree_kwargs)
        self.gbr_cat_fit.fit(data_train[self.catcol], y_train)

        # Extra trees on the 'var' columns only.
        self.xfr_cat_fit = ExtraTreesRegressor(**tree_kwargs)
        self.xfr_cat_fit.fit(data_train[self.catcol], y_train)
        return self
开发者ID:kirilligum,项目名称:cdips-fire,代码行数:33,代码来源:cvbari.py


示例10: build_models

    def build_models(self):
        """Drop a fixed set of columns, then fit and return three regressors.

        Returns a list ``[gradient boosting, random forest, extra trees]``,
        each fitted on ``self.X`` / ``self.y`` in that order.
        """
        dropped_columns = [
            "institute_latitude",
            "institute_longitude",
            "institute_state",
            "institute_country",
            "var10",
            "var11",
            "var12",
            "var13",
            "var14",
            "var15",
            "instructor_past_performance",
            "instructor_association_industry_expert",
            "secondary_area",
            "var24",
        ]
        self.remove_columns(dropped_columns)

        models = [
            GradientBoostingRegressor(learning_rate=0.1, n_estimators=200, subsample=0.8),
            RandomForestRegressor(n_estimators=50),
            ExtraTreesRegressor(n_estimators=50),
        ]
        for estimator in models:
            estimator.fit(self.X, self.y)

        return models
开发者ID:numb3r33,项目名称:predict-grants,代码行数:30,代码来源:model.py


示例11: build_extra_tree_regressor

def build_extra_tree_regressor(X_test, X_train_full, y_train_full):
    """Fit a 500-tree ExtraTreesRegressor and return its predictions for ``X_test``.

    The model is fitted on ``X_train_full`` / ``y_train_full``; a progress
    line is printed before fitting starts.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("Building ExtraTrees regressor...")
    etr = ExtraTreesRegressor(n_estimators=500)
    etr.fit(X_train_full, y_train_full)
    etr_predict = etr.predict(X_test)

    return etr_predict
开发者ID:DarioBernardo,项目名称:Kaggle,代码行数:9,代码来源:BlendedRegressorsCV.py


示例12: get_forest

def get_forest(X_names=Xs, y_names=ys, num_trees=256, data=data):
    """Fit a bootstrapped extra-trees forest and time the fit.

    ``X_names`` and ``y_names`` select the input and output columns of
    ``data``.  Returns ``(fitted_forest, elapsed)`` where ``elapsed`` is the
    difference between ``time()`` taken right after and right before
    ``fit``.
    """
    forest = ExtraTreesRegressor(
        n_estimators=num_trees, n_jobs=62, bootstrap=True)
    features = data.loc[:, list(X_names)]
    targets = data.loc[:, list(y_names)]
    started = time()
    fitted = forest.fit(features, targets)
    finished = time()
    return (fitted, finished - started)
开发者ID:earlbellinger,项目名称:asteroseismology,代码行数:9,代码来源:subsetter.py


示例13: reg_skl_etr

def reg_skl_etr(param, data):
    """Fit an ExtraTreesRegressor and score it on the validation split.

    ``data`` must unpack into exactly six items; only the train/validation
    features and the regression targets are used.  ``param`` supplies
    ``n_estimators``, ``max_features``, ``n_jobs`` and ``random_state``.

    Returns ``(score, predictions)`` where the score comes from
    ``getscoreRMSE(y_reg_cv, predictions)``.
    """
    X_tr, X_cv, _y_class_tr, _y_class_cv, y_reg_tr, y_reg_cv = data
    forwarded = ('n_estimators', 'max_features', 'n_jobs', 'random_state')
    etr_kwargs = {name: param[name] for name in forwarded}
    etr = ExtraTreesRegressor(**etr_kwargs)
    etr.fit(X_tr, y_reg_tr)
    predictions = etr.predict(X_cv)
    return getscoreRMSE(y_reg_cv, predictions), predictions
开发者ID:matlabat,项目名称:Home-Depot-Search-Relevance,代码行数:10,代码来源:models_param.py


示例14: extra_trees_regressor

def extra_trees_regressor(x, y, n_estimators, max_depth):
    """Return the mean per-fold RMSE of an ExtraTreesRegressor over 3 folds.

    A fresh model (``random_state=0``) is fitted on each training fold; the
    square root of the mean squared error on the held-out fold is collected
    and the average over the folds is returned.
    """
    fold_rmse = []
    for train_index, test_index in KFold(len(x), n_folds=3):
        X_train, y_train = x[train_index], y[train_index]
        X_test, y_test = x[test_index], y[test_index]
        model = ExtraTreesRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=0)
        model.fit(X_train, y_train)
        predicted = model.predict(X_test)
        # Argument order kept as (predicted, actual).
        fold_rmse.append(mean_squared_error(predicted, y_test) ** 0.5)
    return np.mean(fold_rmse)
开发者ID:milton0825,项目名称:yelp-lr,代码行数:10,代码来源:baseline_rating.py


示例15: MyExtraTreeReg

class MyExtraTreeReg(MyRegressor):
    """Wrapper around ``ExtraTreesRegressor`` with feature-importance helpers."""

    def __init__(self, params=None):
        """Build the underlying regressor from ``params`` (keyword dict).

        The dict is copied, so ``update_params`` neither leaks settings
        between instances created with the default nor mutates the caller's
        dict.
        """
        self._params = dict(params) if params is not None else {}
        self._extree = ExtraTreesRegressor(**(self._params))

    def update_params(self, updates):
        """Merge ``updates`` into the stored parameters and rebuild the regressor.

        The rebuilt regressor is unfitted.
        """
        self._params.update(updates)
        self._extree = ExtraTreesRegressor(**(self._params))

    def fit(self, Xtrain, ytrain):
        """Fit the underlying regressor on ``Xtrain`` / ``ytrain``."""
        self._extree.fit(Xtrain, ytrain)

    def predict(self, Xtest, option = None):
        """Return predictions for ``Xtest``; ``option`` is accepted but unused."""
        return self._extree.predict(Xtest)

    def plt_feature_importance(self, fname_list, f_range = None):
        """Show a horizontal bar chart of feature importances.

        Features are ranked by decreasing importance; ``f_range`` holds the
        rank positions to plot (all ranks when empty or omitted).  The error
        bars are the standard deviation of the per-tree importances.
        """
        importances = self._extree.feature_importances_

        std = np.std([tree.feature_importances_ for tree in self._extree.estimators_], axis=0)
        indices = np.argsort(importances)[::-1]

        fname_array = np.array(fname_list)

        if not f_range:
            # A concrete list is a valid numpy index on Python 2 and 3 alike.
            f_range = list(range(indices.shape[0]))

        n_f = len(f_range)

        plt.figure()
        plt.title("Extra Tree Feature importances")
        plt.barh(range(n_f), importances[indices[f_range]],
               color="b", xerr=std[indices[f_range]], ecolor='k',align="center")
        plt.yticks(range(n_f), fname_array[indices[f_range]])
        plt.ylim([-1, n_f])
        plt.show()

    def list_feature_importance(self, fname_list, f_range = None, return_list = False):
        """Print the feature ranking by decreasing importance.

        ``f_range`` holds the rank positions to report (all ranks when empty
        or omitted).  When ``return_list`` is true, the feature indices of
        the reported ranks are returned; otherwise ``None`` is returned.
        """
        importances = self._extree.feature_importances_
        indices = np.argsort(importances)[::-1]

        print('Extra tree feature ranking:')

        if not f_range:
            f_range = list(range(indices.shape[0]))

        for f in f_range:
            print('{0:d}. feature[{1:d}]  {2:s}  ({3:f})'.format(f + 1, indices[f], fname_list[indices[f]], importances[indices[f]]))

        if return_list:
            return [indices[f] for f in f_range]
        return None
开发者ID:tonyzhangrt,项目名称:wklearn,代码行数:54,代码来源:learner.py


示例16: algorithm_ExtraTrees

def algorithm_ExtraTrees(X_train,Y_train,X_validation,Y_validation, seed=7):
    """Fit an 80-tree ExtraTreesRegressor on scaled data and print validation MSE.

    The scaler is fitted on ``X_train`` only and then applied to both the
    training and the validation features.  ``seed`` is passed to the
    regressor as ``random_state`` so repeated runs give the same model.
    Nothing is returned; the mean squared error is printed.
    """
    # Train the model.
    scaler = StandardScaler().fit(X_train)
    rescaledX = scaler.transform(X_train)
    model = ExtraTreesRegressor(n_estimators=80, random_state=seed)
    model.fit(X=rescaledX, y=Y_train)
    # Evaluate the model on the validation set.
    rescaledX_validation = scaler.transform(X_validation)
    predictions = model.predict(rescaledX_validation)
    print(mean_squared_error(Y_validation, predictions))
开发者ID:jiluhu,项目名称:flask_ml,代码行数:12,代码来源:sklearn_regression_lib.py


示例17: dummie_columns_extra_trees

def dummie_columns_extra_trees(train, test):
    """Fit extra trees on the numeric columns of ``train`` to predict SOLDPRICE.

    The predictors are all numeric columns of ``train`` except ``LISTPRICE``
    and ``SOLDPRICE``.  The model is scored and used to predict on ``test``;
    the predictions are handed to ``sample_predictions`` and the score is
    printed.

    Returns ``(score, predictions)``, where ``score`` is the value returned
    by the regressor's ``score`` method on ``test``.
    """
    from sklearn.ensemble import ExtraTreesRegressor
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("-- {} --".format("Extremely Randomized Trees Regression using all but remarks"))
    predicting_columns = list(train._get_numeric_data().columns.values)
    predicting_columns.remove("LISTPRICE")
    predicting_columns.remove("SOLDPRICE")
    rf = ExtraTreesRegressor(
        n_estimators=300, n_jobs=-1)
    rf.fit(train[predicting_columns], train["SOLDPRICE"])
    score = rf.score(test[predicting_columns], test["SOLDPRICE"])
    predictions = rf.predict(test[predicting_columns])
    sample_predictions(test, predictions)
    print("Accuracy: {}\n".format(score))
    return score, predictions
开发者ID:CurleySamuel,项目名称:Thesis,代码行数:14,代码来源:first_pass.py


示例18: baseline_extra

def baseline_extra(train_x, train_y,
                   test_x, test_y, n, d,
                   result_path="review_baseline_extra.txt"):
    """Fit extra trees, save test predictions as CSV and return the RMSE.

    ``n`` and ``d`` are the number of trees and the maximum depth.  The CSV
    written to ``result_path`` has the columns ``review_count`` (``test_y``)
    and ``predict`` (the model output), without an index column.
    """
    model = ExtraTreesRegressor(n_estimators=n,
                                max_depth=d,
                                random_state=0)
    model = model.fit(train_x, train_y)
    predicted = model.predict(test_x).tolist()

    report = pd.DataFrame([], columns=['review_count', 'predict'])
    report['review_count'] = test_y
    report['predict'] = predicted
    report.to_csv(result_path, index=False)

    # Argument order kept as (predicted, actual).
    return mean_squared_error(predicted, test_y) ** 0.5
开发者ID:milton0825,项目名称:yelp-lr,代码行数:15,代码来源:baseline_review.py


示例19: simple_extremely_random_trees

def simple_extremely_random_trees(data_train_x, data_test_x, data_train_y, data_test_y):
    """Fit a 300-tree ExtraTreesRegressor and print its evaluation.

    The model is fitted on the training data and scored on the test data.
    A 5-fold ``cross_val_score`` is then run on the test data itself, and
    its mean and 1.96 * standard deviation are printed.  Nothing is
    returned.
    """
    from sklearn.ensemble import ExtraTreesRegressor
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("-- {} --".format("Extremely Randomized Trees Regression using all but remarks"))
    rf = ExtraTreesRegressor(
        n_estimators=300,
        n_jobs=-1
    )
    rf.fit(data_train_x, data_train_y)
    sample_predictions(rf.predict(data_test_x), data_test_y)
    score = rf.score(data_test_x, data_test_y)
    cross_validated_scores = cross_val_score(
        rf, data_test_x, data_test_y, cv=5)
    print("MSE Accuracy: {}".format(score))
    print("MSE Across 5 Folds: {}".format(cross_validated_scores))
    print("95%% Confidence Interval: %0.3f (+/- %0.3f)\n" % (cross_validated_scores.mean(), cross_validated_scores.std() * 1.96))
开发者ID:CurleySamuel,项目名称:Thesis,代码行数:15,代码来源:second_pass.py


示例20: trainRegressorsAndSave

def trainRegressorsAndSave(computeScore=False):
    """Make sure every database in ``dbs`` has a saved regressor.

    For each database without a file under ``clfs/``, a new 500-tree
    ExtraTreesRegressor is created and handed to ``saveTrainedClassifier``.
    When ``computeScore`` is true, the regressor (new, or loaded from disk)
    is also scored on ``<db>.csv``, whose last column is used as the target,
    and the score is printed.
    """
    for db in dbs:
        clf_path = "clfs/" + db
        if not os.path.exists(clf_path):
            clf = ExtraTreesRegressor(n_estimators=500, random_state=1, n_jobs=-1)
            saveTrainedClassifier(db, clf)
        elif computeScore:
            # NOTE(review): joblib.load unpickles the file -- only load
            # model files from a trusted location.
            clf = joblib.load(clf_path)

        if not computeScore:
            continue

        print("Loading test data...")
        loaded = loadDB(db + ".csv")
        X_test = loaded[:, 0:-1]
        y_test = loaded[:, -1]

        print("Normalized score is {}".format(clf.score(X_test, y_test)))
        # Drop the references to the test arrays before the next database.
        X_test = y_test = 0
开发者ID:dtaralla,项目名称:hearthstone,代码行数:16,代码来源:hearthstone_utils.py



注:本文中的sklearn.ensemble.ExtraTreesRegressor类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python ensemble.GradientBoostingClassifier类代码示例发布时间:2022-05-27
下一篇:
Python ensemble.ExtraTreesClassifier类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap