本文整理汇总了Python中sklearn.ensemble.RandomForestRegressor类的典型用法代码示例。如果您正苦于以下问题：Python RandomForestRegressor类的具体用法？Python RandomForestRegressor怎么用？Python RandomForestRegressor使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了RandomForestRegressor类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: train_year
def train_year(train_fea, trees):
    """Train one random forest per sale year and measure cross-year error.

    A separate ``RandomForestRegressor`` is fitted on the rows of every
    distinct ``SaleYear``.  For each pair ``p`` returned by
    ``get_pairs(years, dist)`` the model of year ``p[0]`` predicts
    ``SalePrice`` for the rows of year ``p[1]`` and the relative absolute
    error ``|predicted - actual| / actual`` is averaged over those rows.

    Args:
        train_fea: DataFrame with the feature columns plus ``SaleYear`` and
            ``SalePrice``.
        trees: Number of trees per forest (passed as ``n_estimators``).

    Returns:
        A tuple ``(rfs, errors_list)``.  ``rfs`` holds the fitted models in
        ascending year order.  ``errors_list[d - 1]`` is the mean, over all
        pairs produced for distance ``d``, of the per-pair mean error rate.

    Raises:
        KeyError: If ``get_pairs`` yields no pair for some distance, or names
            a year that is not present in ``train_fea``.
    """
    years = sorted(set(train_fea['SaleYear'].values))

    rfs = []
    for year in years:
        print('train model %d' % year)
        # NOTE(review): ``compute_importances`` is only accepted by old
        # scikit-learn releases -- confirm against the installed version.
        rf = RandomForestRegressor(n_estimators=trees, n_jobs=1,
                                   compute_importances=True)
        rows = train_fea[train_fea['SaleYear'] == year]
        rf.fit(rows.drop('SalePrice', axis=1), rows['SalePrice'])
        rfs.append(rf)
    model_by_year = dict(zip(years, rfs))

    # distance -> list of per-pair mean error rates
    mean_errors = {}
    for dist in range(1, len(years)):
        for pair in get_pairs(years, dist):
            print('compare %d, %d' % (pair[0], pair[1]))
            target_rows = train_fea[train_fea['SaleYear'] == pair[1]]
            predicted = model_by_year[pair[0]].predict(
                target_rows.drop('SalePrice', axis=1))
            actual = target_rows['SalePrice'].values
            # Vectorised relative error; a zero ``actual`` yields inf/nan
            # here instead of raising ZeroDivisionError.
            error_rates = np.abs(predicted - actual) / actual
            mean_errors.setdefault(dist, []).append(error_rates.mean())

    errors_list = [np.mean(mean_errors[dist])
                   for dist in range(1, len(years))]
    return rfs, errors_list
开发者ID:zhangda,项目名称:bulldozers,代码行数:34,代码来源:yearbase.py
示例2: pipeline
def pipeline():
    """Train a weighted random forest on ``data`` and dump validation output.

    Rows of the module-level ``data`` frame with ``watch == 1`` form the
    validation set; rows whose ``watch`` is neither 0 nor 1 form the training
    set.  Every training row is weighted by ``min(a, b)``.  Missing feature
    values are filled with the per-column median of the respective split.
    Validation predictions are scored with ``cal_cost`` and written to
    ``val_<cost[1]>.csv`` together with ``item_id``, ``store_code``, ``a``,
    ``b`` and the true label.
    """
    non_feature_cols = ['label', 'watch', 'item_id', 'store_code', 'a', 'b']

    val = data[data.watch == 1]
    # Explicit copy: columns are added below, and assigning into a slice of
    # ``data`` triggers pandas' SettingWithCopy warning.
    val_a_b = val[['item_id', 'store_code', 'a', 'b']].copy()
    val_y = val.label
    val_x = val.drop(non_feature_cols, axis=1)

    train = data[(data.watch != 1) & (data.watch != 0)]
    train_y = train.label
    # Element-wise minimum of the two columns, computed without a Python loop.
    train_weight = np.minimum(train.a.values, train.b.values)
    train_x = train.drop(non_feature_cols, axis=1)

    train_x = train_x.fillna(train_x.median())
    val_x = val_x.fillna(val_x.median())

    model = RandomForestRegressor(n_estimators=500, max_depth=5,
                                  max_features=0.6, n_jobs=-1,
                                  random_state=1024)
    # train
    model.fit(train_x, train_y, sample_weight=train_weight)

    # predict val set
    val_a_b['pred'] = model.predict(val_x)
    val_a_b['y'] = val_y
    cost = cal_cost(val_y.values, val_a_b.pred.values,
                    val_a_b.a.values, val_a_b.b.values)
    val_a_b.to_csv('val_{0}.csv'.format(cost[1]), index=None)
开发者ID:foxchopin,项目名称:CaiNiao-DemandForecast-StoragePlaning,代码行数:34,代码来源:rf.py
示例3: cross_validate
def cross_validate(features_target):
    """Cross-validate a random forest over 10 folds and report the result.

    ``features_target`` is indexable: element 0 holds the features, element 1
    the target.  For every fold the forest is refitted on the training part,
    the ``(actual, predicted)`` pairs of the held-out part are collected and
    ``importance`` is called on the forest.  The collected pairs are finally
    handed to ``combined_auc``.
    """
    features, target = features_target[0], features_target[1]
    forest = RandomForestRegressor(
        n_estimators=100,
        verbose=2,
        n_jobs=1,
        min_samples_split=10,
        compute_importances=True,
        random_state=1,
    )
    folds = cross_validation.KFold(len(features), n_folds=10, indices=False)

    # (actual, predicted) pairs aggregated over all folds
    results = []
    for fold_no, (train_sel, test_sel) in enumerate(folds, 1):
        print("Running fold " + str(fold_no))
        fitted = forest.fit(features[train_sel], target[train_sel])
        predicted = fitted.predict(features[test_sel]).flatten()
        held_out = target[test_sel]
        results.extend(
            (held_out[j], predicted[j]) for j in range(len(predicted))
        )
        importance(forest)
    combined_auc(results)
开发者ID:EoinLawless,项目名称:CauseEffect,代码行数:26,代码来源:TrainTestValidate.py
示例4: RFscore_one
def RFscore_one(x, y, id):
    """Score how well a random forest predicts ``y`` from ``x``.

    Both inputs are shuffled with the same random permutation, standardised
    (zero mean, unit standard deviation) and turned into 2-D arrays via
    ``np.array(..., ndmin=2).T`` (a 1-D input becomes a single column).  A
    forest is then evaluated with 3-fold cross validation.

    Args:
        x: Array of predictor values.
        y: Array of target values, same length as ``x``.
        id: Label that is only used in the progress message.

    Returns:
        The mean fold score (``RandomForestRegressor.score``) divided by
        ``dist(y)``.
    """
    folds = 3
    print("RFscore " + id)

    # ``permutation`` returns a fresh index array; shuffling ``range(...)``
    # in place does not work where ``range`` is an immutable sequence.
    order = np.random.permutation(len(x))
    x = x[order]
    y = y[order]

    x = (x - np.mean(x)) / np.std(x)
    y = (y - np.mean(y)) / np.std(y)
    x = np.array(x, ndmin=2).T
    y = np.array(y, ndmin=2).T

    # NOTE(review): ``compute_importances`` is only accepted by old
    # scikit-learn releases -- confirm against the installed version.
    rf = RandomForestRegressor(n_estimators=50, verbose=0, n_jobs=1,
                               min_samples_split=10, compute_importances=True,
                               random_state=1)
    cv = cross_validation.KFold(len(x), n_folds=folds, indices=False)
    median = dist(y)

    score = 0
    for traincv, testcv in cv:
        fit = rf.fit(x[traincv], y[traincv])
        score += fit.score(x[testcv], y[testcv])
    score /= folds
    score /= median
    return score
开发者ID:EoinLawless,项目名称:CauseEffect,代码行数:32,代码来源:features.py
示例5: do_regression
def do_regression(df, j, i, k):
    """Fit a random forest predicting ``count`` from humidity and temperature.

    Only the rows with ``workingday == j``, ``Hour == i`` and
    ``Year == 2011 + k`` are used.  ``count``, ``humidity`` and ``temp`` are
    cast to ``int`` before fitting.

    Args:
        df: DataFrame with the columns ``workingday``, ``Hour``, ``Year``,
            ``count``, ``humidity`` and ``temp``.
        j: Value of ``workingday`` to select.
        i: Value of ``Hour`` to select.
        k: Offset added to 2011 to obtain the ``Year`` to select.

    Returns:
        The fitted ``RandomForestRegressor`` (100 trees).
    """
    # Build the row mask once instead of once per column.
    selected = ((df['workingday'] == j)
                & (df['Hour'] == i)
                & (df['Year'] == 2011 + k))
    subset = df.loc[selected, ['count', 'humidity', 'temp']].astype(int)

    y = subset['count'].values
    # Materialise the (humidity, temp) pairs: a bare ``zip`` is a one-shot
    # iterator on Python 3 and cannot be used as a sample matrix.
    x = list(zip(subset['humidity'].values, subset['temp'].values))

    regr = RandomForestRegressor(n_estimators=100)
    regr.fit(x, y)
    return regr
开发者ID:michaelbateman,项目名称:KagglePlayground,代码行数:25,代码来源:randomforest.py
示例6: regression
def regression(X_train, y_train, X_test, y_test):
    """
    Train the random forest regressor from Scikit-Learn.

    ``X_test`` and ``y_test`` are kept in the signature for callers but are
    not used: the plotting / evaluation code that consumed them sat behind an
    unconditional ``return`` and could never run, so it has been removed.

    Returns the fitted ``RandomForestRegressor``.
    """
    # Random forest regressor w/ param optimization
    # NOTE(review): some of these keys (e.g. ``min_density``) may be rejected
    # by newer scikit-learn releases -- confirm against the installed version.
    params = {'n_estimators': 1000, 'criterion': 'mse', 'max_depth': 20,
              'min_samples_split': 1, 'min_samples_leaf': 1,
              'max_features': 2, 'bootstrap': True, 'oob_score': False,
              'n_jobs': 32, 'random_state': 0, 'verbose': 0,
              'min_density': None, 'max_leaf_nodes': None}
    if config.DEBUG:
        params['verbose'] = 1

    regr = RandomForestRegressor(**params)
    # Train the model using the training sets
    regr.fit(X_train, y_train)
    return regr
开发者ID:BinbinBian,项目名称:semeval-relatedness,代码行数:26,代码来源:semeval_task1.py
示例7: fit
def fit(self, X, y, **kwargs):
    """Fit a random forest, optionally overriding stored constructor params.

    Keyword arguments whose name is already a key of ``self.INITPARAMS``
    replace the stored value; any other keyword is ignored.  The merged
    parameters are kept on the instance and the fitted estimator is stored in
    ``self.model``.
    """
    # Merge into a copy and rebind on the instance, so overrides persist for
    # this object without mutating a dict that other instances may share
    # (presumably ``INITPARAMS`` is defined at class level -- confirm).
    params = dict(self.INITPARAMS)
    for key, value in kwargs.items():
        if key in params:
            params[key] = value
    self.INITPARAMS = params

    model = RandomForestRegressor(**params)
    model.fit(X, y)
    self.model = model
开发者ID:DJRumble,项目名称:S2DS,代码行数:7,代码来源:estimator.py
示例8: random_learning
def random_learning(labels, train, test):
    """Predict labels for ``test`` with a forest trained in log space.

    The forest (50 trees, 3 jobs) is fitted on ``log1p(labels)``; its
    predictions are mapped back with ``expm1`` before being returned.
    """
    forest = RandomForestRegressor(n_estimators=50, n_jobs=3)
    fitted = forest.fit(train, np.log1p(labels))
    log_predictions = fitted.predict(test)
    return np.expm1(log_predictions)
开发者ID:nickmcadden,项目名称:Kaggle,代码行数:7,代码来源:cv_example.py
示例9: get_kernel
def get_kernel(train_data, test_data, label, n_partial=100):
    """Average several partial kernels computed from one fitted forest.

    A forest of 1000 trees is fitted on ``train_data`` / ``label``.
    ``get_partial_kernel(forest, dataset)`` is then evaluated ``n_partial``
    times on the row-wise concatenation of ``train_data`` and ``test_data``
    and the results are averaged element-wise.

    Args:
        train_data: Training samples.
        test_data: Test samples, concatenated below ``train_data``.
        label: Training targets.
        n_partial: Number of partial kernels to average (default 100).

    Returns:
        A ``(len(dataset), len(dataset))`` array holding the mean kernel.

    Raises:
        ValueError: If ``n_partial`` is smaller than 1.
    """
    if n_partial < 1:
        raise ValueError("n_partial must be at least 1")

    # Define forest (n_estimators = number of trees)
    forest = RandomForestRegressor(n_estimators=1000, warm_start=True)
    forest = forest.fit(train_data, label)

    dataset = np.concatenate((train_data, test_data), axis=0)
    sample_size = len(dataset)

    # Keep a running sum instead of storing every partial kernel: memory
    # stays at one N x N matrix rather than n_partial of them.
    kernel_sum = np.zeros((sample_size, sample_size))
    for m in range(n_partial):
        print("Building partial kernel: {}".format(m))
        kernel_sum += get_partial_kernel(forest, dataset)

    # Average the samples to compute the kernel
    return kernel_sum / n_partial
开发者ID:marthall,项目名称:random_forest_kernel,代码行数:31,代码来源:RegressionKernel.py
示例10: main
def main():
    """Cross-validate a random forest on the ACT12 training set.

    Reads the training CSV through ``read_data``, takes the first entry of
    every row as the target, reduces the columns with ``filter_cols`` and
    runs 5-fold cross validation.  For each fold the first ten held-out
    predictions and the held-out score are printed; the mean score over all
    folds is printed at the end.
    """
    # read in data, parse into training and target sets
    cols, train = read_data("../TrainingSet/ACT12_competition_training.csv", 1)
    target = np.array([x[0] for x in train])
    train = np.array(filter_cols(train, cols, "../selected/selected_12.txt"))

    # In this case we'll use a random forest, but this could be any estimator
    cfr = RandomForestRegressor(n_estimators=500,
                                max_features=(len(train[0]) // 3),
                                n_jobs=8)

    # Simple K-Fold cross validation. 5 folds.
    cv = cross_validation.KFold(len(train), k=5, indices=False)

    # Fit on the training segment of every fold and evaluate on the held-out
    # segment; scoring the rows the model was fitted on would report training
    # performance, not a cross-validated one.
    results = []
    for traincv, testcv in cv:
        ft = cfr.fit(train[traincv], target[traincv])
        pred = ft.predict(train[testcv])
        print(pred[:10])
        score = ft.score(train[testcv], target[testcv])
        results.append(score)
        print("\tFold %d: %f" % (len(results), score))

    # print out the mean of the cross-validated results
    print("Results: " + str(np.array(results).mean()))
开发者ID:ashispapu,项目名称:kaggle-1,代码行数:28,代码来源:randomForest.py
示例11: train_with_features
def train_with_features(self, features):
    """Fit a default random forest on the selected ``features``.

    The sample matrix is obtained from
    ``self.data_folder.truncate(self.A, features)``; the target is
    ``self.target``.  Returns the fitted regressor.
    """
    samples = self.data_folder.truncate(self.A, features)
    forest = RandomForestRegressor()
    forest.fit(samples, self.target)
    return forest
开发者ID:nicster,项目名称:bikesharingML,代码行数:7,代码来源:randomForest.py
示例12: test_rrf_vs_sklearn_reg
def test_rrf_vs_sklearn_reg(self):
    """Compare the R-backed forest with sklearn's on the Boston housing data.

    Both estimators are fitted on the same 80/20 split and their held-out
    mean squared errors must agree to zero decimal places.
    """
    from sklearn.datasets import load_boston
    from sklearn.cross_validation import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.ensemble import RandomForestRegressor

    boston = load_boston()
    split = train_test_split(boston.data, boston.target,
                             test_size=0.2, random_state=13)
    X_train, X_test, y_train, y_test = split
    n_samples, n_features = X_train.shape
    mtry = int(np.floor(0.3 * n_features))

    def holdout_mse(estimator):
        # Fit on the training part and return the MSE on the held-out part.
        estimator.fit(X_train, y_train)
        return mean_squared_error(y_test, estimator.predict(X_test))

    # do 100 trees
    r_params = {
        'ntree': 100,
        'nodesize': 1,
        'replace': 0,
        'mtry': mtry,
        'corr.bias': False,
        'sampsize': n_samples,
        'random_state': 1234,
    }
    r_mse = holdout_mse(RRFEstimatorR(**r_params))

    p_mse = holdout_mse(
        RandomForestRegressor(n_estimators=100, min_samples_leaf=1,
                              bootstrap=False, max_features=mtry,
                              random_state=1)
    )

    print('%.4f vs %.4f' % (r_mse, p_mse))
    # should be roughly the same (7.6 vs. 7.2)
    np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)
开发者ID:tkincaid,项目名称:tkincaid.github.com,代码行数:29,代码来源:test_rrf_bm.py
示例13: round2
def round2(X, y):
    """Grid-search tree count and ``max_features`` by 5-fold RMSE.

    Every combination of ``n_estimators`` in ``[50, 100, 200, 500]`` and
    ``max_features`` in ``['auto', 'log2']`` is evaluated with a freshly
    shuffled 5-fold split.  The combination, its fold scores and their mean
    are printed.

    Returns:
        A dict with the keys ``'estimator'``, ``'max_feature'`` and
        ``'scores'`` describing the combination with the lowest mean RMSE.
    """
    n = len(y)
    min_score = {}
    for tree in [50, 100, 200, 500]:
        for feature in ['auto', 'log2']:
            model = RandomForestRegressor(n_estimators=tree,
                                          max_features=feature)

            # Root mean squared error on the held-out part of each fold
            scores = []
            for train_idx, test_idx in KFold(n, n_folds=5, shuffle=True):
                model.fit(X[train_idx], y[train_idx])
                prediction = model.predict(X[test_idx])
                scores.append(
                    np.sqrt(mean_squared_error(y[test_idx], prediction)))

            # Keep the first combination, then only strictly better ones.
            is_better = (not min_score
                         or np.mean(scores) < np.mean(min_score['scores']))
            if is_better:
                min_score = {
                    'estimator': tree,
                    'max_feature': feature,
                    'scores': scores,
                }

            print("Estimator: %s" % (tree,))
            print("Max Features: %s" % (feature,))
            print(scores)
            print(np.mean(scores))
    return min_score
开发者ID:gokamoto,项目名称:AdvancedMLProject,代码行数:35,代码来源:model_RandomForest.py
示例14: RandomForestModel
class RandomForestModel(Model):
    """Random forest regression model wrapping ``RandomForestRegressor``."""

    def __init__(self, *argv, **args):
        """Forward positional arguments to ``Model`` and keywords to the forest."""
        super(RandomForestModel, self).__init__(*argv)
        self.rf = RandomForestRegressor(**args)

    def pretreat_feature(self):
        """Pre-process the feature data (no-op for this model)."""
        pass

    def train(self):
        """Fit the forest on ``self.x`` / ``self.y``."""
        self.rf.fit(self.x, self.y)

    def assess(self):
        """Print the summed squared error of the predictions on ``self.test_x``."""
        # Predict all rows in a single call instead of one 1-D sample at a
        # time.
        predictions = self.predict(self.test_x)
        error = sum(
            ((pre_val - actual) ** 2
             for pre_val, actual in zip(predictions, self.test_y)),
            0.0,
        )
        # NOTE(review): the label says "Training" although the error is
        # computed on test_x / test_y -- confirm the intended wording.
        print('Training Error:  %s' % (error,))

    def predict(self, x):
        """Return the forest's prediction for ``x``."""
        return self.rf.predict(x)

    def validate(self):
        """Choose meta-parameters by cross-validation (not implemented)."""
        pass
开发者ID:kymo,项目名称:kaggle,代码行数:31,代码来源:model.py
示例15: do_rf
def do_rf(filename, n_rows=200):
    """Fit a random forest on the merged dataset and predict its first rows.

    Args:
        filename: Passed to ``create_merged_dataset``, which returns the
            feature frame and the target.
        n_rows: Number of leading rows to predict (default 200).

    Returns:
        A DataFrame with the columns ``driver``, ``trip`` and ``probs`` for
        the first ``n_rows`` rows.  The predictions are made on rows the
        model was fitted on.
    """
    df, Y = create_merged_dataset(filename)
    rf = RandomForestRegressor(n_estimators=100)

    # The identifier columns are not features.
    X = df.drop(['driver', 'trip'], axis=1)
    rf.fit(X, Y)

    probs = rf.predict(X[:n_rows])
    return pd.DataFrame({'driver': df['driver'][:n_rows],
                         'trip': df['trip'][:n_rows],
                         'probs': probs})
开发者ID:fabiogm,项目名称:kaggle-driver-telematics,代码行数:7,代码来源:main.py
示例16: main
def _add_time_features(frame):
    """Add ``hour``, ``weekday`` and ``isweekend`` columns from ``datetime``."""
    stamps = pd.DatetimeIndex(frame['datetime'])
    frame['hour'] = stamps.hour
    frame['weekday'] = stamps.weekday
    # weekday 5 and 6 are flagged as weekend
    frame['isweekend'] = frame['weekday'].isin([5, 6]).astype('int64')
    return frame


def main():
    """Train one random forest per hour of day and write a submission file.

    ``../train.csv`` and ``../test.csv`` are read and extended with ``hour``,
    ``weekday`` and ``isweekend``.  For every hour present in the test set a
    forest (100 trees) is fitted on the training rows of that hour, using the
    features returned by ``get_features``, and predicts ``count`` for the test
    rows of that hour.  The predictions are cast to ``int64``, sorted by
    ``datetime`` and written to ``../submissions/seventhSubmission.csv``.
    """
    train = _add_time_features(
        pd.read_csv('../train.csv', parse_dates=['datetime']))
    test = _add_time_features(
        pd.read_csv('../test.csv', parse_dates=['datetime']))

    # Collect the per-hour frames and concatenate once at the end instead of
    # growing a DataFrame inside the loop.
    frames = []
    for hour, test_subset in test.groupby(test['hour']):
        train_subset = train[train['hour'] == hour]
        model = RandomForestRegressor(n_estimators=100)
        model.fit(np.array(get_features(train_subset)),
                  np.array(train_subset['count']))
        predictions = model.predict(np.array(get_features(test_subset)))

        dt = test_subset['datetime']
        res = pd.concat([dt, pd.Series(predictions, index=dt.index)], axis=1)
        res.columns = ['datetime', 'count']
        frames.append(res)

    if frames:
        results = pd.concat(frames)
    else:
        # Empty test set: still emit a file with just the header.
        results = pd.DataFrame(columns=['datetime', 'count'])

    results['count'] = results['count'].astype('int64')
    # ``sort_values`` is the replacement for the removed ``DataFrame.sort``.
    results = results.sort_values('datetime')
    results.to_csv('../submissions/seventhSubmission.csv', index=False)
开发者ID:bhrzslm,项目名称:ml_challenges,代码行数:29,代码来源:model_5_weekday.py
示例17: build_random_forest_regressor
def build_random_forest_regressor(X_test, X_train_full, y_train_full):
    """Fit an 800-tree forest on the full training set and predict ``X_test``.

    Returns the array of predictions for ``X_test``.
    """
    print("Building random forest regressor...")
    forest = RandomForestRegressor(n_estimators=800)
    fitted = forest.fit(X_train_full, y_train_full)
    return fitted.predict(X_test)
开发者ID:DarioBernardo,项目名称:Kaggle,代码行数:7,代码来源:BlendedRegressorsCV.py
示例18: cross_val
def cross_val(seq, ft):
    """Cross-validate random forests of several sizes on one dataset.

    The data returned by ``load_train_data(seq, ft)`` is split into 10 folds.
    For each ``n_estimators`` in ``range(30, 110, 20)`` a fresh forest is
    fitted per fold and scored on the held-out part with
    ``RandomForestRegressor.score``.

    Returns:
        A list with one entry per ``n_estimators`` value:
        ``[seq, ft, n_samples, n_estimators, mean_score, 2 * std_score]``.
    """
    n_folds = 10
    X, y = load_train_data(seq, ft)
    print('%d-fold cross validation. Dataset: %d samples, %d features'
          % (n_folds, X.shape[0], X.shape[1]))
    kf = KFold(len(y), n_folds=n_folds)

    results = []
    for n_estimators in range(30, 110, 20):
        scores = []
        for train, test in kf:
            rf = RandomForestRegressor(n_estimators=n_estimators,
                                       n_jobs=mp.cpu_count())
            rf.fit(X[train], y[train])
            # The default regressor score is the R^2 coefficient of
            # determination, i.e. how much of the variance in y the model
            # explains.
            scores.append(rf.score(X[test], y[test]))
        scores = np.array(scores)
        print("n_estimators=%d; accuracy (R^2 score): %0.2f (+/- %0.2f)"
              % (n_estimators, scores.mean(), scores.std() * 2))
        results.append([seq, ft, X.shape[0], n_estimators,
                        scores.mean(), scores.std() * 2])
    return results
开发者ID:alexkreimer,项目名称:monos,代码行数:30,代码来源:fit.py
示例19: _create_random_forest
def _create_random_forest(self, current_param=None):
    """Build and fit a random forest from the stored and given parameters.

    Args:
        current_param: Optional dict of estimator parameters; its entries
            take precedence over ``self.params``.  ``None`` (the default)
            means no overrides.

    Returns:
        The ``RandomForestRegressor`` fitted on ``self.Xtr`` / ``self.Ytr``.
    """
    # ``None`` replaces the former ``{}`` default, which was a single dict
    # object shared by every call.
    combined_param = dict(self.params, **(current_param or {}))
    clf = RandomForestRegressor()
    clf.set_params(**combined_param)
    clf = clf.fit(self.Xtr, self.Ytr)
    return clf
开发者ID:CS178,项目名称:KaggleRandomForestTreeRegressor,代码行数:7,代码来源:KaggleRandomForestTreeRegressor.py
示例20: fill_missing_age
def fill_missing_age(df):
    """Fill missing ``Age`` values with random forest predictions.

    A forest is fitted on the rows whose ``Age`` is known, using ``Fare``,
    ``Parch``, ``SibSp`` and ``Pclass`` as features, and then predicts the
    age of the remaining rows.  ``df`` is modified in place.

    Args:
        df: DataFrame with the columns ``Age``, ``Fare``, ``Parch``,
            ``SibSp`` and ``Pclass``.

    Returns:
        A tuple ``(df, RFR)``: the same frame with ``Age`` filled in, and the
        fitted regressor.
    """
    # Collect the numeric columns that are fed to the regressor.
    age_df = df[['Age', 'Fare', 'Parch', 'SibSp', 'Pclass']]

    # Split the passengers into those with a known and an unknown age.
    # ``.values`` is used because ``as_matrix()`` is gone from newer pandas.
    known_age = age_df[age_df.Age.notnull()].values
    unknown_age = age_df[age_df.Age.isnull()].values

    # Target: the age (first column); features: the remaining columns.
    y = known_age[:, 0]
    x = known_age[:, 1:]

    RFR = RandomForestRegressor(random_state=0, n_estimators=2000, n_jobs=-1)
    RFR.fit(x, y)

    # Predict and fill only when something is missing; predicting on an
    # empty sample matrix would fail.
    if len(unknown_age):
        predictedAge = RFR.predict(unknown_age[:, 1:])
        df.loc[df.Age.isnull(), 'Age'] = predictedAge
    return df, RFR
开发者ID:qimuyunduan,项目名称:ML,代码行数:29,代码来源:Titanic.py
注：本文中的sklearn.ensemble.RandomForestRegressor类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论