本文整理汇总了 Python 中 sklearn.datasets.load_boston 函数的典型用法代码示例。如果您正苦于以下问题:Python load_boston 函数的具体用法?Python load_boston 怎么用?Python load_boston 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 load_boston 函数的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。
示例1: test_load_boston
def test_load_boston():
    """Check the Boston bunch layout and the ``return_X_y`` option."""
    dataset = load_boston()

    # The bunch must expose a (506, 13) data matrix, 506 targets,
    # 13 feature names and a non-empty description.
    assert_equal(dataset.data.shape, (506, 13))
    assert_equal(dataset.target.size, 506)
    assert_equal(dataset.feature_names.size, 13)
    assert_true(dataset.DESCR)

    # With return_X_y=True the loader must return a tuple whose two items
    # equal the data and target of a freshly loaded bunch.
    as_tuple = load_boston(return_X_y=True)
    reference = load_boston()
    assert_true(isinstance(as_tuple, tuple))
    for position, expected in enumerate((reference.data, reference.target)):
        assert_array_equal(as_tuple[position], expected)
开发者ID:NazBen,项目名称:scikit-learn,代码行数:13,代码来源:test_base.py
示例2: get_cmap_scatter_plot
def get_cmap_scatter_plot():
    """Build a colormapped scatter plot of house price against lower status.

    Marker colour encodes column 4 of the Boston data (nitric oxides
    concentration, per the plot title).

    Returns:
        The configured ``ColormappedScatterPlot`` with axes attached.
    """
    dataset = datasets.load_boston()
    median_prices = dataset['target']
    pct_lower_status = dataset['data'][:, -1]
    nox_level = dataset['data'][:, 4]

    index_source, value_source = get_data_sources(
        x=pct_lower_status, y=median_prices)
    index_mapper, value_mapper = get_mappers(index_source, value_source)

    nox_source = ArrayDataSource(nox_level)
    # Reversed RdYlGn colour map spanning the observed NOx range.
    reversed_cmap = dc.reverse(dc.RdYlGn)
    nox_color_mapper = reversed_cmap(
        DataRange1D(low=nox_level.min(), high=nox_level.max()))

    plot = ColormappedScatterPlot(
        index=index_source,
        value=value_source,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        color_data=nox_source,
        color_mapper=nox_color_mapper,
        marker='circle',
        title='Color represents nitric oxides concentration',
        render_method='bruteforce',
        **PLOT_DEFAULTS
    )
    add_axes(
        plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:29,代码来源:create_plot_snapshots.py
示例3: test_regressors_int
def test_regressors_int():
    """Check that regressors predict the same for integer and float targets.

    Every regressor returned by ``all_estimators`` (except those in
    ``dont_test`` and ``CCA``) is fitted twice on the scaled Boston
    features: once with integer labels and once with the same labels cast
    to float. The two sets of predictions must agree to 2 decimals.
    """
    regressors = all_estimators(type_filter='regressor')
    boston = load_boston()
    X, y = boston.data, boston.target
    X, y = shuffle(X, y, random_state=0)
    X = StandardScaler().fit_transform(X)
    # Replace the continuous target with random 0/1 integer labels.
    y = np.random.randint(2, size=X.shape[0])
    for name, Reg in regressors:
        if Reg in dont_test or Reg in (CCA,):
            continue
        # catch deprecation warnings
        # NOTE(review): only estimator construction is wrapped here; confirm
        # the intended extent of the warning filter.
        with warnings.catch_warnings(record=True):
            # separate estimators to control random seeds
            reg1 = Reg()
            reg2 = Reg()
        set_random_state(reg1)
        set_random_state(reg2)

        if Reg in (_PLS, PLSCanonical, PLSRegression):
            # These estimators are fed a two-column target.
            y_ = np.vstack([y, 2 * y + np.random.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        # fit
        reg1.fit(X, y_)
        pred1 = reg1.predict(X)
        # ``np.float`` was only an alias of the builtin float and has been
        # removed from NumPy; ``np.float64`` is the same dtype.
        reg2.fit(X, y_.astype(np.float64))
        pred2 = reg2.predict(X)
        assert_array_almost_equal(pred1, pred2, 2, name)
开发者ID:nwf5d,项目名称:scikit-learn,代码行数:32,代码来源:test_common.py
示例4: load_data
def load_data():
    """Fetch the Boston housing dataset via ``datasets.load_boston``."""
    return datasets.load_boston()
开发者ID:shoc2005,项目名称:P1,代码行数:7,代码来源:boston_housing.py
示例5: test_regressors_train
def test_regressors_train():
    """Fit every regressor on scaled Boston data and sanity-check it.

    For each ``RegressorMixin`` subclass not listed in ``dont_test`` or
    ``meta_estimators``: fitting with mismatched X/y lengths must raise
    ``ValueError``, and the training score must exceed 0.5.
    """
    candidates = [
        (est_name, est_cls)
        for est_name, est_cls in all_estimators()
        if issubclass(est_cls, RegressorMixin)
    ]
    dataset = load_boston()
    X, y = shuffle(dataset.data, dataset.target, random_state=0)
    # TODO: test with intercept
    # TODO: test with multiple responses
    X = Scaler().fit_transform(X)
    y = Scaler().fit_transform(y)

    for _, regressor_cls in candidates:
        excluded = regressor_cls in dont_test or regressor_cls in meta_estimators
        if excluded:
            continue

        # Silence deprecation warnings raised while instantiating.
        with warnings.catch_warnings(record=True):
            model = regressor_cls()
        if hasattr(model, 'alpha'):
            model.set_params(alpha=0.01)

        # A target one sample short of X is malformed input for fit.
        truncated_y = y[:-1]
        assert_raises(ValueError, model.fit, X, truncated_y)

        # A regular fit must succeed, predict, and score above 0.5.
        model.fit(X, y)
        model.predict(X)
        training_score = model.score(X, y)
        assert_greater(training_score, 0.5)
开发者ID:arunchaganty,项目名称:scikit-learn,代码行数:26,代码来源:test_common.py
示例6: demo
def demo(X=None, y=None, test_size=0.1):
    """Train a ``BasicSegmenterEG_FEMPO`` model and print its test metrics.

    Args:
        X: Feature table. When ``None``, the Boston housing data is loaded
            and both ``X`` and ``y`` are taken from it.
        y: Target values matching ``X``; ignored when ``X`` is ``None``.
        test_size: Passed to ``train_test_split`` as the held-out share.

    Returns:
        Tuple ``(clf, X_test, y_test)``: the fitted model and the held-out
        split.
    """
    # ``X == None`` compares elementwise on DataFrames/arrays and cannot be
    # used as a condition; an identity check is required.
    if X is None:
        boston = load_boston()
        X = pd.DataFrame(boston.data)
        y = pd.DataFrame(boston.target)

    base_estimator = DecisionTreeRegressor(max_depth=5)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size)
    print(X_train.shape)

    # A BaggingRegressor built on the same base estimator can be fitted on
    # this split as a baseline for comparison.
    clf = BasicSegmenterEG_FEMPO(
        ngen=30,
        init_sample_percentage=1,
        n_votes=10,
        n=10,
        base_estimator=base_estimator,
        unseen_x=X_test,
        unseen_y=y_test,
    )
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))

    # Keep predictions in their own name instead of rebinding ``y``.
    predictions = clf.predict(X_test)
    print(mean_squared_error(predictions, y_test))
    print(predictions.shape)
    return clf, X_test, y_test
开发者ID:bhanu-mnit,项目名称:EvoML,代码行数:29,代码来源:test_auto_segmentEG_FEMPO.py
示例7: test_rrf_vs_sklearn_reg
def test_rrf_vs_sklearn_reg(self):
    """Compare the R random forest wrapper with sklearn's on Boston housing.

    Both models are trained with 100 trees on the same 80/20 split and
    their test mean squared errors must agree to 0 decimal places.
    """
    from sklearn.datasets import load_boston
    try:
        # ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
        # ``train_test_split`` now lives in ``model_selection``.
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    from sklearn.metrics import mean_squared_error
    from sklearn.ensemble import RandomForestRegressor

    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, test_size=0.2, random_state=13)
    n_samples, n_features = X_train.shape
    # Number of candidate features per split: 30% of the features.
    mtry = int(np.floor(0.3 * n_features))

    # do 100 trees
    # Passed as a dict because 'corr.bias' is not a valid keyword name.
    r_rf = RRFEstimatorR(**{'ntree': 100, 'nodesize': 1, 'replace': 0,
                            'mtry': mtry, 'corr.bias': False,
                            'sampsize': n_samples, 'random_state': 1234})
    r_rf.fit(X_train, y_train)
    y_pred = r_rf.predict(X_test)
    r_mse = mean_squared_error(y_test, y_pred)

    p_rf = RandomForestRegressor(n_estimators=100, min_samples_leaf=1,
                                 bootstrap=False, max_features=mtry,
                                 random_state=1)
    p_rf.fit(X_train, y_train)
    y_pred = p_rf.predict(X_test)
    p_mse = mean_squared_error(y_test, y_pred)

    print('%.4f vs %.4f' % (r_mse, p_mse))
    # should be roughly the same (7.6 vs. 7.2)
    np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)
开发者ID:tkincaid,项目名称:tkincaid.github.com,代码行数:29,代码来源:test_rrf_bm.py
示例8: test_template
def test_template(params=None, X_y=None, feval=mean_squared_error,
                  num_round=100, init_model=None, custom_eval=None,
                  early_stopping_rounds=10,
                  return_data=False, return_model=False):
    """Train a LightGBM model on a 90/10 split and report its evaluation.

    Args:
        params: Training parameters. Defaults to an l2 regression setup.
            The mapping is copied, so the caller's dict is not modified.
        X_y: ``(X, y)`` pair to split. Defaults to the Boston housing data,
            loaded on first use rather than at definition time.
        feval: Metric called as ``feval(y_test, predictions)``.
        num_round: Number of boosting rounds.
        init_model: Forwarded to ``lgb.train``.
        custom_eval: Forwarded to ``lgb.train`` as ``feval``.
        early_stopping_rounds: Forwarded to ``lgb.train``.
        return_data: When true, return ``(lgb_train, lgb_eval)`` without
            training.
        return_model: When true, return the trained booster.

    Returns:
        The datasets, the booster, or ``(evals_result, metric_value)``
        depending on the two ``return_*`` flags.
    """
    # A dict default would be shared across calls and is mutated below, so
    # build a fresh one per call and work on a copy of any caller value.
    if params is None:
        params = {'objective': 'regression', 'metric': 'l2'}
    else:
        params = dict(params)
    if X_y is None:
        X_y = load_boston(return_X_y=True)

    params['verbose'], params['seed'] = -1, 42
    X_train, X_test, y_train, y_test = train_test_split(
        *X_y, test_size=0.1, random_state=42)
    lgb_train = lgb.Dataset(X_train, y_train, params=params)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
    if return_data:
        return lgb_train, lgb_eval

    evals_result = {}
    gbm = lgb.train(params, lgb_train,
                    num_boost_round=num_round,
                    valid_sets=lgb_eval,
                    valid_names='eval',
                    verbose_eval=False,
                    feval=custom_eval,
                    evals_result=evals_result,
                    early_stopping_rounds=early_stopping_rounds,
                    init_model=init_model)
    if return_model:
        return gbm
    return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))
开发者ID:kqdmqx,项目名称:LightGBM,代码行数:25,代码来源:test_engine.py
示例9: load_boston
def load_boston():
    """Load the Boston housing data and pass it through the class shuffler.

    Columns of the feature matrix, as listed in the original notes:

    - CRIM: per-capita crime rate
    - ZN: share of residential land zoned for lots over 25,000 sq. ft.
    - INDUS: share of land used by non-retail business
    - CHAS: Charles River dummy variable (1: near the river, 0: otherwise)
    - NOX: NOx concentration
    - RM: average number of rooms per dwelling
    - AGE: share of properties built before 1940
    - DIS: weighted distance to five Boston employment centres
    - RAD: ease of access to radial highways
    - TAX: full-value property-tax rate per $10,000
    - PTRATIO: pupil-teacher ratio by town
    - B: 1000(Bk - 0.63)^2, where Bk is the share of Black residents by town
    - LSTAT: share of the population in low-paid occupations (%)

    Returns:
        Whatever ``SklearnDataGenerator.shuffle`` returns for the feature
        matrix and the house-price target.
    """
    # Aliased so the library loader is not confused with this function.
    from sklearn.datasets import load_boston as fetch_boston

    dataset = fetch_boston()
    features, house_prices = dataset.data, dataset.target
    return SklearnDataGenerator.shuffle(features, house_prices)
开发者ID:Munetaka,项目名称:labo,代码行数:26,代码来源:sklearn_data_generator.py
示例10: get_data
def get_data():
    """Plot true versus predicted Boston house prices into a memory buffer.

    A ``LinearRegression`` is fitted on the full dataset and its
    predictions on the same data are scattered against the true prices,
    with a dashed diagonal from (0, 0) to (50, 50) for reference.

    Returns:
        An ``io.BytesIO`` holding the figure as written by ``plt.savefig``,
        rewound to the start.
    """
    import io

    data = load_boston()
    clf = LinearRegression()
    clf.fit(data.data, data.target)
    predicted = clf.predict(data.data)

    plt.figure(num=None, figsize=(14, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.scatter(data.target, predicted)
    plt.plot([0, 50], [0, 50], '--k')
    plt.axis('tight')
    plt.xlabel('True price of Houses ($1000s)')
    plt.ylabel('Predicted price of Houses ($1000s)')

    # savefig emits binary image data, so the buffer must be bytes-based.
    # io.BytesIO exists on both Python 2 and 3; StringIO.StringIO is
    # Python 2 only.
    img = io.BytesIO()
    plt.savefig(img, bbox_inches='tight')
    img.seek(0)
    plt.close()
    return img
开发者ID:abhishekgahlot,项目名称:ml-project,代码行数:31,代码来源:linear_regression.py
示例11: generate_data
def generate_data(case, sparse=False):
    """Generate a shuffled 80/20 train/test split for a benchmark case.

    Args:
        case: ``'regression'`` (Boston housing) or ``'classification'``
            (vectorized 20 newsgroups, all subsets).
        sparse: When true, the feature matrices are converted with
            ``csr_matrix``; otherwise with ``np.array``.

    Returns:
        Dict with keys ``'X_train'``, ``'X_test'``, ``'y_train'`` and
        ``'y_test'``.

    Raises:
        ValueError: If ``case`` is not one of the two supported values.
    """
    if case == 'regression':
        bunch = datasets.load_boston()
    elif case == 'classification':
        bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
    else:
        # Fail early with a clear message instead of an AttributeError on
        # ``None`` further down.
        raise ValueError(
            "Unknown case %r; expected 'regression' or 'classification'."
            % (case,))

    X, y = shuffle(bunch.data, bunch.target)
    # The first 80% of the shuffled samples form the training set.
    offset = int(X.shape[0] * 0.8)
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]
    if sparse:
        X_train = csr_matrix(X_train)
        X_test = csr_matrix(X_test)
    else:
        X_train = np.array(X_train)
        X_test = np.array(X_test)
    y_test = np.array(y_test)
    y_train = np.array(y_train)
    data = {
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
    }
    return data
开发者ID:0x0all,项目名称:machineLearning,代码行数:27,代码来源:plot_model_complexity_influence.py
示例12: get_bar_plot
def get_bar_plot():
    """Build a bar plot of the relative frequency of median house prices.

    Prices are binned into 10 histogram bins and the counts are normalised
    to sum to one.

    Returns:
        The configured ``BarPlot`` with axes attached.
    """
    median_prices = datasets.load_boston()['target']

    counts, edges = np.histogram(median_prices, bins=10)
    frequencies = counts.astype('d') / counts.sum()
    centers = (edges[:-1] + edges[1:]) / 2.0

    index_source, value_source = get_data_sources(x=centers, y=frequencies)
    index_mapper, value_mapper = get_mappers(index_source, value_source)

    # Extend the x range by half a bin on each side; otherwise half of the
    # first and last bars would be clipped.
    bin_width = edges[1] - edges[0]
    index_mapper.range.low = centers[0] - bin_width / 2.
    index_mapper.range.high = centers[-1] + bin_width / 2.
    value_mapper.range.high += 0.02

    plot = BarPlot(
        index=index_source,
        value=value_source,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        fill_color='blue',
        bar_width=3.0,
        **PLOT_DEFAULTS
    )
    add_axes(plot, x_label='Median house prices', y_label='Frequency')
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:32,代码来源:create_plot_snapshots.py
示例13: get_jitter_plot
def get_jitter_plot():
    """Build a jitter plot of the median house prices with a bottom axis.

    Returns:
        The configured ``JitterPlot``.
    """
    median_prices = datasets.load_boston()['target']
    index_source, value_source = get_data_sources(y=median_prices)
    # Only the value mapper is used; the prices are the single plotted axis.
    _, value_mapper = get_mappers(index_source, value_source)

    plot = JitterPlot(
        index=value_source,
        mapper=value_mapper,
        marker='circle',
        jitter_width=100,
        **PLOT_DEFAULTS
    )
    plot.line_width = 1.

    bottom_axis = PlotAxis(
        orientation='bottom',
        title='Median house prices',
        mapper=plot.mapper,
        component=plot,
        **AXIS_DEFAULTS
    )
    plot.underlays.append(bottom_axis)
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:25,代码来源:create_plot_snapshots.py
示例14: get_variable_size_scatter_plot
def get_variable_size_scatter_plot():
    """Build a price-versus-lower-status scatter plot with scaled markers.

    Marker size follows column 9 of the Boston data (the property-tax
    rate, per the plot title).

    Returns:
        The configured ``ScatterPlot`` with axes attached.
    """
    dataset = datasets.load_boston()
    median_prices = dataset['target']
    pct_lower_status = dataset['data'][:, -1]
    tax_rate = dataset['data'][:, 9]

    index_source, value_source = get_data_sources(
        x=pct_lower_status, y=median_prices)
    index_mapper, value_mapper = get_mappers(index_source, value_source)

    # Rescale so the largest tax value maps to a marker size of 10.
    sizes = tax_rate / tax_rate.max() * 10.

    plot = ScatterPlot(
        index=index_source,
        value=value_source,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        marker='circle',
        marker_size=sizes,
        title='Size represents property-tax rate',
        **PLOT_DEFAULTS
    )
    plot.color = (0.0, 1.0, 0.3, 0.4)
    add_axes(
        plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:26,代码来源:create_plot_snapshots.py
示例15: load_extended_boston
def load_extended_boston():
    """Return the Boston data expanded with degree-2 polynomial features.

    The features are min-max scaled first, then expanded without a bias
    column.

    Returns:
        Tuple ``(X, y)``: the expanded feature matrix and the unchanged
        target.
    """
    boston = load_boston()
    # Scale straight from the raw data; no intermediate assignment needed.
    X = MinMaxScaler().fit_transform(boston.data)
    X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
    return X, boston.target
开发者ID:MrGreenRubato,项目名称:notebooks,代码行数:7,代码来源:datasets.py
示例16: main
def main():
    """Fit a least-squares line of house price on room count and plot it.

    Prints the training RMSE, writes the figure to ``image.png`` and then
    shows it.
    """
    # Load the Boston dataset.
    boston = datasets.load_boston()
    # Number of rooms.
    rooms = boston.data[:, 5]
    # House prices.
    house_prices = boston.target
    plt.scatter(rooms, house_prices, color="r")

    # Find the line with the smallest error by least squares.
    x = np.array([[v, 1] for v in rooms])  # append the bias term
    y = house_prices
    (slope, bias), total_error, _, _ = np.linalg.lstsq(x, y)

    # Plot the fitted line.
    plt.plot(x[:, 0], slope * x[:, 0] + bias)

    # RMSE on the training data.
    rmse = np.sqrt(total_error[0] / len(x))
    msg = "RMSE (training): {0}".format(rmse)
    print(msg)

    plt.xlabel("Number of Room")
    plt.ylabel("Price of House ($1,000)")
    plt.grid()
    # Save before showing: once the window opened by show() is closed the
    # figure may be gone, and a later savefig would write an empty image.
    plt.savefig("image.png")
    # Display the graph.
    plt.show()
开发者ID:id774,项目名称:sandbox,代码行数:29,代码来源:boston_reg.py
示例17: main
def main():
    """Scatter house price against room count and overlay a fitted line."""
    # Boston data set: column 5 is the room count, the target is the price.
    dataset = datasets.load_boston()
    room_counts = dataset.data[:, 5]
    prices = dataset.target

    # Plot the relation between room count and house price.
    plt.scatter(room_counts, prices, color='r')

    # Design matrix with a constant column appended as the bias term.
    design = np.array([[count, 1] for count in room_counts])

    # Least squares gives the line with the smallest error; the residuals
    # and the remaining outputs are not used.
    (slope, bias), _residuals, _, _ = np.linalg.lstsq(design, prices)

    # Plot the fitted line.
    room_axis = design[:, 0]
    plt.plot(room_axis, slope * design[:, 0] + bias)

    plt.grid()
    plt.xlabel('rooms')
    plt.ylabel('price')
    plt.show()
开发者ID:tsuboty,项目名称:LSM,代码行数:33,代码来源:boston.py
示例18: main
def main(unused_argv):
    """Train a two-layer DNN regressor on Boston housing and print its MSE.

    Args:
        unused_argv: Not used by this function.
    """
    dataset = datasets.load_boston()
    features, targets = dataset.data, dataset.target

    # Hold out 20% of the samples for testing, with a fixed seed.
    train_x, test_x, train_y, test_y = model_selection.train_test_split(
        features, targets, test_size=0.2, random_state=42)

    # The scaler is fitted on the training set only.
    standardizer = preprocessing.StandardScaler()
    train_x = standardizer.fit_transform(train_x)

    # Fully connected network with two hidden layers of 10 units each.
    columns = tf.contrib.learn.infer_real_valued_columns_from_input(train_x)
    model = tf.contrib.learn.DNNRegressor(
        feature_columns=columns, hidden_units=[10, 10])
    model.fit(train_x, train_y, steps=5000, batch_size=1)

    # Apply the training-set scaling to the test features before predicting.
    test_x_scaled = standardizer.transform(test_x)
    predictions = list(model.predict(test_x_scaled, as_iterable=True))
    mse = metrics.mean_squared_error(predictions, test_y)
    print('MSE: {0:f}'.format(mse))
开发者ID:aravindvcyber,项目名称:tensorflow,代码行数:30,代码来源:boston.py
示例19: test_boston_housing_regression_with_sample_weights
def test_boston_housing_regression_with_sample_weights():
    """Fit ``XGBRegressor`` with unit sample weights on two Boston folds.

    For each fold, predictions with the default settings and with
    ``ntree_limit=0`` must have a mean squared error below 25; predictions
    limited to 3 trees must stay below 370.
    """
    tm._skip_if_no_sklearn()
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.cross_validation import KFold

    dataset = load_boston()
    targets = dataset['target']
    features = dataset['data']
    unit_weights = np.ones_like(targets, 'float')
    folds = KFold(targets.shape[0], n_folds=2, shuffle=True, random_state=rng)

    for train_idx, test_idx in folds:
        model = xgb.XGBRegressor().fit(
            features[train_idx],
            targets[train_idx],
            sample_weight=unit_weights[train_idx],
        )
        held_out = features[test_idx]
        default_preds = model.predict(held_out)
        # Exercise the optional predict parameters as well.
        margin_3_trees = model.predict(
            held_out, output_margin=True, ntree_limit=3)
        margin_all_trees = model.predict(
            held_out, output_margin=True, ntree_limit=0)
        value_3_trees = model.predict(
            held_out, output_margin=False, ntree_limit=3)

        expected = targets[test_idx]
        checks = (
            (default_preds, 25),
            (margin_3_trees, 370),
            (margin_all_trees, 25),
            (value_3_trees, 370),
        )
        for predicted, mse_bound in checks:
            assert mean_squared_error(predicted, expected) < mse_bound
开发者ID:AlexisMignon,项目名称:xgboost,代码行数:29,代码来源:test_with_sklearn.py
示例20: overview
def overview():
    """Print the Boston column descriptions and plot each feature vs. price.

    One scatter plot per feature column is drawn on a 4x4 grid, with the
    target on the y axis.
    """
    boston = load_boston()
    # [column index in boston.data, name, description]. Each index must be
    # unique and match the column order, since it selects both the data
    # column and the subplot slot.
    features = [
        [0, 'CRIM', "per capita crime rate by town"],
        [1, 'ZN', "proportion of residential land zoned for lots over 25,000 sq.ft."],
        [2, 'INDUS', "proportion of non-retail business acres per town"],
        [3, 'CHAS', "Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)"],
        [4, 'NOX', "nitric oxides concentration (parts per 10 million)"],
        [5, 'RM', "average number of rooms per dwelling"],
        [6, 'AGE', "proportion of owner-occupied units built prior to 1940"],
        [7, 'DIS', "weighted distances to five Boston employment centres"],
        [8, 'RAD', "index of accessibility to radial highways"],
        [9, 'TAX', "full-value property-tax rate per $10,000"],
        [10, 'PTRATIO', "pupil-teacher ratio by town"],
        [11, 'B', "1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town"],
        [12, 'LSTAT', "% lower status of the population"],
    ]
    # MEDV is the target (boston.target), not a column of boston.data, so
    # it is described but not plotted as a feature.
    target = ['MEDV', "Median value of owner-occupied homes in $1000's"]

    plot_row = 4
    plot_col = 4
    plt.figure(figsize=(10, 10))

    for _, name, description in features:
        print('{}:\t{}'.format(name, description))
    print('{}:\t{}'.format(target[0], target[1]))

    for index, name, _ in features:
        # plt.subplot(number of rows, number of columns, plot position)
        plt.subplot(plot_row, plot_col, index + 1)
        plt.scatter(boston.data[:, index], boston.target)
        plt.xlabel(name)
    # Lay out once, after every subplot exists.
    plt.tight_layout()
开发者ID:achiku,项目名称:syakyou,代码行数:31,代码来源:boston.py
注:本文中的 sklearn.datasets.load_boston 函数示例由纯净天空整理自 GitHub/MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。
请发表评论