• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python datasets.load_boston函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.datasets.load_boston函数的典型用法代码示例。如果您正苦于以下问题:Python load_boston函数的具体用法?Python load_boston怎么用?Python load_boston使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了load_boston函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_load_boston

def test_load_boston():
    """Check the Boston bunch layout and the ``return_X_y`` tuple variant."""
    dataset = load_boston()
    assert_equal(dataset.data.shape, (506, 13))
    assert_equal(dataset.target.size, 506)
    assert_equal(dataset.feature_names.size, 13)
    assert_true(dataset.DESCR)

    # With return_X_y=True the loader must return a tuple whose two items
    # equal the data and target of a freshly loaded bunch.
    xy_pair = load_boston(return_X_y=True)
    reference = load_boston()
    assert_true(isinstance(xy_pair, tuple))
    for position, expected in enumerate((reference.data, reference.target)):
        assert_array_equal(xy_pair[position], expected)
开发者ID:NazBen,项目名称:scikit-learn,代码行数:13,代码来源:test_base.py


示例2: get_cmap_scatter_plot

def get_cmap_scatter_plot():
    """Build a colormapped scatter plot from the Boston dataset.

    The target is plotted against the last data column, and every point is
    coloured by data column 4 (named ``nox`` by the original author) through
    a reversed ``RdYlGn`` colormap.

    Returns the configured ``ColormappedScatterPlot``.
    """
    dataset = datasets.load_boston()
    median_price = dataset['target']
    pct_lower_status = dataset['data'][:, -1]
    nitric_oxide = dataset['data'][:, 4]

    index_src, value_src = get_data_sources(x=pct_lower_status,
                                            y=median_price)
    index_mapper, value_mapper = get_mappers(index_src, value_src)

    colour_src = ArrayDataSource(nitric_oxide)
    reversed_cmap = dc.reverse(dc.RdYlGn)
    # The colour scale spans exactly the observed range of the colour column.
    colour_mapper = reversed_cmap(
        DataRange1D(low=nitric_oxide.min(), high=nitric_oxide.max())
    )

    plot = ColormappedScatterPlot(
        index=index_src,
        value=value_src,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        color_data=colour_src,
        color_mapper=colour_mapper,
        marker='circle',
        title='Color represents nitric oxides concentration',
        render_method='bruteforce',
        **PLOT_DEFAULTS
    )

    add_axes(
        plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:29,代码来源:create_plot_snapshots.py


示例3: test_regressors_int

def test_regressors_int():
    """Check that regressors give matching predictions for int and float targets.

    Each regressor from ``all_estimators(type_filter='regressor')`` that is
    not excluded is instantiated twice and fitted on the scaled Boston
    features: once with random 0/1 integer labels and once with the same
    labels cast to float. The two prediction arrays must agree to 2 decimals.
    """
    regressors = all_estimators(type_filter='regressor')
    boston = load_boston()
    X, y = boston.data, boston.target
    X, y = shuffle(X, y, random_state=0)
    X = StandardScaler().fit_transform(X)
    # Replace the real targets with random integer labels in {0, 1}.
    y = np.random.randint(2, size=X.shape[0])
    for name, Reg in regressors:
        if Reg in dont_test or Reg in (CCA,):
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            # separate estimators to control random seeds
            reg1 = Reg()
            reg2 = Reg()
        set_random_state(reg1)
        set_random_state(reg2)

        if Reg in (_PLS, PLSCanonical, PLSRegression):
            # These estimators are given a two-column target matrix.
            y_ = np.vstack([y, 2 * y + np.random.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        # fit
        reg1.fit(X, y_)
        pred1 = reg1.predict(X)
        # Builtin ``float`` is what the removed ``np.float`` alias pointed
        # to, so the resulting dtype is unchanged.
        reg2.fit(X, y_.astype(float))
        pred2 = reg2.predict(X)
        assert_array_almost_equal(pred1, pred2, 2, name)
开发者ID:nwf5d,项目名称:scikit-learn,代码行数:32,代码来源:test_common.py


示例4: load_data

def load_data():
    """Return the Boston dataset as provided by ``datasets.load_boston``."""
    return datasets.load_boston()
开发者ID:shoc2005,项目名称:P1,代码行数:7,代码来源:boston_housing.py


示例5: test_regressors_train

def test_regressors_train():
    """Smoke-test fit, predict and score for every available regressor.

    Regressors are the estimators from ``all_estimators`` that subclass
    ``RegressorMixin``; those listed in ``dont_test`` or ``meta_estimators``
    are skipped. Each one must reject a target that is one sample short and
    must score above 0.5 on the scaled, shuffled Boston data.
    """
    regressors = []
    for est_name, est_cls in all_estimators():
        if issubclass(est_cls, RegressorMixin):
            regressors.append((est_name, est_cls))

    dataset = load_boston()
    X, y = shuffle(dataset.data, dataset.target, random_state=0)
    # TODO: test with intercept
    # TODO: test with multiple responses
    X = Scaler().fit_transform(X)
    y = Scaler().fit_transform(y)

    for name, Reg in regressors:
        if Reg not in dont_test and Reg not in meta_estimators:
            # Instantiate under a recording context so deprecation warnings
            # raised by the constructor are captured rather than emitted.
            with warnings.catch_warnings(record=True):
                reg = Reg()
            if hasattr(reg, 'alpha'):
                reg.set_params(alpha=0.01)

            # A target with one sample missing must be rejected by fit.
            truncated_target = y[:-1]
            assert_raises(ValueError, reg.fit, X, truncated_target)

            reg.fit(X, y)
            reg.predict(X)
            training_score = reg.score(X, y)
            assert_greater(training_score, 0.5)
开发者ID:arunchaganty,项目名称:scikit-learn,代码行数:26,代码来源:test_common.py


示例6: demo

def demo(X=None, y=None, test_size=0.1):
    """Fit a ``BasicSegmenterEG_FEMPO`` model and print held-out metrics.

    Parameters
    ----------
    X : features, or None
        When None, the Boston data is loaded and both ``X`` and ``y`` are
        replaced by DataFrames built from it.
    y : targets, or None
        Used only when ``X`` is supplied.
    test_size : float
        Forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(clf, X_test, y_test)``: the fitted model and the held-out split.
    """
    # Identity check on purpose: ``X == None`` compares element-wise on
    # DataFrames/arrays and the result has no single truth value.
    if X is None:
        boston = load_boston()
        X = pd.DataFrame(boston.data)
        y = pd.DataFrame(boston.target)

    base_estimator = DecisionTreeRegressor(max_depth=5)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size)
    print(X_train.shape)

    # A BaggingRegressor built on the same base_estimator can serve as a
    # baseline for the scores printed below.

    clf = BasicSegmenterEG_FEMPO(ngen=30, init_sample_percentage=1,
                                 n_votes=10, n=10,
                                 base_estimator=base_estimator,
                                 unseen_x=X_test, unseen_y=y_test)
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))
    # Kept in its own name so the ``y`` argument is not overwritten.
    y_pred = clf.predict(X_test)
    print(mean_squared_error(y_pred, y_test))
    print(y_pred.shape)

    return clf, X_test, y_test
开发者ID:bhanu-mnit,项目名称:EvoML,代码行数:29,代码来源:test_auto_segmentEG_FEMPO.py


示例7: test_rrf_vs_sklearn_reg

    def test_rrf_vs_sklearn_reg(self):
        """Compare the R-backed forest with sklearn's on Boston housing.

        Both models are trained on the same 80/20 split and their test MSEs
        must agree to 0 decimals.
        """
        from sklearn.datasets import load_boston
        from sklearn.cross_validation import train_test_split
        from sklearn.metrics import mean_squared_error
        from sklearn.ensemble import RandomForestRegressor

        dataset = load_boston()
        split = train_test_split(dataset.data, dataset.target,
                                 test_size=0.2, random_state=13)
        X_train, X_test, y_train, y_test = split

        n_samples, n_features = X_train.shape
        mtry = int(np.floor(0.3 * n_features))

        # 100 trees. The options travel as a dict because 'corr.bias' is not
        # a valid Python keyword-argument name.
        r_options = {
            'ntree': 100,
            'nodesize': 1,
            'replace': 0,
            'mtry': mtry,
            'corr.bias': False,
            'sampsize': n_samples,
            'random_state': 1234,
        }
        r_forest = RRFEstimatorR(**r_options)
        r_forest.fit(X_train, y_train)
        r_predictions = r_forest.predict(X_test)
        r_mse = mean_squared_error(y_test, r_predictions)

        sk_forest = RandomForestRegressor(
            n_estimators=100,
            min_samples_leaf=1,
            bootstrap=False,
            max_features=mtry,
            random_state=1,
        )
        sk_forest.fit(X_train, y_train)
        sk_predictions = sk_forest.predict(X_test)
        p_mse = mean_squared_error(y_test, sk_predictions)

        print('%.4f vs %.4f' % (r_mse, p_mse))
        # The original author noted the two errors as roughly 7.6 vs. 7.2.
        np.testing.assert_almost_equal(r_mse, p_mse, decimal=0)
开发者ID:tkincaid,项目名称:tkincaid.github.com,代码行数:29,代码来源:test_rrf_bm.py


示例8: test_template

 def test_template(params={'objective': 'regression', 'metric': 'l2'},
                   X_y=load_boston(True), feval=mean_squared_error,
                   num_round=100, init_model=None, custom_eval=None,
                   early_stopping_rounds=10,
                   return_data=False, return_model=False):
     """Train a LightGBM model on a 90/10 split and report the outcome.

     Parameters
     ----------
     params : dict
         Training parameters. ``verbose`` and ``seed`` are forced to -1 and
         42 on a private copy; the dict passed in is left untouched.
     X_y : sequence
         Unpacked as the positional arguments of ``train_test_split``.
     feval : callable
         Called as ``feval(y_test, predictions)`` for the final metric.
     num_round, init_model, early_stopping_rounds
         Forwarded to ``lgb.train``.
     custom_eval
         Forwarded to ``lgb.train`` as its ``feval`` argument.
     return_data : bool
         When true, return ``(lgb_train, lgb_eval)`` without training.
     return_model : bool
         When true, return the trained booster.

     Returns
     -------
     ``(lgb_train, lgb_eval)``, the booster, or
     ``(evals_result, feval(y_test, predictions))``, depending on the flags.
     """
     # Work on a copy: writing into ``params`` directly would leak
     # verbose/seed into the shared default dict and into the caller's dict.
     params = dict(params)
     params['verbose'], params['seed'] = -1, 42
     X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train, params=params)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
     if return_data:
         return lgb_train, lgb_eval
     evals_result = {}
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=num_round,
                     valid_sets=lgb_eval,
                     valid_names='eval',
                     verbose_eval=False,
                     feval=custom_eval,
                     evals_result=evals_result,
                     early_stopping_rounds=early_stopping_rounds,
                     init_model=init_model)
     if return_model:
         return gbm
     return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))
开发者ID:kqdmqx,项目名称:LightGBM,代码行数:25,代码来源:test_engine.py


示例9: load_boston

    def load_boston():
        """Load the Boston housing data and return it shuffled.

        Returns whatever ``SklearnDataGenerator.shuffle`` produces for the
        feature matrix and the house-price target.
        """
        from sklearn.datasets import load_boston as fetch_boston

        # Feature columns, translated from the original author's notes:
        # CRIM    : crimes per capita
        # ZN      : share of residential lots larger than 25,000 sq ft
        # INDUS   : share of area occupied by non-retail business
        # CHAS    : Charles River dummy variable (1: near the river, 0: else)
        # NOX     : NOx concentration
        # RM      : average number of rooms per dwelling
        # AGE     : share of properties built before 1940
        # DIS     : weighted distance to five Boston employment centres
        # RAD     : ease of access to ring highways
        # TAX     : total property-tax rate per $10,000
        # PTRATIO : pupil-teacher ratio by town
        # B       : 1000(Bk - 0.63)^2, Bk = share of Black residents by town
        # LSTAT   : share of the population in low-paid occupations (%)
        dataset = fetch_boston()
        return SklearnDataGenerator.shuffle(dataset.data, dataset.target)
开发者ID:Munetaka,项目名称:labo,代码行数:26,代码来源:sklearn_data_generator.py


示例10: get_data

def get_data():
    """Fit a linear regression on Boston and render true vs. predicted prices.

    The model is fitted and evaluated on the same full dataset. The scatter
    plot, together with a dashed reference line from (0, 0) to (50, 50), is
    saved into an in-memory buffer.

    Returns
    -------
    io.BytesIO
        Buffer holding the saved figure, rewound to position 0.
    """
    # Local import keeps this block self-contained. ``io.BytesIO`` exists on
    # Python 2.6+ and 3, unlike the Python-2-only ``StringIO`` module, and a
    # bytes buffer is the right sink for binary image output.
    import io

    data = load_boston()
    clf = LinearRegression()
    clf.fit(data.data, data.target)
    predicted = clf.predict(data.data)

    plt.figure(num=None, figsize=(14, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.scatter(data.target, predicted)
    plt.plot([0, 50], [0, 50], '--k')
    plt.axis('tight')
    plt.xlabel('True price of Houses ($1000s)')
    plt.ylabel('Predicted price of Houses ($1000s)')

    img = io.BytesIO()
    plt.savefig(img, bbox_inches='tight')
    # Rewind so the caller can read the image from the start.
    img.seek(0)
    plt.close()

    return img
    
开发者ID:abhishekgahlot,项目名称:ml-project,代码行数:31,代码来源:linear_regression.py


示例11: generate_data

    def generate_data(case, sparse=False):
        """Generate shuffled regression / classification train and test data.

        Parameters
        ----------
        case : str
            ``'regression'`` loads the Boston dataset; ``'classification'``
            fetches the vectorized 20 newsgroups dataset (``subset='all'``).
        sparse : bool
            When true the feature matrices are converted with ``csr_matrix``,
            otherwise with ``np.array``.

        Returns
        -------
        dict
            Keys ``'X_train'``, ``'X_test'``, ``'y_train'``, ``'y_test'``;
            the first 80% of the shuffled samples form the training part.

        Raises
        ------
        ValueError
            If ``case`` is not one of the two supported values.
        """
        if case == 'regression':
            bunch = datasets.load_boston()
        elif case == 'classification':
            bunch = datasets.fetch_20newsgroups_vectorized(subset='all')
        else:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(
                "case must be 'regression' or 'classification', got %r"
                % (case,))
        X, y = shuffle(bunch.data, bunch.target)
        offset = int(X.shape[0] * 0.8)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]
        if sparse:
            X_train = csr_matrix(X_train)
            X_test = csr_matrix(X_test)
        else:
            X_train = np.array(X_train)
            X_test = np.array(X_test)
        y_test = np.array(y_test)
        y_train = np.array(y_train)
        data = {
            'X_train': X_train,
            'X_test': X_test,
            'y_train': y_train,
            'y_test': y_test,
        }

        return data
开发者ID:0x0all,项目名称:machineLearning,代码行数:27,代码来源:plot_model_complexity_influence.py


示例12: get_bar_plot

def get_bar_plot():
    """Build a bar plot of the normalised 10-bin histogram of the target.

    Bars sit at the bin centres and their heights are the bin counts divided
    by the total count.

    Returns the configured ``BarPlot``.
    """
    prices = datasets.load_boston()['target']

    counts, edges = np.histogram(prices, bins=10)
    frequencies = counts.astype('d') / counts.sum()
    centres = (edges[:-1] + edges[1:]) / 2.0

    index_src, value_src = get_data_sources(x=centres, y=frequencies)
    index_mapper, value_mapper = get_mappers(index_src, value_src)

    # Widen the x range by half a bin on each side; otherwise half of the
    # first and last bar would be cut off.
    half_bin = (edges[1] - edges[0]) / 2.
    index_mapper.range.low = centres[0] - half_bin
    index_mapper.range.high = centres[-1] + half_bin

    # Extend the upper end of the y range by 0.02.
    value_mapper.range.high += 0.02

    plot = BarPlot(
        index=index_src,
        value=value_src,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        fill_color='blue',
        bar_width=3.0,
        **PLOT_DEFAULTS
    )

    add_axes(plot, x_label='Median house prices', y_label='Frequency')
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:32,代码来源:create_plot_snapshots.py


示例13: get_jitter_plot

def get_jitter_plot():
    """Build a jitter plot of the Boston target with a bottom axis.

    Returns the configured ``JitterPlot``.
    """
    prices = datasets.load_boston()['target']

    index_src, value_src = get_data_sources(y=prices)
    # Only the second mapper is used by the plot below.
    _unused_mapper, value_mapper = get_mappers(index_src, value_src)

    plot = JitterPlot(
        index=value_src,
        mapper=value_mapper,
        marker='circle',
        jitter_width=100,
        **PLOT_DEFAULTS
    )
    plot.line_width = 1.

    # The axis reuses the plot's own mapper and is drawn as an underlay.
    bottom_axis = PlotAxis(
        orientation='bottom',
        title='Median house prices',
        mapper=plot.mapper,
        component=plot,
        **AXIS_DEFAULTS
    )
    plot.underlays.append(bottom_axis)

    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:25,代码来源:create_plot_snapshots.py


示例14: get_variable_size_scatter_plot

def get_variable_size_scatter_plot():
    """Build a scatter plot whose marker sizes follow data column 9.

    The target is plotted against the last data column. Marker sizes are
    column 9 (named ``tax`` by the original author) divided by its maximum
    and multiplied by 10.

    Returns the configured ``ScatterPlot``.
    """
    dataset = datasets.load_boston()
    median_price = dataset['target']
    pct_lower_status = dataset['data'][:, -1]
    tax_rate = dataset['data'][:, 9]

    index_src, value_src = get_data_sources(x=pct_lower_status,
                                            y=median_price)
    index_mapper, value_mapper = get_mappers(index_src, value_src)

    # Scale so the largest value maps to a marker size of 10.
    sizes = tax_rate / tax_rate.max() * 10.

    plot = ScatterPlot(
        index=index_src,
        value=value_src,
        index_mapper=index_mapper,
        value_mapper=value_mapper,
        marker='circle',
        marker_size=sizes,
        title='Size represents property-tax rate',
        **PLOT_DEFAULTS
    )
    plot.color = (0.0, 1.0, 0.3, 0.4)

    add_axes(
        plot,
        x_label='Percent lower status in the population',
        y_label='Median house prices',
    )
    return plot
开发者ID:5n1p,项目名称:chaco,代码行数:26,代码来源:create_plot_snapshots.py


示例15: load_extended_boston

def load_extended_boston():
    """Return expanded Boston features together with the target.

    The data is min-max scaled and then expanded with degree-2 polynomial
    features (no bias column).
    """
    boston = load_boston()
    scaled = MinMaxScaler().fit_transform(boston.data)
    expander = PolynomialFeatures(degree=2, include_bias=False)
    return expander.fit_transform(scaled), boston.target
开发者ID:MrGreenRubato,项目名称:notebooks,代码行数:7,代码来源:datasets.py


示例16: main

def main():
    """Plot Boston house prices against room count with a fitted line.

    A straight line is fitted by least squares, the training RMSE is
    printed, and the figure is written to ``image.png`` and then displayed.
    """
    # Load the Boston dataset.
    boston = datasets.load_boston()
    # Number of rooms (data column 5).
    rooms = boston.data[:, 5]
    # House prices.
    house_prices = boston.target

    plt.scatter(rooms, house_prices, color="r")

    # Least-squares fit of the line with the smallest error.
    x = np.array([[v, 1] for v in rooms])  # append a constant bias column
    y = house_prices
    (slope, bias), total_error, _, _ = np.linalg.lstsq(x, y)

    # Plot the fitted line.
    plt.plot(x[:, 0], slope * x[:, 0] + bias)

    # Training RMSE, from the first residual value returned by lstsq.
    rmse = np.sqrt(total_error[0] / len(x))
    msg = "RMSE (training): {0}".format(rmse)
    print(msg)

    plt.xlabel("Number of Room")
    plt.ylabel("Price of House ($1,000)")
    plt.grid()
    # Save before showing: once show() returns the figure may already be
    # closed, in which case savefig would write an empty image.
    plt.savefig("image.png")
    # Display the graph.
    plt.show()
开发者ID:id774,项目名称:sandbox,代码行数:29,代码来源:boston_reg.py


示例17: main

def main():
    """Scatter Boston house prices against room count with a fitted line.

    The line is obtained by least squares on the room counts plus a constant
    bias column, and the figure is shown interactively.
    """
    boston = datasets.load_boston()

    # Room counts are data column 5; house prices are the target.
    rooms = boston.data[:, 5]
    house_prices = boston.target

    # Plot the relation between room count and house price.
    plt.scatter(rooms, house_prices, color='r')

    # Design matrix: one row per sample, [rooms, 1] (1 is the bias term).
    design = np.array([[count, 1] for count in rooms])

    # Least-squares solution; only the coefficients are used here.
    fit = np.linalg.lstsq(design, house_prices)
    coefficients, _residuals, _rank, _singular_values = fit
    slope, bias = coefficients

    # Draw the fitted line over the scatter.
    room_column = design[:, 0]
    plt.plot(room_column, slope * room_column + bias)
    plt.grid()
    plt.xlabel('rooms')
    plt.ylabel('price')
    plt.show()
开发者ID:tsuboty,项目名称:LSM,代码行数:33,代码来源:boston.py


示例18: main

def main(unused_argv):
  """Train a DNN regressor on Boston housing and print its test MSE.

  The data is split 80/20. The scaler is fitted on the training part only
  and then reused to transform the test part before prediction.
  """
  dataset = datasets.load_boston()
  features, targets = dataset.data, dataset.target

  split = model_selection.train_test_split(
      features, targets, test_size=0.2, random_state=42)
  x_train, x_test, y_train, y_test = split

  # Standardise using statistics of the training set.
  scaler = preprocessing.StandardScaler()
  x_train = scaler.fit_transform(x_train)

  # Fully connected network with two hidden layers of 10 units each.
  columns = tf.contrib.learn.infer_real_valued_columns_from_input(x_train)
  regressor = tf.contrib.learn.DNNRegressor(
      feature_columns=columns, hidden_units=[10, 10])

  regressor.fit(x_train, y_train, steps=5000, batch_size=1)

  # Apply the already-fitted scaler to the held-out features.
  x_test_scaled = scaler.transform(x_test)

  predictions = regressor.predict(x_test_scaled, as_iterable=True)
  y_predicted = list(predictions)
  score = metrics.mean_squared_error(y_predicted, y_test)

  print('MSE: {0:f}'.format(score))
开发者ID:aravindvcyber,项目名称:tensorflow,代码行数:30,代码来源:boston.py


示例19: test_boston_housing_regression_with_sample_weights

def test_boston_housing_regression_with_sample_weights():
    """Fit XGBRegressor with unit sample weights on two Boston folds.

    For each fold, four prediction variants (default, and three combinations
    of ``output_margin`` / ``ntree_limit``) must stay below fixed MSE bounds.
    """
    tm._skip_if_no_sklearn()
    from sklearn.metrics import mean_squared_error
    from sklearn.datasets import load_boston
    from sklearn.cross_validation import KFold

    boston = load_boston()
    y = boston['target']
    X = boston['data']
    sample_weight = np.ones_like(y, 'float')
    folds = KFold(y.shape[0], n_folds=2, shuffle=True, random_state=rng)

    for train_index, test_index in folds:
        regressor = xgb.XGBRegressor()
        xgb_model = regressor.fit(
            X[train_index],
            y[train_index],
            sample_weight=sample_weight[train_index],
        )

        X_eval = X[test_index]
        labels = y[test_index]

        preds = xgb_model.predict(X_eval)
        # Exercise the other predict parameters as well.
        preds2 = xgb_model.predict(X_eval, output_margin=True, ntree_limit=3)
        preds3 = xgb_model.predict(X_eval, output_margin=True, ntree_limit=0)
        preds4 = xgb_model.predict(X_eval, output_margin=False, ntree_limit=3)

        # Each prediction variant paired with its upper MSE bound.
        bounded = ((preds, 25), (preds2, 370), (preds3, 25), (preds4, 370))
        for predicted, limit in bounded:
            assert mean_squared_error(predicted, labels) < limit
开发者ID:AlexisMignon,项目名称:xgboost,代码行数:29,代码来源:test_with_sklearn.py


示例20: overview

def overview():
    """Print the Boston variable descriptions and scatter each feature.

    One line ``NAME:<tab>description`` is printed per variable, including
    the target ``MEDV``. Then every column of ``boston.data`` is drawn
    against ``boston.target`` in its own cell of a 4x4 subplot grid.
    """
    boston = load_boston()
    # (name, description) pairs in the column order of ``boston.data``.
    # Column indices are derived with enumerate below, so they cannot drift
    # out of step with the list the way hand-written indices can.
    features = [
        ('CRIM', "per capita crime rate by town"),
        ('ZN', "proportion of residential land zoned for lots over 25,000 sq.ft."),
        ('INDUS', "proportion of non-retail business acres per town"),
        ('CHAS', "Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)"),
        ('NOX', "nitric oxides concentration (parts per 10 million)"),
        ('RM', "average number of rooms per dwelling"),
        ('AGE', "proportion of owner-occupied units built prior to 1940"),
        ('DIS', "weighted distances to five Boston employment centres"),
        ('RAD', "index of accessibility to radial highways"),
        ('TAX', "full-value property-tax rate per $10,000"),
        ('PTRATIO', "pupil-teacher ratio by town"),
        ('B', "1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town"),
        ('LSTAT', "% lower status of the population"),
    ]
    # MEDV is the target, not a data column: it is described in the printed
    # listing and forms the y axis of every subplot.
    target = ('MEDV', "Median value of owner-occupied homes in $1000's")

    plot_row = 4
    plot_col = 4
    plt.figure(figsize=(10, 10))
    for name, description in features + [target]:
        print('{}:\t{}'.format(name, description))

    for column, (name, _description) in enumerate(features):
        # plt.subplot(number of rows, number of columns, plot position)
        plt.subplot(plot_row, plot_col, column + 1)
        plt.scatter(boston.data[:, column], boston.target)
        plt.xlabel(name)
    plt.tight_layout()
开发者ID:achiku,项目名称:syakyou,代码行数:31,代码来源:boston.py



注:本文中的sklearn.datasets.load_boston函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python datasets.load_breast_cancer函数代码示例发布时间:2022-05-27
下一篇:
Python datasets.get_data_home函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap