Python datasets.load_breast_cancer Function Code Examples


This article collects typical usage examples of the Python function sklearn.datasets.load_breast_cancer. If you are wondering exactly how to call load_breast_cancer, what its arguments do, or simply want to see it used in real code, the curated examples below should help.



Twenty code examples of the load_breast_cancer function are shown below, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
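
As a quick orientation before the collected examples, here is a minimal sketch of the two common calling conventions of load_breast_cancer. It assumes only that scikit-learn is installed; the printed shapes match the dataset used throughout the examples below.

from sklearn.datasets import load_breast_cancer

# Default call: returns a Bunch with data, target, feature_names, target_names, DESCR, ...
cancer = load_breast_cancer()
print(cancer.data.shape)      # (569, 30)
print(cancer.target_names)    # ['malignant' 'benign']

# With return_X_y=True, the feature matrix and label vector are returned as a tuple
X, y = load_breast_cancer(return_X_y=True)
print(X.shape, y.shape)       # (569, 30) (569,)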

Example 1: test_load_breast_cancer

def test_load_breast_cancer():
    res = load_breast_cancer()
    assert_equal(res.data.shape, (569, 30))
    assert_equal(res.target.size, 569)
    assert_equal(res.target_names.size, 2)
    assert_true(res.DESCR)

    # test return_X_y option
    X_y_tuple = load_breast_cancer(return_X_y=True)
    bunch = load_breast_cancer()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
Developer: NazBen, Project: scikit-learn, Lines: 13, Source: test_base.py


Example 2: Breast_cancer

def Breast_cancer(training_size, test_size, n, PLOT_DATA):
    class_labels = [r'A', r'B']
    data, target = datasets.load_breast_cancer(True)
    sample_train, sample_test, label_train, label_test = train_test_split(data, target, test_size=0.3, random_state=12)

    # Now we standardize for a Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Now reduce number of features to number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1,+1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training_size samples from each class
    training_input = {key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels)}
    test_input = {key: (sample_train[label_train == k, :])[training_size:(
        training_size+test_size)] for k, key in enumerate(class_labels)}

    if PLOT_DATA:
        for k in range(0, 2):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])

        plt.title("PCA dim. reduced Breast cancer dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
Developer: GiuseppeOrlando878776, Project: qiskit-tutorials, Lines: 35, Source: svm_datasets.py


Example 3: test_dt

def test_dt():
    cancer = load_breast_cancer()
    X, y = cancer.data, cancer.target
    feature_names = cancer.feature_names

    sk_dt = SKDT(random_state=1, max_depth=3)
    our_dt = ClassificationTree(feature_names=feature_names, random_state=1)

    sk_dt.fit(X, y)
    our_dt.fit(X, y)

    sk_pred = sk_dt.predict_proba(X)
    our_pred = our_dt.predict_proba(X)
    assert np.allclose(sk_pred, our_pred)

    sk_pred = sk_dt.predict(X)
    our_pred = our_dt.predict(X)
    assert np.allclose(sk_pred, our_pred)

    # With labels
    local_expl = our_dt.explain_local(X, y)
    local_viz = local_expl.visualize(0)
    assert local_viz is not None

    # Without labels
    local_expl = our_dt.explain_local(X)
    local_viz = local_expl.visualize(0)
    assert local_viz is not None

    global_expl = our_dt.explain_global()
    global_viz = global_expl.visualize()
    assert global_viz is not None
Developer: caskeep, Project: interpret, Lines: 32, Source: test_decisiontree.py


Example 4: test_RFECV

def test_RFECV():
    from sklearn.datasets import load_boston
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFECV

    # Regression
    X, y = load_boston(return_X_y=True)
    bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
                           n_estimators=10, n_jobs=1,
                           objective='reg:squarederror',
                           random_state=0, verbosity=0)
    rfecv = RFECV(
        estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error')
    rfecv.fit(X, y)

    # Binary classification
    X, y = load_breast_cancer(return_X_y=True)
    bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1,
                            n_estimators=10, n_jobs=1,
                            objective='binary:logistic',
                            random_state=0, verbosity=0)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc')
    rfecv.fit(X, y)

    # Multi-class classification
    X, y = load_iris(return_X_y=True)
    bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear',
                            learning_rate=0.1,
                            n_estimators=10, n_jobs=1,
                            objective='multi:softprob',
                            random_state=0, reg_alpha=0.001, reg_lambda=0.01,
                            scale_pos_weight=0.5, verbosity=0)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss')
    rfecv.fit(X, y)
Developer: dmlc, Project: xgboost, Lines: 35, Source: test_with_sklearn.py


Example 5: main

def main():
    dataset = datasets.load_breast_cancer()
    features = dataset.data
    labels = dataset.target
    
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.3,
        stratify=labels)
    
    parameter_set = {'loss': ('hinge', 'squared_hinge'), 'C': [1, 10, 100, 1000, 5, 50, 500, 5000]}

    model = LinearSVC()
    grid_scores, best_score, best_params, test_score = validate_model(model=model, parameter_set=parameter_set,
        train_data=[train_features, train_labels], test_data=[test_features, test_labels])

    print(grid_scores)
    print('SVM best score: {}'.format(best_score))
    print('SVM best params : {}'.format(best_params))
    print('SVM test score : {}'.format(test_score))

    parameter_set = {'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'batch_size': [16, 32, 64, 128],}

    model = MLPClassifier()

    grid_scores, best_score, best_params, test_score = validate_model(model=model, parameter_set=parameter_set,
        train_data=[train_features, train_labels], test_data=[test_features, test_labels])

    print(grid_scores)
    print('MLP best score: {}'.format(best_score))
    print('MLP best params : {}'.format(best_params))
    print('MLP test score : {}'.format(test_score))
Developer: TaihuLight, Project: wisconsin-breast-cancer, Lines: 32, Source: grid_search.py


Example 6: test_early_stopping

 def test_early_stopping(self):
     X, y = load_breast_cancer(True)
     params = {
         'objective': 'binary',
         'metric': 'binary_logloss',
         'verbose': -1
     }
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     lgb_train = lgb.Dataset(X_train, y_train)
     lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
     valid_set_name = 'valid_set'
     # no early stopping
     gbm = lgb.train(params, lgb_train,
                     num_boost_round=10,
                     valid_sets=lgb_eval,
                     valid_names=valid_set_name,
                     verbose_eval=False,
                     early_stopping_rounds=5)
     self.assertEqual(gbm.best_iteration, 10)
     self.assertIn(valid_set_name, gbm.best_score)
     self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
     # early stopping occurs
     gbm = lgb.train(params, lgb_train,
                     valid_sets=lgb_eval,
                     valid_names=valid_set_name,
                     verbose_eval=False,
                     early_stopping_rounds=5)
     self.assertLessEqual(gbm.best_iteration, 100)
     self.assertIn(valid_set_name, gbm.best_score)
     self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
Developer: hubei2626662, Project: LightGBM, Lines: 30, Source: test_engine.py


Example 7: load_breast_cancer_df

def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Loads the breast cancer dataset into a dataframe with the
    target set as the "target" feature or whatever name
    is specified in ``tgt_name``.

    Parameters
    ----------

    include_tgt : bool, optional (default=True)
        Whether to include the target

    tgt_name : str, optional (default="target")
        The name of the target feature

    shuffle : bool, optional (default=False)
        Whether to shuffle the rows


    Returns
    -------

    X : pd.DataFrame, shape=(n_samples, n_features)
        The loaded dataset
    """
    bc = load_breast_cancer()
    X = pd.DataFrame.from_records(data=bc.data, columns=bc.feature_names)

    if include_tgt:
        X[tgt_name] = bc.target

    return X if not shuffle else shuffle_dataframe(X)
Developer: tgsmith61591, Project: skutil, Lines: 31, Source: util.py
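
A minimal usage sketch for the load_breast_cancer_df helper above. This is an illustration under assumptions, not part of the original project: it presumes the function is defined or importable as shown, along with pandas and scikit-learn, and the tgt_name value "diagnosis" is just an illustrative choice.

# Usage sketch for the helper above; "diagnosis" is an illustrative tgt_name,
# and shuffle is left at its default False so the shuffle_dataframe dependency is not needed.
df = load_breast_cancer_df(include_tgt=True, tgt_name="diagnosis")
print(df.shape)                        # (569, 31): 30 features plus the target column
print(df["diagnosis"].value_counts())  # 357 benign (1) vs. 212 malignant (0)

features_only = load_breast_cancer_df(include_tgt=False)
print(features_only.shape)             # (569, 30)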


Example 8: setUp

 def setUp(self):
     self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=1)
     self.train_data = lgb.Dataset(self.X_train, self.y_train)
     self.params = {
         "objective": "binary",
         "verbose": -1,
         "num_leaves": 3
     }
Developer: Se7enZHOU, Project: LightGBM, Lines: 8, Source: test_plotting.py


Example 9: test_binary

 def test_binary(self):
     X, y = load_breast_cancer(True)
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
     gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
     gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
     ret = log_loss(y_test, gbm.predict_proba(X_test))
     self.assertLess(ret, 0.15)
     self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
Developer: Se7enZHOU, Project: LightGBM, Lines: 8, Source: test_sklearn.py


Example 10: main

def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the corresponding labels for the features
    labels = datasets.load_breast_cancer().target

    # transform the labels to {-1, +1}
    labels[labels == 0] = -1

    # split the dataset to 70/30 partition: 70% train, 30% test
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels,
                                                                                test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset as per the batch size
    train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
    train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]
    test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
    test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]

    # instantiate the SVM class
    model = SVM(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, svm_c=arguments.svm_c, num_classes=NUM_CLASSES,
                num_features=num_features)

    # train the instantiated model
    model.train(epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0], result_path=arguments.result_path)

    test_conf, test_accuracy = utils.plot_confusion_matrix(phase='testing', path=arguments.result_path,
                                                           class_names=['benign', 'malignant'])

    print('True negatives : {}'.format(test_conf[0][0]))
    print('False negatives : {}'.format(test_conf[1][0]))
    print('True positives : {}'.format(test_conf[1][1]))
    print('False positives : {}'.format(test_conf[0][1]))
    print('Testing accuracy : {}'.format(test_accuracy))
Developer: TaihuLight, Project: wisconsin-breast-cancer, Lines: 46, Source: main_svm.py


Example 11: load_binary_data

 def load_binary_data(self, shuffled=True):
     samples = load_breast_cancer()
     if shuffled:
         self.X = shuffle(samples.data, random_state=self.SEED)
         self.y = shuffle(samples.target, random_state=self.SEED)
     else:
         self.X, self.y = samples.data, samples.target
     self.n_features = len(self.X[0])
Developer: nok, Project: scikit-learn-model-porting, Lines: 8, Source: Classifier.py


Example 12: test_binary

 def test_binary(self):
     X_y = load_breast_cancer(True)
     params = {
         'objective': 'binary',
         'metric': 'binary_logloss'
     }
     evals_result, ret = template.test_template(params, X_y, log_loss)
     self.assertLess(ret, 0.15)
     self.assertAlmostEqual(min(evals_result['eval']['binary_logloss']), ret, places=5)
Developer: kqdmqx, Project: LightGBM, Lines: 9, Source: test_engine.py


Example 13: test_issues_161_and_189

 def test_issues_161_and_189(self):
     """
     ensure DataManager(data).data == data
     """
     X, y = load_breast_cancer(True)
     X, y = X[15:40], y[15:40]
     model = KNeighborsClassifier(weights='distance', p=2, n_neighbors=10).fit(X, y)
     skater_model = InMemoryModel(model.predict_proba, examples=X, probability=True)
     assert skater_model.probability is True
     assert skater_model.model_type == StaticTypes.model_types.classifier
Developer: ashishyadavppe, Project: Skater, Lines: 10, Source: test_model.py


Example 14: train_breast_cancer

def train_breast_cancer(param_in):
    data = datasets.load_breast_cancer()
    X = scale(data.data)
    dtrain = xgb.DMatrix(X, label=data.target)
    param = {'objective': 'binary:logistic'}
    param.update(param_in)
    bst = xgb.train(param, dtrain, num_rounds)
    xgb_pred = bst.predict(dtrain)
    xgb_score = metrics.accuracy_score(data.target, np.round(xgb_pred))
    assert xgb_score >= 0.8
Developer: amitkr492, Project: MACHINE_LEARNING, Lines: 10, Source: test_linear.py


Example 15: test_load_breast_cancer

def test_load_breast_cancer():
    res = load_breast_cancer()
    assert_equal(res.data.shape, (569, 30))
    assert_equal(res.target.size, 569)
    assert_equal(res.target_names.size, 2)
    assert_true(res.DESCR)
    assert_true(os.path.exists(res.filename))

    # test return_X_y option
    check_return_X_y(res, partial(load_breast_cancer))
Developer: AlexisMignon, Project: scikit-learn, Lines: 10, Source: test_base.py


Example 16: test_chunked_dataset

    def test_chunked_dataset(self):
        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(True), test_size=0.1, random_state=2)

        chunk_size = X_train.shape[0] // 10 + 1
        X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)]
        X_test = [X_test[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_test.shape[0] // chunk_size + 1)]

        train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100})
        valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100})

        train_data.construct()
        valid_data.construct()
Developer: srngit, Project: LightGBM, Lines: 12, Source: test_basic.py


Example 17: main

def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the labels for the features
    labels = datasets.load_breast_cancer().target

    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.30,
                                                                                stratify=labels)

    model = MLP(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, node_size=NUM_NODES, num_classes=NUM_CLASSES,
                num_features=num_features)

    model.train(num_epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], test_data=[test_features, test_labels],
                test_size=test_features.shape[0], result_path=arguments.result_path)
Developer: TaihuLight, Project: wisconsin-breast-cancer, Lines: 22, Source: main_mlp.py


Example 18: load_cancer_data

def load_cancer_data():
    # clinical measurements of breast cancer tumors
    # for the classification
    from sklearn.datasets import load_breast_cancer
    cancer = load_breast_cancer() # cancer is like dict

    #import pdb; pdb.set_trace()
    # Format contain "replacement fields" surrounded by {}
    print("cancer.keys(): \n{}".format(cancer.keys()))
    print("shape of cancer data: {}".format(cancer.data.shape))
    print("sample counts per class:\n{}".format(
        {n:v for n, v in zip(cancer.target_names, np.bincount(cancer.target))}
    )) # bincount counts number of occurrences of each value in array of ints
    print("Feature names:\n{}".format(cancer.feature_names))
Developer: muyun, Project: dev.machinelearning, Lines: 14, Source: load_data.py


Example 19: train_cancer

def train_cancer(param_in, comparison_tree_method):
    data = load_breast_cancer()
    dtrain = xgb.DMatrix(data.data, label=data.target)
    param = {}
    param['objective'] = 'binary:logistic'
    param.update(param_in)
    res_tmp = {}
    res = {}
    num_rounds = 10
    xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')], evals_result=res_tmp)
    res[param['tree_method']] = res_tmp['train']['error']
    param["tree_method"] = comparison_tree_method
    xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')], evals_result=res_tmp)
    res[comparison_tree_method] = res_tmp['train']['error']
    return res
Developer: wyj2046, Project: xgboost, Lines: 15, Source: test_gpu_updaters.py


Example 20: load_datasets

def load_datasets():    
    iris = load_iris()
    iris_X, iris_y = iris['data'], iris['target']
    digits = load_digits()
    digits_X, digits_y = digits['data'], digits['target']
    breast_cancer = load_breast_cancer()
    breast_cancer_X, breast_cancer_y = breast_cancer['data'], breast_cancer['target']
    diabetes = load_diabetes()
    diabetes_X, diabetes_y = diabetes['data'], diabetes['target']
    mnist = fetch_mldata('MNIST original', data_home='datasets/')
    mnist_X, mnist_y = mnist['data'], mnist['target']
    datasets = {'iris': ("Iris Plants Dataset", iris_X, iris_y),
                'digits': ("UCI ML hand-written digits dataset", digits_X, digits_y),
                'breast_cancer': ("Breast Cancer Wisconsin (Diagnostic) Dataset", breast_cancer_X, breast_cancer_y),
                'mnist': ("The MNIST database of handwritten digits", mnist_X, mnist_y)}
    #,'diabetes':("Diabetes dataset",diabetes_X, diabetes_y)})}
    #'breast_cancer':("Breast Cancer Wisconsin (Diagnostic) Dataset",breast_cancer_X, breast_cancer_y),
    return datasets
Developer: Fixiki, Project: hotfixies, Lines: 15, Source: score_script.py



Note: the sklearn.datasets.load_breast_cancer examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and distribution and use should follow the corresponding projects' licenses. Do not republish without permission.

