本文整理汇总了Python中sklearn.datasets.load_breast_cancer函数的典型用法代码示例。如果您正苦于以下问题:Python load_breast_cancer函数的具体用法?Python load_breast_cancer怎么用?Python load_breast_cancer使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load_breast_cancer函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_load_breast_cancer
def test_load_breast_cancer():
    """Check the Bunch returned by ``load_breast_cancer`` and its ``return_X_y`` mode."""
    dataset = load_breast_cancer()

    # Shape and metadata checks on the default (Bunch) return value.
    assert_equal(dataset.data.shape, (569, 30))
    assert_equal(dataset.target.size, 569)
    assert_equal(dataset.target_names.size, 2)
    assert_true(dataset.DESCR)

    # With return_X_y=True the loader must return a tuple whose first two
    # items match the data and target of a freshly loaded Bunch.
    xy_pair = load_breast_cancer(return_X_y=True)
    reference = load_breast_cancer()
    assert_true(isinstance(xy_pair, tuple))
    assert_array_equal(xy_pair[0], reference.data)
    assert_array_equal(xy_pair[1], reference.target)
开发者ID:NazBen,项目名称:scikit-learn,代码行数:13,代码来源:test_base.py
示例2: Breast_cancer
def Breast_cancer(training_size, test_size, n, PLOT_DATA):
    """Build per-class training/test samples from the breast cancer dataset.

    The data is split 70/30 with a fixed seed, standardized with statistics
    fitted on the training part, projected onto ``n`` principal components
    and finally min-max scaled to the range (-1, +1).

    Parameters
    ----------
    training_size : int
        Number of samples per class placed in ``training_input``.
    test_size : int
        Number of samples per class placed in ``test_input``.
    n : int
        Number of PCA components to keep (the original comment ties this to
        the number of qubits).
    PLOT_DATA : bool
        When true, scatter-plot the first two components of the selected
        training samples.

    Returns
    -------
    tuple
        ``(sample_train, training_input, test_input, class_labels)`` where the
        two ``*_input`` values are dicts keyed by the class labels 'A' / 'B'.
    """
    class_labels = [r'A', r'B']
    # ``return_X_y`` must be passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    data, target = datasets.load_breast_cancer(return_X_y=True)
    sample_train, sample_test, label_train, label_test = train_test_split(
        data, target, test_size=0.3, random_state=12)

    # Now we standardize for gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Now reduce number of features to number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1,+1); the scaler is fitted on train and test together
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training size number of samples from each distro
    training_input = {
        key: (sample_train[label_train == k, :])[:training_size]
        for k, key in enumerate(class_labels)
    }
    # NOTE(review): the "test" samples are the next ``test_size`` rows of the
    # *training* split, not rows of ``sample_test`` -- kept as in the original;
    # confirm whether this is intended.
    test_input = {
        key: (sample_train[label_train == k, :])[training_size:(training_size + test_size)]
        for k, key in enumerate(class_labels)
    }

    if PLOT_DATA:
        for k in range(0, 2):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])
        plt.title("PCA dim. reduced Breast cancer dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
开发者ID:GiuseppeOrlando878776,项目名称:qiskit-tutorials,代码行数:35,代码来源:svm_datasets.py
示例3: test_dt
def test_dt():
    """Compare ``ClassificationTree`` with the ``SKDT`` reference and smoke-test its explanations."""
    dataset = load_breast_cancer()
    features, labels = dataset.data, dataset.target

    reference_tree = SKDT(random_state=1, max_depth=3)
    tree = ClassificationTree(feature_names=dataset.feature_names, random_state=1)
    reference_tree.fit(features, labels)
    tree.fit(features, labels)

    # Class probabilities and hard predictions must both match the reference.
    assert np.allclose(reference_tree.predict_proba(features), tree.predict_proba(features))
    assert np.allclose(reference_tree.predict(features), tree.predict(features))

    # Local explanation built with labels.
    labelled_explanation = tree.explain_local(features, labels)
    assert labelled_explanation.visualize(0) is not None

    # Local explanation built without labels.
    unlabelled_explanation = tree.explain_local(features)
    assert unlabelled_explanation.visualize(0) is not None

    # Global explanation.
    overall_explanation = tree.explain_global()
    assert overall_explanation.visualize() is not None
开发者ID:caskeep,项目名称:interpret,代码行数:32,代码来源:test_decisiontree.py
示例4: test_RFECV
def test_RFECV():
    """Run ``RFECV`` around gblinear XGBoost estimators.

    Covers three tasks: regression, binary classification and multi-class
    classification. The test passes when every ``fit`` completes.
    """
    from sklearn.datasets import load_boston
    from sklearn.datasets import load_breast_cancer
    from sklearn.datasets import load_iris
    from sklearn.feature_selection import RFECV

    # Regression: the targets are continuous and the objective/scoring are
    # regression ones, so the estimator must be a regressor (the original
    # used XGBClassifier here).
    # NOTE(review): load_boston was removed in scikit-learn 1.2; on newer
    # versions another regression dataset is needed -- confirm target version.
    X, y = load_boston(return_X_y=True)
    bst = xgb.XGBRegressor(booster='gblinear', learning_rate=0.1,
                           n_estimators=10, n_jobs=1,
                           objective='reg:squarederror',
                           random_state=0, verbosity=0)
    rfecv = RFECV(
        estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error')
    rfecv.fit(X, y)

    # Binary classification
    X, y = load_breast_cancer(return_X_y=True)
    bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1,
                            n_estimators=10, n_jobs=1,
                            objective='binary:logistic',
                            random_state=0, verbosity=0)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='roc_auc')
    rfecv.fit(X, y)

    # Multi-class classification
    X, y = load_iris(return_X_y=True)
    bst = xgb.XGBClassifier(base_score=0.4, booster='gblinear',
                            learning_rate=0.1,
                            n_estimators=10, n_jobs=1,
                            objective='multi:softprob',
                            random_state=0, reg_alpha=0.001, reg_lambda=0.01,
                            scale_pos_weight=0.5, verbosity=0)
    rfecv = RFECV(estimator=bst, step=1, cv=3, scoring='neg_log_loss')
    rfecv.fit(X, y)
开发者ID:dmlc,项目名称:xgboost,代码行数:35,代码来源:test_with_sklearn.py
示例5: main
def main():
    """Grid-search a LinearSVC and an MLPClassifier on the breast cancer data and print the results."""
    cancer = datasets.load_breast_cancer()
    # 70/30 stratified split; no random_state is given, so the split varies per run.
    x_train, x_test, y_train, y_test = train_test_split(
        cancer.data, cancer.target, test_size=0.3, stratify=cancer.target)

    # --- Linear SVM ---
    svm_grid = {
        'loss': ('hinge', 'squared_hinge'),
        'C': [1, 10, 100, 1000, 5, 50, 500, 5000],
    }
    svm_scores, svm_best_score, svm_best_params, svm_test_score = validate_model(
        model=LinearSVC(),
        parameter_set=svm_grid,
        train_data=[x_train, y_train],
        test_data=[x_test, y_test],
    )
    print(svm_scores)
    print('SVM best score: {}'.format(svm_best_score))
    print('SVM best params : {}'.format(svm_best_params))
    print('SVM test score : {}'.format(svm_test_score))

    # --- Multi-layer perceptron ---
    mlp_grid = {
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'batch_size': [16, 32, 64, 128],
    }
    mlp_scores, mlp_best_score, mlp_best_params, mlp_test_score = validate_model(
        model=MLPClassifier(),
        parameter_set=mlp_grid,
        train_data=[x_train, y_train],
        test_data=[x_test, y_test],
    )
    print(mlp_scores)
    print('MLP best score: {}'.format(mlp_best_score))
    print('MLP best params : {}'.format(mlp_best_params))
    print('MLP test score : {}'.format(mlp_test_score))
开发者ID:TaihuLight,项目名称:wisconsin-breast-cancer,代码行数:32,代码来源:grid_search.py
示例6: test_early_stopping
def test_early_stopping(self):
    """Check ``best_iteration`` / ``best_score`` of ``lgb.train`` with early stopping enabled.

    Two runs are made on the same split: one capped at 10 boosting rounds,
    where the full 10 rounds are expected to run, and one with the default
    number of rounds, where ``best_iteration`` is asserted to be at most 100.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    X, y = load_breast_cancer(return_X_y=True)
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbose': -1
    }
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
    valid_set_name = 'valid_set'

    # no early stopping: all 10 rounds are expected to be used
    gbm = lgb.train(params, lgb_train,
                    num_boost_round=10,
                    valid_sets=lgb_eval,
                    valid_names=valid_set_name,
                    verbose_eval=False,
                    early_stopping_rounds=5)
    self.assertEqual(gbm.best_iteration, 10)
    self.assertIn(valid_set_name, gbm.best_score)
    self.assertIn('binary_logloss', gbm.best_score[valid_set_name])

    # early stopping occurs
    gbm = lgb.train(params, lgb_train,
                    valid_sets=lgb_eval,
                    valid_names=valid_set_name,
                    verbose_eval=False,
                    early_stopping_rounds=5)
    self.assertLessEqual(gbm.best_iteration, 100)
    self.assertIn(valid_set_name, gbm.best_score)
    self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
开发者ID:hubei2626662,项目名称:LightGBM,代码行数:30,代码来源:test_engine.py
示例7: load_breast_cancer_df
def load_breast_cancer_df(include_tgt=True, tgt_name="target", shuffle=False):
    """Return the breast cancer dataset as a pandas DataFrame.

    Feature columns are named after the dataset's ``feature_names``. The
    target can optionally be appended as an extra column.

    Parameters
    ----------
    include_tgt : bool, optional (default=True)
        If true, add the target values as a column.

    tgt_name : str, optional (default="target")
        Column name used for the target when it is included.

    shuffle : bool, optional (default=False)
        If true, pass the frame through ``shuffle_dataframe`` before
        returning it.

    Returns
    -------
    X : pd.DataFrame
        The dataset, one row per sample.
    """
    dataset = load_breast_cancer()
    frame = pd.DataFrame.from_records(data=dataset.data, columns=dataset.feature_names)

    if include_tgt:
        frame[tgt_name] = dataset.target

    if shuffle:
        return shuffle_dataframe(frame)
    return frame
开发者ID:tgsmith61591,项目名称:skutil,代码行数:31,代码来源:util.py
示例8: setUp
def setUp(self):
    """Prepare the train/test split, the LightGBM training Dataset and shared params."""
    # ``return_X_y`` is passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    X, y = load_breast_cancer(return_X_y=True)
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        X, y, test_size=0.1, random_state=1)
    self.train_data = lgb.Dataset(self.X_train, self.y_train)
    self.params = {
        "objective": "binary",
        "verbose": -1,
        "num_leaves": 3
    }
开发者ID:Se7enZHOU,项目名称:LightGBM,代码行数:8,代码来源:test_plotting.py
示例9: test_binary
def test_binary(self):
    """Fit an ``LGBMClassifier`` and check its held-out log loss.

    The log loss on the test split must be below 0.15 and must agree (to 5
    places) with the value recorded in ``evals_result_`` at the best iteration.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
    gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
    ret = log_loss(y_test, gbm.predict_proba(X_test))
    self.assertLess(ret, 0.15)
    # best_iteration_ is used as a 1-based index into the recorded history.
    self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
开发者ID:Se7enZHOU,项目名称:LightGBM,代码行数:8,代码来源:test_sklearn.py
示例10: main
def main(arguments):
    """Train and evaluate the project's ``SVM`` model on the breast cancer dataset.

    ``arguments`` must provide ``svm_c``, ``num_epochs``, ``log_path`` and
    ``result_path``. After training, the confusion-matrix counts and the test
    accuracy returned by ``utils.plot_confusion_matrix`` are printed.
    """
    # load the dataset once; features and labels both come from this object
    # (the original loaded it twice)
    dataset = datasets.load_breast_cancer()

    # standardize the features
    features = StandardScaler().fit_transform(dataset.data)

    # get the number of features
    num_features = features.shape[1]

    # load the corresponding labels for the features
    labels = dataset.target

    # transform the labels to {-1, +1}
    labels[labels == 0] = -1

    # split the dataset to 70/30 partition: 70% train, 30% test
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels,
                                                                                test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset as per the batch size: drop the trailing samples that
    # do not fill a whole batch
    train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
    train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]
    test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
    test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]

    # instantiate the SVM class
    model = SVM(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, svm_c=arguments.svm_c, num_classes=NUM_CLASSES,
                num_features=num_features)

    # train the instantiated model
    model.train(epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0], result_path=arguments.result_path)

    test_conf, test_accuracy = utils.plot_confusion_matrix(phase='testing', path=arguments.result_path,
                                                           class_names=['benign', 'malignant'])

    print('True negatives : {}'.format(test_conf[0][0]))
    print('False negatives : {}'.format(test_conf[1][0]))
    print('True positives : {}'.format(test_conf[1][1]))
    print('False positives : {}'.format(test_conf[0][1]))
    print('Testing accuracy : {}'.format(test_accuracy))
开发者ID:TaihuLight,项目名称:wisconsin-breast-cancer,代码行数:46,代码来源:main_svm.py
示例11: load_binary_data
def load_binary_data(self, shuffled=True):
    """Load the breast cancer dataset into ``self.X`` / ``self.y``.

    Also sets ``self.n_features`` to the length of the first sample.

    Parameters
    ----------
    shuffled : bool, optional (default=True)
        When true, rows are shuffled using ``self.SEED`` as the random state.
    """
    samples = load_breast_cancer()
    if shuffled:
        # Shuffle data and target in ONE call so both receive the same
        # permutation. Two separate calls only stay aligned when SEED is a
        # fixed integer; with SEED=None the labels would be mismatched.
        self.X, self.y = shuffle(samples.data, samples.target, random_state=self.SEED)
    else:
        self.X, self.y = samples.data, samples.target
    self.n_features = len(self.X[0])
开发者ID:nok,项目名称:scikit-learn-model-porting,代码行数:8,代码来源:Classifier.py
示例12: test_binary
def test_binary(self):
    """Run the shared test template on a binary objective and check the log loss.

    The returned loss must be below 0.15 and equal (to 5 places) to the
    minimum ``binary_logloss`` recorded for the 'eval' set.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    X_y = load_breast_cancer(return_X_y=True)
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss'
    }
    evals_result, ret = template.test_template(params, X_y, log_loss)
    self.assertLess(ret, 0.15)
    self.assertAlmostEqual(min(evals_result['eval']['binary_logloss']), ret, places=5)
开发者ID:kqdmqx,项目名称:LightGBM,代码行数:9,代码来源:test_engine.py
示例13: test_issues_161_and_189
def test_issues_161_and_189(self):
    """Regression test named after issues 161 and 189.

    Wraps the ``predict_proba`` of a KNN classifier (fitted on a 25-row slice
    of the data) in ``InMemoryModel`` and checks that the wrapper reports
    itself as a probabilistic classifier.

    NOTE(review): the original docstring read "ensure DataManager(data).data
    == data", which the assertions below do not check -- confirm intent.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    X, y = load_breast_cancer(return_X_y=True)
    X, y = X[15:40], y[15:40]
    model = KNeighborsClassifier(weights='distance', p=2, n_neighbors=10).fit(X, y)
    skater_model = InMemoryModel(model.predict_proba, examples=X, probability=True)
    assert skater_model.probability is True
    assert skater_model.model_type == StaticTypes.model_types.classifier
开发者ID:ashishyadavppe,项目名称:Skater,代码行数:10,代码来源:test_model.py
示例14: train_breast_cancer
def train_breast_cancer(param_in):
    """Train a booster on the scaled breast cancer data and require >= 0.8 training accuracy.

    ``param_in`` is merged over the default ``binary:logistic`` objective.
    ``num_rounds`` is not defined in this function; it is presumably a
    module-level constant -- verify against the rest of the file.
    """
    dataset = datasets.load_breast_cancer()
    scaled_features = scale(dataset.data)
    train_matrix = xgb.DMatrix(scaled_features, label=dataset.target)

    params = {'objective': 'binary:logistic'}
    params.update(param_in)

    booster = xgb.train(params, train_matrix, num_rounds)

    # Predictions are rounded to the nearest integer before scoring.
    predicted_labels = np.round(booster.predict(train_matrix))
    accuracy = metrics.accuracy_score(dataset.target, predicted_labels)
    assert accuracy >= 0.8
开发者ID:amitkr492,项目名称:MACHINE_LEARNING,代码行数:10,代码来源:test_linear.py
示例15: test_load_breast_cancer
def test_load_breast_cancer():
    """Check the Bunch returned by ``load_breast_cancer``, including its ``filename``."""
    bunch = load_breast_cancer()

    # Shape and metadata checks.
    assert_equal(bunch.data.shape, (569, 30))
    assert_equal(bunch.target.size, 569)
    assert_equal(bunch.target_names.size, 2)
    assert_true(bunch.DESCR)

    # The Bunch must point at a file that exists on disk.
    file_is_present = os.path.exists(bunch.filename)
    assert_true(file_is_present)

    # test return_X_y option via the shared helper
    loader = partial(load_breast_cancer)
    check_return_X_y(bunch, loader)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:10,代码来源:test_base.py
示例16: test_chunked_dataset
def test_chunked_dataset(self):
    """Construct LightGBM train/validation Datasets from lists of row chunks."""
    # ``return_X_y`` is passed by keyword: scikit-learn deprecated the
    # positional form and newer releases reject it.
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=2)

    # Chunk size is derived from the training set and reused for the test set.
    chunk_size = X_train.shape[0] // 10 + 1
    X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)]
    X_test = [X_test[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_test.shape[0] // chunk_size + 1)]

    train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100})
    valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100})

    # The test passes when both constructions complete without raising.
    train_data.construct()
    valid_data.construct()
开发者ID:srngit,项目名称:LightGBM,代码行数:12,代码来源:test_basic.py
示例17: main
def main(arguments):
    """Train the project's ``MLP`` model on the breast cancer dataset.

    ``arguments`` must provide ``num_epochs``, ``log_path`` and
    ``result_path``.
    """
    # load the dataset once; features and labels both come from this object
    # (the original loaded it twice)
    dataset = datasets.load_breast_cancer()

    # standardize the features
    features = StandardScaler().fit_transform(dataset.data)

    # get the number of features
    num_features = features.shape[1]

    # load the labels for the features
    labels = dataset.target

    # 70/30 stratified split; no random_state is given, so it varies per run
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.30,
                                                                                stratify=labels)

    model = MLP(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, node_size=NUM_NODES, num_classes=NUM_CLASSES,
                num_features=num_features)

    model.train(num_epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], test_data=[test_features, test_labels],
                test_size=test_features.shape[0], result_path=arguments.result_path)
开发者ID:TaihuLight,项目名称:wisconsin-breast-cancer,代码行数:22,代码来源:main_mlp.py
示例18: load_cancer_data
def load_cancer_data():
    """Load the breast cancer dataset and print a short summary of it.

    Prints the available keys, the shape of the data, the number of samples
    per class and the feature names. Nothing is returned.
    """
    from sklearn.datasets import load_breast_cancer

    bunch = load_breast_cancer()

    print("cancer.keys(): \n{}".format(bunch.keys()))
    print("shape of cancer data: {}".format(bunch.data.shape))

    # np.bincount counts how often each integer label occurs; pairing the
    # counts with target_names yields a per-class sample count.
    per_class_counts = dict(zip(bunch.target_names, np.bincount(bunch.target)))
    print("sample counts per class:\n{}".format(per_class_counts))

    print("Feature names:\n{}".format(bunch.feature_names))
开发者ID:muyun,项目名称:dev.machinelearning,代码行数:14,代码来源:load_data.py
示例19: train_cancer
def train_cancer(param_in, comparison_tree_method):
    """Train on the breast cancer data with two tree methods and collect training errors.

    The first run uses the ``tree_method`` found in the merged parameters
    (``param_in`` over a ``binary:logistic`` objective); the second run uses
    ``comparison_tree_method``. Both run for 10 rounds.

    Returns a dict mapping each tree method to the 'error' history recorded
    for the 'train' evaluation set.
    """
    dataset = load_breast_cancer()
    train_matrix = xgb.DMatrix(dataset.data, label=dataset.target)

    params = {'objective': 'binary:logistic'}
    params.update(param_in)

    num_rounds = 10
    history = {}
    errors_by_method = {}

    # First run: tree method as supplied by the caller.
    xgb.train(params, train_matrix, num_rounds, [(train_matrix, 'train')], evals_result=history)
    errors_by_method[params['tree_method']] = history['train']['error']

    # Second run: same parameters, only the tree method is swapped.
    params["tree_method"] = comparison_tree_method
    xgb.train(params, train_matrix, num_rounds, [(train_matrix, 'train')], evals_result=history)
    errors_by_method[comparison_tree_method] = history['train']['error']

    return errors_by_method
开发者ID:wyj2046,项目名称:xgboost,代码行数:15,代码来源:test_gpu_updaters.py
示例20: load_datasets
def load_datasets():
    """Load several datasets and return them keyed by a short name.

    Returns a dict whose keys are 'iris', 'digits', 'breast_cancer' and
    'mnist'; each value is a ``(description, X, y)`` tuple.
    """
    def _features_and_target(bunch):
        # Pull the feature matrix and target out of a loaded dataset.
        return bunch['data'], bunch['target']

    iris_X, iris_y = _features_and_target(load_iris())
    digits_X, digits_y = _features_and_target(load_digits())
    breast_cancer_X, breast_cancer_y = _features_and_target(load_breast_cancer())

    # The diabetes data is loaded but not included in the returned mapping.
    diabetes_X, diabetes_y = _features_and_target(load_diabetes())

    # NOTE(review): fetch_mldata is not available in recent scikit-learn
    # releases -- confirm the targeted version.
    mnist_X, mnist_y = _features_and_target(
        fetch_mldata('MNIST original', data_home='datasets/'))

    catalog = {
        'iris': ("Iris Plants Dataset", iris_X, iris_y),
        'digits': ("UCI ML hand-written digits dataset", digits_X, digits_y),
        'breast_cancer': ("Breast Cancer Wisconsin (Diagnostic) Dataset", breast_cancer_X, breast_cancer_y),
        'mnist': ("The MNIST database of handwritten digits", mnist_X, mnist_y),
    }
    return catalog
开发者ID:Fixiki,项目名称:hotfixies,代码行数:15,代码来源:score_script.py
注:本文中的sklearn.datasets.load_breast_cancer函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论