本文整理汇总了Python中sklearn.datasets.make_blobs函数的典型用法代码示例。如果您正苦于以下问题：Python make_blobs函数的具体用法？Python make_blobs怎么用？Python make_blobs使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了make_blobs函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: check_transformer_pickle
def check_transformer_pickle(name, Transformer):
    """Check that a fitted transformer transforms identically after pickling.

    Parameters
    ----------
    name : str
        Estimator name; only compared against ``CROSS_DECOMPOSITION`` to
        decide whether a two-column target has to be built.
    Transformer : class
        Estimator class, instantiated without arguments.  The check
        returns early when the instance has no ``transform`` attribute.
    """
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X = StandardScaler().fit_transform(X)
    # shift the data so that its global minimum is zero
    X -= X.min()

    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        transformer = Transformer()
    if not hasattr(transformer, 'transform'):
        return
    set_random_state(transformer)
    set_fast_parameters(transformer)

    # fit
    if name in CROSS_DECOMPOSITION:
        # these estimators get a two-column target: y and a noisy 2 * y
        rng = np.random.RandomState(seed=12345)
        y_ = np.vstack([y, 2 * y + rng.randint(2, size=len(y))]).T
    else:
        y_ = y

    # The estimator is fitted twice, exactly as before; the reference
    # output comes from the second fit.
    transformer.fit(X, y_)
    X_pred = transformer.fit(X, y_).transform(X)

    unpickled_transformer = pickle.loads(pickle.dumps(transformer))
    pickled_X_pred = unpickled_transformer.transform(X)
    assert_array_almost_equal(pickled_X_pred, X_pred)
开发者ID:AlexMarshall011,项目名称:scikit-learn,代码行数:29,代码来源:estimator_checks.py
示例2: check_classifiers_classes
def check_classifiers_classes(name, Classifier):
    """Check ``predict`` output and ``classes_`` for string-valued targets.

    The classifier is fitted once with a string label array and once with
    the same labels as an object array.  ``LabelPropagation`` and
    ``LabelSpreading`` are fitted on the integer labels instead.  A
    mismatching ``classes_`` attribute is only reported with ``print``.
    """
    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non negative data, for things
    # like NMF
    X -= X.min() - .1

    string_labels = np.array(["one", "two", "three"])[y]
    keeps_integer_labels = name in ["LabelPropagation", "LabelSpreading"]

    for label_variant in (string_labels, string_labels.astype('O')):
        # TODO some complication with -1 label
        y_ = y if keeps_integer_labels else label_variant
        classes = np.unique(y_)

        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name == 'BernoulliNB':
            classifier.set_params(binarize=X.mean())
        set_fast_parameters(classifier)

        # fit, then predict on the training set itself
        classifier.fit(X, y_)
        y_pred = classifier.predict(X)

        # every training class must show up among the predictions
        assert_array_equal(np.unique(y_), np.unique(y_pred))

        if np.any(classifier.classes_ != classes):
            print("Unexpected classes_ attribute for %r: "
                  "expected %s, got %s" %
                  (classifier, classes, classifier.classes_))
开发者ID:AlexMarshall011,项目名称:scikit-learn,代码行数:33,代码来源:estimator_checks.py
示例3: test_thresholded_scorers
def test_thresholded_scorers():
    """Test scorers that take thresholds."""
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Estimator exposing both decision_function and predict_proba: the
    # scorer must agree with roc_auc_score computed from either output.
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    scorer_auc = SCORERS['roc_auc'](clf, X_test, y_test)
    decision_auc = roc_auc_score(y_test, clf.decision_function(X_test))
    proba_auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(scorer_auc, decision_auc)
    assert_almost_equal(scorer_auc, proba_auc)

    # The 'log_loss' scorer value is compared after flipping its sign.
    scorer_log_loss = SCORERS['log_loss'](clf, X_test, y_test)
    expected_log_loss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-scorer_log_loss, expected_log_loss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    scorer_auc = SCORERS['roc_auc'](clf, X_test, y_test)
    proba_auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(scorer_auc, proba_auc)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    assert_raises(ValueError, SCORERS['roc_auc'], clf, X_test, y_test)
开发者ID:2011200799,项目名称:scikit-learn,代码行数:28,代码来源:test_score_objects.py
示例4: test_fit_uuu
def test_fit_uuu():
    """Fit a two-component 'UUU' PGMM on blob data and check the result.

    15000 five-dimensional points are drawn: 10000 from two centers and
    5000 more from the first of those centers.  The fitted means, weights
    and the shapes of the fitted attributes are then asserted.
    """
    n_features = 5

    # first batch: two centers with per-feature standard deviations
    n_samples1 = 10000
    centers1 = np.array([[10, 5, 1, -5, -10],
                         [-10, -5, -1, 5, 10]])
    cluster_std1 = np.array([[1.0, 2.0, 3.0, 4.0, 5.0],
                             [5.0, 4.0, 3.0, 2.0, 1.0]])
    X1, y1 = make_blobs(n_features=n_features,
                        n_samples=n_samples1,
                        centers=centers1,
                        cluster_std=cluster_std1)

    # second batch: extra points around the first center only
    n_samples2 = 5000
    centers2 = np.array([[10, 5, 1, -5, -10]])
    cluster_std2 = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]])
    X2, y2 = make_blobs(n_features=n_features,
                        n_samples=n_samples2,
                        centers=centers2,
                        cluster_std=cluster_std2)

    X = np.vstack((X1, X2))
    model = mixture.PGMM(covariance_type='UUU', n_components=2, n_pc=3)
    model.fit(X)

    assert_array_almost_equal(np.sum(model.means_, 0), np.sum(centers1, 0),
                              decimal=0)
    assert_array_almost_equal(np.sort(model.weights_),
                              np.array([0.333, 0.666]), decimal=1)

    expected_shapes = [
        (model.means_, [2, n_features]),
        (model.weights_, [2]),
        (model.noise_, [2, n_features]),
        (model.principal_subspace_, [2, n_features, 3]),
        (model.covars_, [2, n_features, n_features]),
    ]
    for fitted_attribute, shape in expected_shapes:
        assert_equal(fitted_attribute.shape, np.array(shape))

    logging.info('TestFitUUU: OK')
开发者ID:akionakamura,项目名称:scikit-learn,代码行数:32,代码来源:test_pgmm.py
示例5: test_check_is_fitted
def test_check_is_fitted():
    """Exercise ``check_is_fitted`` on invalid, unfitted and fitted inputs."""
    # Check is ValueError raised when non estimator instance passed
    assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_")
    assert_raises(TypeError, check_is_fitted, "SVR", "support_")

    ard_model = ARDRegression()
    svr_model = SVR()

    try:
        assert_raises(NotFittedError, check_is_fitted, ard_model, "coef_")
        assert_raises(NotFittedError, check_is_fitted, svr_model, "support_")
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError,
    # so the custom message is inspected once through each base class.
    ard_message = "Random message %(name)s, %(name)s"
    try:
        check_is_fitted(ard_model, "coef_", ard_message)
    except ValueError as error:
        assert_equal(str(error),
                     "Random message ARDRegression, ARDRegression")

    svr_message = "Another message %(name)s, %(name)s"
    try:
        check_is_fitted(svr_model, "support_", svr_message)
    except AttributeError as error:
        assert_equal(str(error), "Another message SVR, SVR")

    # Once fitted, check_is_fitted returns None for both estimators.
    ard_model.fit(*make_blobs())
    svr_model.fit(*make_blobs())

    assert_equal(None, check_is_fitted(ard_model, "coef_"))
    assert_equal(None, check_is_fitted(svr_model, "support_"))
开发者ID:Afey,项目名称:scikit-learn,代码行数:30,代码来源:test_validation.py
示例6: show_dbscan
def show_dbscan():
    """
    Simulate 1 month of normal hourly room percentage data followed by an
    anomalous percentage, cluster it and plot the result.

    The normal data is bimodal, following most people's activity patterns
    in which there is routinely a weekday percentage and a weekend
    percentage.  One day in which the person spends a large amount of time
    in the bathroom is simulated as the last sample.

    Takes no arguments and returns nothing; a matplotlib figure is shown.
    """
    # simulate normal hourly data as (centers, cluster_std);
    # the two features are ordered bath, bed
    weekday = ([0.05, 0.95], 0.05)
    weekend = ([0.3, 0.7], 0.1)
    roomperwd, _ = make_blobs(n_samples=23, centers=weekday[0],
                              cluster_std=weekday[1], random_state=0)
    roomperwe, _ = make_blobs(n_samples=8, centers=weekend[0],
                              cluster_std=weekend[1], random_state=0)

    # combine modes
    roompers = np.vstack((roomperwd, roomperwe))

    # make positive and sum to one to simulate valid distribution
    roompers[roompers < 0] = 0
    roompersnorm = normalize(roompers, norm='l1')

    # simulate the anomaly on the most recent day: with the bath, bed
    # ordering above, [0.8, 0.2] is a day spent mostly in the bathroom
    roompersnorm[-1, :] = np.array([0.8, 0.2])

    # detect outliers
    roompersdetector = HourlyRoomPercentageAnomalyDetection(
        roompersnorm, eps=0.3, min_samples=3)
    labels = roompersdetector.scale_and_proximity_cluster(eps=0.3,
                                                          min_samples=3)

    # plot results: label -> (matplotlib format string, legend entry);
    # points carrying any other label are not drawn
    markers = {
        0: ('ro', 'Cluster 1'),
        1: ('kx', 'Cluster 2'),
        -1: ('b^', 'Outlier'),
    }
    in_legend = set()
    plt.figure()
    for i, label in enumerate(labels):
        if label not in markers:
            continue
        fmt, legend_text = markers[label]
        x_val, y_val = roompersnorm[i][0], roompersnorm[i][1]
        if label in in_legend:
            plt.plot(x_val, y_val, fmt)
        else:
            # only the first point of each group gets a legend entry
            plt.plot(x_val, y_val, fmt, label=legend_text)
            in_legend.add(label)

    plt.legend(loc='lower left')
    plt.axis([0, 1, 0, 1])
    plt.show()
开发者ID:PhoenixYanrongLi,项目名称:CareEcoSystem_ServerCode,代码行数:58,代码来源:Simulations.py
示例7: test_calibration_multiclass
def test_calibration_multiclass():
    """Test calibration for multiclass """

    def softmax(y_pred):
        # row-wise normalisation of exp(-y_pred)
        e = np.exp(-y_pred)
        return e / e.sum(axis=1).reshape(-1, 1)

    # test multi-class setting with classifier that implements
    # only decision function
    clf = LinearSVC()
    X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42,
                          centers=3, cluster_std=3.0)

    # Use categorical labels to check that CalibratedClassifierCV supports
    # them correctly
    target_names = np.array(['a', 'b', 'c'])
    y = target_names[y_idx]

    # even rows train, odd rows test
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf.fit(X_train, y_train)
    for method in ('isotonic', 'sigmoid'):
        calibrated = CalibratedClassifierCV(clf, method=method, cv=2)
        calibrated.fit(X_train, y_train)
        probas = calibrated.predict_proba(X_test)
        assert_array_almost_equal(np.sum(probas, axis=1),
                                  np.ones(len(X_test)))

        # Check that log-loss of calibrated classifier is smaller than
        # log-loss of naively turned OvR decision function to probabilities
        # via softmax
        naive_probas = softmax(clf.decision_function(X_test))
        uncalibrated_log_loss = log_loss(y_test, naive_probas)
        calibrated_log_loss = log_loss(y_test, probas)
        assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)

    # Test that calibration of a multiclass classifier decreases log-loss
    # for RandomForestClassifier
    X, y = make_blobs(n_samples=100, n_features=2, random_state=42,
                      cluster_std=3.0)
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X_train, y_train)
    forest_loss = log_loss(y_test, clf.predict_proba(X_test))

    for method in ('isotonic', 'sigmoid'):
        calibrated = CalibratedClassifierCV(clf, method=method, cv=3)
        calibrated.fit(X_train, y_train)
        calibrated_loss = log_loss(y_test, calibrated.predict_proba(X_test))
        assert_greater(forest_loss, calibrated_loss)
开发者ID:abecadel,项目名称:scikit-learn,代码行数:53,代码来源:test_calibration.py
示例8: test_thresholded_scorers
def test_thresholded_scorers():
    """Test scorers that take thresholds."""
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Classifier with decision_function and predict_proba: the scorer must
    # match roc_auc_score computed from either output.
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    scorer_auc = get_scorer('roc_auc')(clf, X_test, y_test)
    decision_auc = roc_auc_score(y_test, clf.decision_function(X_test))
    proba_auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(scorer_auc, decision_auc)
    assert_almost_equal(scorer_auc, proba_auc)

    # 'neg_log_loss' is compared against log_loss after a sign flip.
    scorer_log_loss = get_scorer('neg_log_loss')(clf, X_test, y_test)
    expected_log_loss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-scorer_log_loss, expected_log_loss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    scorer_auc = get_scorer('roc_auc')(clf, X_test, y_test)
    proba_auc = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(scorer_auc, proba_auc)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    scorer_auc = get_scorer('roc_auc')(reg, X_test, y_test)
    predict_auc = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(scorer_auc, predict_auc)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    # the threshold scorer first, then the proba scorer
    for scorer_name in ('roc_auc', 'neg_log_loss'):
        with pytest.raises(ValueError,
                           match="need classifier with two classes"):
            get_scorer(scorer_name)(clf, X_test, y_test)
开发者ID:srinivasreddy,项目名称:scikit-learn,代码行数:49,代码来源:test_score_objects.py
示例9: single_calc
def single_calc(n_sample):
    """Time ``Tri`` and ``Autoclust`` on one random two-blob data set.

    Each elapsed time is printed, and a dict holding both timings plus the
    sample count is appended to the file ``times``.

    Parameters
    ----------
    n_sample : int
        Passed to ``make_blobs`` as ``n_samples`` and stored in the log
        entry under ``'samples'``.
    """
    pts, _ = datasets.make_blobs(n_samples=n_sample, n_features=2,
                                 cluster_std=0.5, centers=2)

    res_dict = {}
    for key, algorithm in (('tri', Tri), ('auto', Autoclust)):
        began = timer()
        algorithm(pts)
        elapsed = timer() - began
        print(elapsed)
        res_dict[key] = elapsed
    res_dict['samples'] = n_sample

    with open('times', 'a') as log_file:
        print(res_dict, file=log_file)
开发者ID:hotator,项目名称:python-clustering,代码行数:26,代码来源:main.py
示例10: test_transformers_data_not_an_array
def test_transformers_data_not_an_array():
    """Yield ``check_transformer`` cases whose inputs are ``NotAnArray``.

    This is a generator test: one ``(check, name, class, X, y)`` tuple is
    yielded for every transformer that is not skipped.
    """
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non negative data, for things
    # like NMF
    X -= X.min() - .1

    # these don't actually fit the data:
    stateless = ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']
    # and these want multivariate output
    multivariate = ('PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD')

    for name, Transformer in transformers:
        # XXX: some transformers are transforming the input
        # data. This is a bug that we'll fix later. Right now we copy
        # the data each time
        wrapped_X = NotAnArray(X.copy())
        wrapped_y = NotAnArray(np.asarray(y))

        if name in dont_test or name in stateless or name in multivariate:
            continue

        yield check_transformer, name, Transformer, wrapped_X, wrapped_y
开发者ID:akashaio,项目名称:scikit-learn,代码行数:26,代码来源:test_common.py
示例11: separable_demo
def separable_demo():
    """ Generate a linearly-separable dataset D, train a linear SVM on
    D, then output the resulting decision boundary on a figure.

    ``C`` is chosen by a 2-fold stratified grid search on a 75% training
    split; accuracy and a classification report for the remaining 25% are
    printed before plotting.

    The ``print`` calls and ``dict.items()`` are written so that they run,
    and print the same text, on both Python 2 and Python 3.
    """
    from sklearn.datasets import make_blobs
    X, y = make_blobs(n_samples=200, n_features=2,
                      centers=((0, 0), (4, 4)),
                      cluster_std=1.0)
    plot_data(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

    # NOTE(review): class_weight='auto' and StratifiedKFold(y, k=...) look
    # like an old scikit-learn API - confirm against the installed version.
    svc = svm.SVC(class_weight='auto')
    param_grid = {'kernel': ['linear'],
                  'C': [1e0, 1e1, 1e2, 1e3, 1e4]}
    strat_2fold = StratifiedKFold(y_train, k=2)

    print(" Parameters to be chosen through cross validation:")
    for name, vals in param_grid.items():
        if name != 'kernel':
            print(" {0}: {1}".format(name, vals))

    clf = GridSearchCV(svc, param_grid, n_jobs=1, cv=strat_2fold)
    clf.fit(X_train, y_train)
    print("== Best Parameters: {0}".format(clf.best_params_))

    y_pred = clf.predict(X_test)
    # fraction of test points predicted correctly
    acc = len(np.where(y_pred == y_test)[0]) / float(len(y_pred))
    print("== Accuracy: {0}".format(acc))
    print(classification_report(y_test, y_pred))

    plot_svm(clf.best_estimator_, X, y, X_test, y_test,
             title="SVM Decision Boundary, Linear Kernel ({0} accuracy, C={1})".format(acc, clf.best_params_['C']))
开发者ID:ewestern,项目名称:machine_learning,代码行数:27,代码来源:kernel_trick.py
示例12: test_compute_class_weight_invariance
def test_compute_class_weight_invariance():
    """Check that class_weight="balanced" is invariant to class imbalance.

    Three data sets of identical size are built from one two-class blob
    data set: class 1 oversampled, class 0 oversampled, and everything
    duplicated.  With balanced class weights all three logistic
    regressions are expected to end up with the same coefficients.
    """
    X, y = make_blobs(centers=2, random_state=0)

    def with_class_added_twice(label):
        # original data followed by two extra copies of one class
        X_extra, y_extra = X[y == label], y[y == label]
        return (np.vstack([X] + [X_extra] * 2),
                np.hstack([y] + [y_extra] * 2))

    # create dataset where class 1 is duplicated twice
    X_1, y_1 = with_class_added_twice(1)
    # create dataset where class 0 is duplicated twice
    X_0, y_0 = with_class_added_twice(0)
    # duplicate everything
    X_ = np.vstack([X] * 2)
    y_ = np.hstack([y] * 2)

    # results should be identical
    logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
    logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
    logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
    assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
    assert_array_almost_equal(logreg.coef_, logreg0.coef_)
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:26,代码来源:test_class_weight.py
示例13: check_estimators_overwrite_params
def check_estimators_overwrite_params(name, Estimator):
    """Check that ``fit`` leaves the values from ``get_params()`` unchanged.

    Parameters
    ----------
    name : str
        Estimator name, used for special-casing and in the failure message.
    Estimator : class
        Estimator class, instantiated without arguments.
    """
    X, y = make_blobs(random_state=0, n_samples=9)
    y = multioutput_estimator_convert_y_2d(name, y)
    # some want non-negative input
    X -= X.min()

    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        estimator = Estimator()

    if name in ('MiniBatchDictLearning', 'MiniBatchSparsePCA'):
        # FIXME
        # for MiniBatchDictLearning and MiniBatchSparsePCA
        estimator.batch_size = 1

    set_fast_parameters(estimator)
    set_random_state(estimator)

    # snapshot the parameters on both sides of the fit
    params_before = estimator.get_params()
    estimator.fit(X, y)
    params_after = estimator.get_params()

    for param_name, old_value in params_before.items():
        new_value = params_after[param_name]
        assert_false(np.any(new_value != old_value),
                     "Estimator %s changes its parameter %s"
                     " from %s to %s during fit."
                     % (name, param_name, old_value, new_value))
开发者ID:CarpLi,项目名称:scikit-learn,代码行数:26,代码来源:estimator_checks.py
示例14: check_transformer_general
def check_transformer_general(name, Transformer):
    """Run ``_check_transformer`` on array inputs and on plain-list inputs."""
    blob_centers = [[0, 0, 0], [1, 1, 1]]
    X, y = make_blobs(n_samples=30, centers=blob_centers,
                      random_state=0, n_features=2, cluster_std=0.1)
    scaled = StandardScaler().fit_transform(X)
    # shift the data so that its global minimum is zero
    scaled -= scaled.min()

    _check_transformer(name, Transformer, scaled, y)
    # same data again, this time as nested Python lists
    _check_transformer(name, Transformer, scaled.tolist(), y.tolist())
开发者ID:AlexMarshall011,项目名称:scikit-learn,代码行数:7,代码来源:estimator_checks.py
示例15: test_multiclass_classifier_class_weight
def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True

    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    # the same 'sag' model fitted on dense and on CSR input
    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    # expand the per-class weights into one weight per sample
    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, np.unique(y), y)
    sample_weight = class_weight_[le.fit_transform(y)]

    # reference one-vs-rest fits: one (weights, intercept) pair per class
    plain_fits = []
    sparse_fits = []
    for cl in classes:
        # +1 for the current class, -1 for every other sample
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        plain_fits.append(
            sag_sparse(X, y_encoded, step_size, alpha,
                       n_iter=max_iter, dloss=log_dloss,
                       sample_weight=sample_weight))
        sparse_fits.append(
            sag_sparse(X, y_encoded, step_size, alpha,
                       n_iter=max_iter, dloss=log_dloss,
                       sample_weight=sample_weight,
                       sparse=True))

    coef1 = np.vstack([weights for weights, _ in plain_fits])
    intercept1 = np.array([intercept for _, intercept in plain_fits])
    coef2 = np.vstack([weights for weights, _ in sparse_fits])
    intercept2 = np.array([intercept for _, intercept in sparse_fits])

    for i, _ in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:60,代码来源:test_sag.py
示例16: test_sag_pobj_matches_logistic_regression
def test_sag_pobj_matches_logistic_regression():
    """tests if the sag pobj matches log reg"""
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)

    # settings shared by all three models; only the solver differs
    shared = dict(fit_intercept=False, tol=.0000001,
                  C=1. / alpha / n_samples, max_iter=max_iter,
                  random_state=10)
    clf1 = LogisticRegression(solver='sag', **shared)
    clf2 = clone(clf1)
    clf3 = LogisticRegression(**shared)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)  # same 'sag' model, CSR input
    clf3.fit(X, y)

    pobj1, pobj2, pobj3 = [get_pobj(model.coef_, alpha, X, y, log_loss)
                           for model in (clf1, clf2, clf3)]

    # all three objective values must agree pairwise
    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:27,代码来源:test_sag.py
示例17: plot_scaling
def plot_scaling():
    """Plot a two-blob data set next to four rescaled versions of it.

    The left half of the figure shows the original data.  The right half
    is a 2x2 grid with the output of StandardScaler, RobustScaler,
    MinMaxScaler and an l2 Normalizer.  All axes are drawn through the
    centre of their plot.
    """
    X, y = make_blobs(n_samples=50, centers=2, random_state=4, cluster_std=1)
    X += 3

    plt.figure(figsize=(15, 8))
    grid_shape = (2, 4)

    # large panel on the left: the untouched data
    main_ax = plt.subplot2grid(grid_shape, (0, 0), rowspan=2, colspan=2)
    main_ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm2, s=60)
    maxx = np.abs(X[:, 0]).max()
    maxy = np.abs(X[:, 1]).max()
    main_ax.set_xlim(-maxx + 1, maxx + 1)
    main_ax.set_ylim(-maxy + 1, maxy + 1)
    main_ax.set_title("Original Data")

    # four small panels on the right, created column by column
    other_axes = []
    for col in range(2, 4):
        for row in range(2):
            other_axes.append(plt.subplot2grid(grid_shape, (row, col)))

    scalers = [StandardScaler(), RobustScaler(),
               MinMaxScaler(), Normalizer(norm='l2')]
    for ax, scaler in zip(other_axes, scalers):
        X_ = scaler.fit_transform(X)
        ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=cm2, s=60)
        ax.set_xlim(-2, 2)
        ax.set_ylim(-2, 2)
        ax.set_title(type(scaler).__name__)

    other_axes.append(main_ax)

    # centre the left/bottom spines and hide the other two on every panel
    for ax in other_axes:
        spines = ax.spines
        spines['left'].set_position('center')
        spines['right'].set_color('none')
        spines['bottom'].set_position('center')
        spines['top'].set_color('none')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
开发者ID:361793842,项目名称:datascience-practice-handbook,代码行数:33,代码来源:plot_scaling.py
示例18: test_grid_search_correct_score_results
def test_grid_search_correct_score_results():
    """Check the per-split scores stored in ``cv_results_``.

    For the 'f1' and 'roc_auc' scorers, every split score recorded by
    GridSearchCV is recomputed by hand with a StratifiedKFold splitter and
    compared.
    """
    n_splits = 3
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]

    for score in ['f1', 'roc_auc']:
        grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score, cv=n_splits)
        results = grid_search.fit(X, y).cv_results_

        # Test scorer names
        result_keys = list(results.keys())
        split_keys = tuple("split%d_test_score" % cv_i
                           for cv_i in range(n_splits))
        expected_keys = ("mean_test_score", "rank_test_score") + split_keys
        assert_true(all(in1d(expected_keys, result_keys)))

        cv = StratifiedKFold(n_splits=n_splits)
        n_splits = grid_search.n_splits_
        for candidate_i, C in enumerate(Cs):
            clf.set_params(C=C)
            # scores this candidate obtained on each split
            recorded = [
                grid_search.cv_results_['split%d_test_score' % s][candidate_i]
                for s in range(n_splits)
            ]
            cv_scores = np.array(recorded)

            for i, (train, test) in enumerate(cv.split(X, y)):
                clf.fit(X[train], y[train])
                if score == "f1":
                    correct_score = f1_score(y[test], clf.predict(X[test]))
                elif score == "roc_auc":
                    dec = clf.decision_function(X[test])
                    correct_score = roc_auc_score(y[test], dec)
                assert_almost_equal(correct_score, cv_scores[i])
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:33,代码来源:test_search.py
示例19: test_search_cv_results_rank_tie_breaking
def test_search_cv_results_rank_tie_breaking():
    """Check that tied candidates share the 'min' rank in ``cv_results_``.

    Both GridSearchCV and RandomizedSearchCV are run over three ``C``
    values.  The first two candidates must tie on mean test score, the
    third must not, and the ranks must come out as ``[1, 1, 3]``.
    """
    X, y = make_blobs(n_samples=50, random_state=42)

    # The two C values are close enough to give similar models
    # which would result in a tie of their mean cv-scores
    param_grid = {'C': [1, 1.001, 0.001]}

    grid_search = GridSearchCV(SVC(), param_grid=param_grid)
    random_search = RandomizedSearchCV(SVC(), n_iter=3,
                                       param_distributions=param_grid)

    for search in (grid_search, random_search):
        search.fit(X, y)
        mean_scores = search.cv_results_['mean_test_score']

        # Check tie breaking strategy -
        # Check that there is a tie in the mean scores between
        # candidates 1 and 2 alone
        assert_almost_equal(mean_scores[0], mean_scores[1])
        try:
            assert_almost_equal(mean_scores[1], mean_scores[2])
        except AssertionError:
            # expected: the third candidate is not part of the tie
            pass
        else:
            # Without this branch the check above could never fail.
            raise AssertionError(
                "mean_test_score of candidates 2 and 3 should differ, "
                "got %r and %r" % (mean_scores[1], mean_scores[2]))

        # 'min' rank should be assigned to the tied candidates
        assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3])
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:26,代码来源:test_search.py
示例20: test_verbose_boolean
def _first_line_of_fit_output(model, X):
    """Fit *model* on *X* and return the first line it wrote to stdout."""
    captured = StringIO()
    old_stdout = sys.stdout
    sys.stdout = captured
    try:
        model.fit(X)
    finally:
        # always restore the real stdout, even when fit raises
        sys.stdout = old_stdout
    captured.seek(0)
    return captured.readline()


def test_verbose_boolean():
    """Check that ``verbose=True`` and ``verbose=1`` print the same output.

    Only the first line written to stdout by each fit is compared.  Every
    fit gets its own buffer: with one shared buffer the second read
    returned the first fit's line again, so the comparison could not fail.
    """
    # simple 3 cluster dataset
    X, y = make_blobs(random_state=1)
    for Model in [DPGMM, VBGMM]:
        dpgmm_bool = Model(n_components=10, random_state=1, alpha=20,
                           n_iter=50, verbose=True)
        dpgmm_int = Model(n_components=10, random_state=1, alpha=20,
                          n_iter=50, verbose=1)

        # generate output with the boolean flag
        bool_output = _first_line_of_fit_output(dpgmm_bool, X)
        # generate output with the int flag
        int_output = _first_line_of_fit_output(dpgmm_int, X)

        assert_equal(bool_output, int_output)
开发者ID:Erotemic,项目名称:scikit-learn,代码行数:27,代码来源:test_dpgmm.py
注：本文中的sklearn.datasets.make_blobs函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论