• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python datasets.make_blobs函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.datasets.make_blobs函数的典型用法代码示例。如果您正苦于以下问题:Python make_blobs函数的具体用法?Python make_blobs怎么用?Python make_blobs使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了make_blobs函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: check_transformer_pickle

def check_transformer_pickle(name, Transformer):
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    n_samples, n_features = X.shape
    X = StandardScaler().fit_transform(X)
    X -= X.min()
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        transformer = Transformer()
    if not hasattr(transformer, 'transform'):
        return
    set_random_state(transformer)
    set_fast_parameters(transformer)

    # fit
    if name in CROSS_DECOMPOSITION:
        random_state = np.random.RandomState(seed=12345)
        y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y

    transformer.fit(X, y_)
    X_pred = transformer.fit(X, y_).transform(X)
    pickled_transformer = pickle.dumps(transformer)
    unpickled_transformer = pickle.loads(pickled_transformer)
    pickled_X_pred = unpickled_transformer.transform(X)

    assert_array_almost_equal(pickled_X_pred, X_pred)
开发者ID:AlexMarshall011,项目名称:scikit-learn,代码行数:29,代码来源:estimator_checks.py


示例2: check_classifiers_classes

def check_classifiers_classes(name, Classifier):
    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non negative data, for things
    # like NMF
    X -= X.min() - .1
    y_names = np.array(["one", "two", "three"])[y]

    for y_names in [y_names, y_names.astype('O')]:
        if name in ["LabelPropagation", "LabelSpreading"]:
            # TODO some complication with -1 label
            y_ = y
        else:
            y_ = y_names

        classes = np.unique(y_)
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name == 'BernoulliNB':
            classifier.set_params(binarize=X.mean())
        set_fast_parameters(classifier)
        # fit
        classifier.fit(X, y_)

        y_pred = classifier.predict(X)
        # training set performance
        assert_array_equal(np.unique(y_), np.unique(y_pred))
        if np.any(classifier.classes_ != classes):
            print("Unexpected classes_ attribute for %r: "
                  "expected %s, got %s" %
                  (classifier, classes, classifier.classes_))
开发者ID:AlexMarshall011,项目名称:scikit-learn,代码行数:33,代码来源:estimator_checks.py


示例3: test_thresholded_scorers

def test_thresholded_scorers():
    """Test scorers that take thresholds."""
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = SCORERS['log_loss'](clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    assert_raises(ValueError, SCORERS['roc_auc'], clf, X_test, y_test)
开发者ID:2011200799,项目名称:scikit-learn,代码行数:28,代码来源:test_score_objects.py


示例4: test_fit_uuu

def test_fit_uuu():
    n_samples1 = 10000
    n_features = 5
    centers1 = np.array([[10, 5, 1, -5, -10],
                        [-10, -5, -1, 5, 10]])
    cluster_std1 = np.array([[1.0, 2.0, 3.0, 4.0, 5.0],
                            [5.0, 4.0, 3.0, 2.0, 1.0]])

    X1, y1 = make_blobs(n_features=n_features,
                      n_samples=n_samples1,
                      centers=centers1,
                      cluster_std=cluster_std1)

    n_samples2 = 5000
    centers2 = np.array([[10, 5, 1, -5, -10]])
    cluster_std2 = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]])
    X2, y2 = make_blobs(n_features=n_features,
                      n_samples=n_samples2,
                      centers=centers2,
                      cluster_std=cluster_std2)
    X = np.vstack((X1, X2))

    model = mixture.PGMM(covariance_type='UUU', n_components=2, n_pc=3)
    model.fit(X)
    assert_array_almost_equal(np.sum(model.means_, 0), np.sum(centers1, 0), decimal=0)
    assert_array_almost_equal(np.sort(model.weights_), np.array([0.333, 0.666]), decimal=1)
    assert_equal(model.means_.shape, np.array([2, n_features]))
    assert_equal(model.weights_.shape, np.array([2]))
    assert_equal(model.noise_.shape, np.array([2, n_features]))
    assert_equal(model.principal_subspace_.shape, np.array([2, n_features, 3]))
    assert_equal(model.covars_.shape, np.array([2, n_features, n_features]))
    logging.info('TestFitUUU: OK')
开发者ID:akionakamura,项目名称:scikit-learn,代码行数:32,代码来源:test_pgmm.py


示例5: test_check_is_fitted

def test_check_is_fitted():
    # Check is ValueError raised when non estimator instance passed
    assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_")
    assert_raises(TypeError, check_is_fitted, "SVR", "support_")

    ard = ARDRegression()
    svr = SVR()

    try:
        assert_raises(NotFittedError, check_is_fitted, ard, "coef_")
        assert_raises(NotFittedError, check_is_fitted, svr, "support_")
    except ValueError:
        assert False, "check_is_fitted failed with ValueError"

    # NotFittedError is a subclass of both ValueError and AttributeError
    try:
        check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s")
    except ValueError as e:
        assert_equal(str(e), "Random message ARDRegression, ARDRegression")

    try:
        check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s")
    except AttributeError as e:
        assert_equal(str(e), "Another message SVR, SVR")

    ard.fit(*make_blobs())
    svr.fit(*make_blobs())

    assert_equal(None, check_is_fitted(ard, "coef_"))
    assert_equal(None, check_is_fitted(svr, "support_"))
开发者ID:Afey,项目名称:scikit-learn,代码行数:30,代码来源:test_validation.py


示例6: show_dbscan

    def show_dbscan():
        """
        simulate 1 month of normal hourly room percentage data followed by an anomalous percentage
        the normal data is bimodal following most peoples activity patterns in which there is routinely a weekday
        percentage and a weekend percentage. 1 day in which the person spends a large amount of time in the bathroom
        is simulated
        """

        # simulate normal hourly data
        weekday = ([0.05, 0.95], 0.05) #bath, bed
        weekend = ([0.3, 0.7], 0.1)
        roomperwd, truelabelswd = make_blobs(n_samples=23, centers=weekday[0],
                                             cluster_std=weekday[1], random_state=0)
        roomperwe, truelabelswe = make_blobs(n_samples=8, centers=weekend[0],
                                             cluster_std=weekend[1], random_state=0)

        # combine modes
        roompers = np.vstack((roomperwd, roomperwe))

        # make positive and sum to one to simulate valid distribution
        for i in range(roompers.shape[0]):
            for j in range(roompers.shape[1]):
                if roompers[i, j] < 0:
                    roompers[i, j] = 0
        roompersnorm = normalize(roompers, norm='l1')

        # simulate anomaly on most recent day where don't leave bedroom
        roompersnorm[-1, :] = np.array([0.8, 0.2])

        # detect outliers
        roompersdetector = HourlyRoomPercentageAnomalyDetection(roompersnorm, eps=0.3, min_samples=3)
        labels = roompersdetector.scale_and_proximity_cluster(eps=0.3, min_samples=3)

        # plot results
        plt.figure()
        seenflag1 = False; seenflag2 = False; seenflag3 = False;
        for i, label in enumerate(labels):
            if label == 0:
                if seenflag1:
                    plt.plot(roompersnorm[i][0], roompersnorm[i][1], 'ro')
                else:
                    plt.plot(roompersnorm[i][0], roompersnorm[i][1], 'ro', label='Cluster 1')
                    seenflag1 = True
            elif label == 1:
                if seenflag2:
                    plt.plot(roompersnorm[i][0], roompersnorm[i][1], 'kx')
                else:
                    plt.plot(roompersnorm[i][0], roompersnorm[i][1], 'kx', label='Cluster 2')
                    seenflag2 = True
            elif label == -1:
                if seenflag3:
                    plt.plot(roompersnorm[i][0], roompersnorm[i][1], 'b^')
                else:
                    plt.plot(roompersnorm[i][0], roompersnorm[i][1], 'b^', label='Outlier')
                    seenflag3 = True
        plt.legend(loc='lower left')
        plt.axis([0, 1, 0, 1])
        plt.show()
开发者ID:PhoenixYanrongLi,项目名称:CareEcoSystem_ServerCode,代码行数:58,代码来源:Simulations.py


示例7: test_calibration_multiclass

def test_calibration_multiclass():
    """Test calibration for multiclass """
    # test multi-class setting with classifier that implements
    # only decision function
    clf = LinearSVC()
    X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42,
                          centers=3, cluster_std=3.0)

    # Use categorical labels to check that CalibratedClassifierCV supports
    # them correctly
    target_names = np.array(['a', 'b', 'c'])
    y = target_names[y_idx]

    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf.fit(X_train, y_train)
    for method in ['isotonic', 'sigmoid']:
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=2)
        cal_clf.fit(X_train, y_train)
        probas = cal_clf.predict_proba(X_test)
        assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test)))

        # Check that log-loss of calibrated classifier is smaller than
        # log-loss of naively turned OvR decision function to probabilities
        # via softmax
        def softmax(y_pred):
            e = np.exp(-y_pred)
            return e / e.sum(axis=1).reshape(-1, 1)

        uncalibrated_log_loss = \
            log_loss(y_test, softmax(clf.decision_function(X_test)))
        calibrated_log_loss = log_loss(y_test, probas)
        assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)

    # Test that calibration of a multiclass classifier decreases log-loss
    # for RandomForestClassifier
    X, y = make_blobs(n_samples=100, n_features=2, random_state=42,
                      cluster_std=3.0)
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X_train, y_train)
    clf_probs = clf.predict_proba(X_test)
    loss = log_loss(y_test, clf_probs)

    for method in ['isotonic', 'sigmoid']:
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=3)
        cal_clf.fit(X_train, y_train)
        cal_clf_probs = cal_clf.predict_proba(X_test)
        cal_loss = log_loss(y_test, cal_clf_probs)
        assert_greater(loss, cal_loss)
开发者ID:abecadel,项目名称:scikit-learn,代码行数:53,代码来源:test_calibration.py


示例8: test_thresholded_scorers

def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # for proba scorers
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('neg_log_loss')(clf, X_test, y_test)
开发者ID:srinivasreddy,项目名称:scikit-learn,代码行数:49,代码来源:test_score_objects.py


示例9: single_calc

def single_calc(n_sample):
    #n_sample = 1000
    n_feature = 2
    cluster_std = 0.5
    center = 2

    #for n_sample in [10, 50, 100, 500, 1000, 5000, 10000]:

    pts, labels = datasets.make_blobs(n_samples=n_sample, n_features=n_feature, cluster_std=cluster_std, centers=center)
    start = timer()
    tri = Tri(pts)
    end = timer()
    tri_time = end - start
    print(tri_time)

    #tri_res = compare_labels(labels, tri.labels)
    start = timer()
    auto = Autoclust(pts)
    end = timer()
    auto_time = end - start
    print(auto_time)
    #auto_res = compare_labels(labels, auto.labels)
    res_dict = {'tri': tri_time, 'auto': auto_time, 'samples': n_sample}

    with open('times', 'a') as f:
        print(res_dict, file=f)
开发者ID:hotator,项目名称:python-clustering,代码行数:26,代码来源:main.py


示例10: test_transformers_data_not_an_array

def test_transformers_data_not_an_array():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X = StandardScaler().fit_transform(X)
    # We need to make sure that we have non negative data, for things
    # like NMF
    X -= X.min() - .1

    for name, Transformer in transformers:
        # XXX: some transformers are transforming the input
        # data. This is a bug that we'll fix later. Right now we copy
        # the data each time
        this_X = NotAnArray(X.copy())
        this_y = NotAnArray(np.asarray(y))
        if name in dont_test:
            continue
        # these don't actually fit the data:
        if name in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']:
            continue
        # And these wan't multivariate output
        if name in ('PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'):
            continue
        yield check_transformer, name, Transformer, this_X, this_y
开发者ID:akashaio,项目名称:scikit-learn,代码行数:26,代码来源:test_common.py


示例11: separable_demo

def separable_demo():
    """ Generate a linearly-separable dataset D, train a linear SVM on
    D, then output the resulting decision boundary on a figure.
    """
    from sklearn.datasets import make_blobs
    X, y = make_blobs(n_samples=200, n_features=2, 
                      centers=((0,0), (4, 4)),
                      cluster_std=1.0)
    plot_data(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    svc = svm.SVC(class_weight='auto')
    param_grid = {'kernel': ['linear'],
                  'C': [1e0, 1e1, 1e2, 1e3, 1e4]}
    strat_2fold = StratifiedKFold(y_train, k=2)
    print "    Parameters to be chosen through cross validation:"
    for name, vals in param_grid.iteritems():
        if name != 'kernel':
            print "        {0}: {1}".format(name, vals)
    clf = GridSearchCV(svc, param_grid, n_jobs=1, cv=strat_2fold)
    clf.fit(X_train, y_train)
    print "== Best Parameters:", clf.best_params_
    y_pred = clf.predict(X_test)
    acc = len(np.where(y_pred == y_test)[0]) / float(len(y_pred))
    print "== Accuracy:", acc
    print classification_report(y_test, y_pred)
    plot_svm(clf.best_estimator_, X, y, X_test, y_test, 
             title="SVM Decision Boundary, Linear Kernel ({0} accuracy, C={1})".format(acc, clf.best_params_['C']))
开发者ID:ewestern,项目名称:machine_learning,代码行数:27,代码来源:kernel_trick.py


示例12: test_compute_class_weight_invariance

def test_compute_class_weight_invariance():
    # Test that results with class_weight="balanced" is invariant wrt
    # class imbalance if the number of samples is identical.
    # The test uses a balanced two class dataset with 100 datapoints.
    # It creates three versions, one where class 1 is duplicated
    # resulting in 150 points of class 1 and 50 of class 0,
    # one where there are 50 points in class 1 and 150 in class 0,
    # and one where there are 100 points of each class (this one is balanced
    # again).
    # With balancing class weights, all three should give the same model.
    X, y = make_blobs(centers=2, random_state=0)
    # create dataset where class 1 is duplicated twice
    X_1 = np.vstack([X] + [X[y == 1]] * 2)
    y_1 = np.hstack([y] + [y[y == 1]] * 2)
    # create dataset where class 0 is duplicated twice
    X_0 = np.vstack([X] + [X[y == 0]] * 2)
    y_0 = np.hstack([y] + [y[y == 0]] * 2)
    # duplicate everything
    X_ = np.vstack([X] * 2)
    y_ = np.hstack([y] * 2)
    # results should be identical
    logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
    logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
    logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
    assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
    assert_array_almost_equal(logreg.coef_, logreg0.coef_)
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:26,代码来源:test_class_weight.py


示例13: check_estimators_overwrite_params

def check_estimators_overwrite_params(name, Estimator):
    X, y = make_blobs(random_state=0, n_samples=9)
    y = multioutput_estimator_convert_y_2d(name, y)
    # some want non-negative input
    X -= X.min()
    with warnings.catch_warnings(record=True):
        # catch deprecation warnings
        estimator = Estimator()

    if name == 'MiniBatchDictLearning' or name == 'MiniBatchSparsePCA':
        # FIXME
        # for MiniBatchDictLearning and MiniBatchSparsePCA
        estimator.batch_size = 1

    set_fast_parameters(estimator)

    set_random_state(estimator)

    params = estimator.get_params()
    estimator.fit(X, y)
    new_params = estimator.get_params()
    for k, v in params.items():
        assert_false(np.any(new_params[k] != v),
                     "Estimator %s changes its parameter %s"
                     " from %s to %s during fit."
                     % (name, k, v, new_params[k]))
开发者ID:CarpLi,项目名称:scikit-learn,代码行数:26,代码来源:estimator_checks.py


示例14: check_transformer_general

def check_transformer_general(name, Transformer):
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X = StandardScaler().fit_transform(X)
    X -= X.min()
    _check_transformer(name, Transformer, X, y)
    _check_transformer(name, Transformer, X.tolist(), y.tolist())
开发者ID:AlexMarshall011,项目名称:scikit-learn,代码行数:7,代码来源:estimator_checks.py


示例15: test_multiclass_classifier_class_weight

def test_multiclass_classifier_class_weight():
    """tests multiclass with classweights for each class"""
    alpha = .1
    n_samples = 20
    tol = .00001
    max_iter = 50
    class_weight = {0: .45, 1: .55, 2: .75}
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
                      cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
                              max_iter=max_iter, tol=tol, random_state=77,
                              fit_intercept=fit_intercept,
                              class_weight=class_weight)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)

    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, np.unique(y), y)
    sample_weight = class_weight_[le.fit_transform(y)]

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    for cl in classes:
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight)
        spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
                                              n_iter=max_iter, dloss=log_dloss,
                                              sample_weight=sample_weight,
                                              sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_array_almost_equal(clf1.coef_[i].ravel(),
                                  coef1[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)

        assert_array_almost_equal(clf2.coef_[i].ravel(),
                                  coef2[i].ravel(),
                                  decimal=2)
        assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:60,代码来源:test_sag.py


示例16: test_sag_pobj_matches_logistic_regression

def test_sag_pobj_matches_logistic_regression():
    """tests if the sag pobj matches log reg"""
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
                      cluster_std=0.1)

    clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10)
    clf2 = clone(clf1)
    clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
                              C=1. / alpha / n_samples, max_iter=max_iter,
                              random_state=10)

    clf1.fit(X, y)
    clf2.fit(sp.csr_matrix(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:27,代码来源:test_sag.py


示例17: plot_scaling

def plot_scaling():
    X, y = make_blobs(n_samples=50, centers=2, random_state=4, cluster_std=1)
    X += 3

    plt.figure(figsize=(15, 8))
    main_ax = plt.subplot2grid((2, 4), (0, 0), rowspan=2, colspan=2)

    main_ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cm2, s=60)
    maxx = np.abs(X[:, 0]).max()
    maxy = np.abs(X[:, 1]).max()

    main_ax.set_xlim(-maxx + 1, maxx + 1)
    main_ax.set_ylim(-maxy + 1, maxy + 1)
    main_ax.set_title("Original Data")
    other_axes = [plt.subplot2grid((2, 4), (i, j)) for j in range(2, 4) for i in range(2)]

    for ax, scaler in zip(other_axes, [StandardScaler(), RobustScaler(),
                                       MinMaxScaler(), Normalizer(norm='l2')]):
        X_ = scaler.fit_transform(X)
        ax.scatter(X_[:, 0], X_[:, 1], c=y, cmap=cm2, s=60)
        ax.set_xlim(-2, 2)
        ax.set_ylim(-2, 2)
        ax.set_title(type(scaler).__name__)

    other_axes.append(main_ax)

    for ax in other_axes:
        ax.spines['left'].set_position('center')
        ax.spines['right'].set_color('none')
        ax.spines['bottom'].set_position('center')
        ax.spines['top'].set_color('none')
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
开发者ID:361793842,项目名称:datascience-practice-handbook,代码行数:33,代码来源:plot_scaling.py


示例18: test_grid_search_correct_score_results

def test_grid_search_correct_score_results():
    # test that correct scores are used
    n_splits = 3
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    for score in ['f1', 'roc_auc']:
        grid_search = GridSearchCV(clf, {'C': Cs}, scoring=score, cv=n_splits)
        results = grid_search.fit(X, y).cv_results_

        # Test scorer names
        result_keys = list(results.keys())
        expected_keys = (("mean_test_score", "rank_test_score") +
                         tuple("split%d_test_score" % cv_i
                               for cv_i in range(n_splits)))
        assert_true(all(in1d(expected_keys, result_keys)))

        cv = StratifiedKFold(n_splits=n_splits)
        n_splits = grid_search.n_splits_
        for candidate_i, C in enumerate(Cs):
            clf.set_params(C=C)
            cv_scores = np.array(
                list(grid_search.cv_results_['split%d_test_score'
                                             % s][candidate_i]
                     for s in range(n_splits)))
            for i, (train, test) in enumerate(cv.split(X, y)):
                clf.fit(X[train], y[train])
                if score == "f1":
                    correct_score = f1_score(y[test], clf.predict(X[test]))
                elif score == "roc_auc":
                    dec = clf.decision_function(X[test])
                    correct_score = roc_auc_score(y[test], dec)
                assert_almost_equal(correct_score, cv_scores[i])
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:33,代码来源:test_search.py


示例19: test_search_cv_results_rank_tie_breaking

def test_search_cv_results_rank_tie_breaking():
    X, y = make_blobs(n_samples=50, random_state=42)

    # The two C values are close enough to give similar models
    # which would result in a tie of their mean cv-scores
    param_grid = {'C': [1, 1.001, 0.001]}

    grid_search = GridSearchCV(SVC(), param_grid=param_grid)
    random_search = RandomizedSearchCV(SVC(), n_iter=3,
                                       param_distributions=param_grid)

    for search in (grid_search, random_search):
        search.fit(X, y)
        results = search.cv_results_
        # Check tie breaking strategy -
        # Check that there is a tie in the mean scores between
        # candidates 1 and 2 alone
        assert_almost_equal(results['mean_test_score'][0],
                            results['mean_test_score'][1])
        try:
            assert_almost_equal(results['mean_test_score'][1],
                                results['mean_test_score'][2])
        except AssertionError:
            pass
        # 'min' rank should be assigned to the tied candidates
        assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3])
开发者ID:YinongLong,项目名称:scikit-learn,代码行数:26,代码来源:test_search.py


示例20: test_verbose_boolean

def test_verbose_boolean():
    # checks that the output for the verbose output is the same
    # for the flag values '1' and 'True'
    # simple 3 cluster dataset
    X, y = make_blobs(random_state=1)
    for Model in [DPGMM, VBGMM]:
        dpgmm_bool = Model(n_components=10, random_state=1, alpha=20,
                           n_iter=50, verbose=True)
        dpgmm_int = Model(n_components=10, random_state=1, alpha=20,
                          n_iter=50, verbose=1)

        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            # generate output with the boolean flag
            dpgmm_bool.fit(X)
            verbose_output = sys.stdout
            verbose_output.seek(0)
            bool_output = verbose_output.readline()
            # generate output with the int flag
            dpgmm_int.fit(X)
            verbose_output = sys.stdout
            verbose_output.seek(0)
            int_output = verbose_output.readline()
            assert_equal(bool_output, int_output)
        finally:
            sys.stdout = old_stdout
开发者ID:Erotemic,项目名称:scikit-learn,代码行数:27,代码来源:test_dpgmm.py



注:本文中的sklearn.datasets.make_blobs函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python datasets.make_circles函数代码示例发布时间:2022-05-27
下一篇:
Python datasets.load_svmlight_files函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap