• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python dummy.DummyClassifier类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.dummy.DummyClassifier的典型用法代码示例。如果您正苦于以下问题：Python DummyClassifier类的具体用法？Python DummyClassifier怎么用？Python DummyClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了DummyClassifier类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: get_scores

def get_scores(X, y):
    """Score a grid-searched logistic regression against a dummy baseline.

    For each of 200 stratified shuffle splits (20% held out) a grid search
    over ``C`` is run on the training part, the selected estimator is refit
    on that training part, and its accuracy is recorded on both parts. A
    most-frequent-class ``DummyClassifier`` is fit on the same training part
    and scored on the held-out part.

    Returns a tuple ``(test_scores, train_scores, dummy_scores, preds,
    true_labels)``: three lists holding one accuracy per split, followed by
    the held-out predictions and true labels concatenated over all splits.
    """
    nfolds = 200
    cv = StratifiedShuffleSplit(y, n_iter=nfolds, test_size=0.2)
    dumb = DummyClassifier(strategy="most_frequent")
    clf = linear_model.LogisticRegression()
    param_dist = {"C": [1e6, 1e5, 1e4, 1e3, 1e2, 10, 1, 0.1, 0.01, 0.001]}
    # NOTE(review): the search is scored with "mean_absolute_error" while the
    # reported numbers are accuracies -- confirm this is intended.
    search = GridSearchCV(clf, param_grid=param_dist, scoring="mean_absolute_error")
    test_scores, train_scores, dummy_scores = [], [], []
    preds, true_labels = [], []
    for train, test in cv:
        y_train, y_test = y[train], y[test]
        X_train, X_test = X[train, :], X[test, :]

        search.fit(X_train, y_train)
        clf = search.best_estimator_
        # Single-argument print with parentheses behaves the same on
        # Python 2 and Python 3.
        print(search.best_params_)

        clf.fit(X_train, y_train)
        # Predict the held-out part once and reuse it for both the score and
        # the collected predictions.
        test_pred = clf.predict(X_test)
        train_scores.append(accuracy_score(clf.predict(X_train), y_train))
        test_scores.append(accuracy_score(test_pred, y_test))
        dumb.fit(X_train, y_train)
        dummy_scores.append(accuracy_score(dumb.predict(X_test), y_test))
        preds.extend(test_pred)
        true_labels.extend(y_test)
    return test_scores, train_scores, dummy_scores, preds, true_labels
开发者ID:gsudre,项目名称:research_code,代码行数:27,代码来源:classify_rest_ica_example11.py


示例2: do_cross_validation

def do_cross_validation(labels):
    """Run a most-frequent-class baseline through stratified k-fold CV.

    For every fold a ``DummyClassifier`` is fit on the training labels and
    used to predict the test instances of that fold.

    Keyword arguments:
    labels -- all labels

    Returns a pair ``(single_predictions, classification_result)``.
    ``single_predictions`` has one row per test instance with the columns
    fold number, instance index, true label and predicted label.
    ``classification_result`` has ``NO_OF_FOLDS + 1`` rows of five values;
    rows ``0 .. NO_OF_FOLDS - 1`` are filled per fold here, the last row is
    left at zero by this function.
    """
    folds = StratifiedKFold(labels, NO_OF_FOLDS)

    # One row per fold plus one extra row for the entire cross validation.
    classification_result = np.zeros((NO_OF_FOLDS + 1, 5))

    # Per-fold tables of the individual classification decisions.
    per_fold_predictions = []

    for cur_fold, (train_idx, test_idx) in enumerate(folds):
        true_labels = labels[test_idx]

        baseline = DummyClassifier(strategy='most_frequent')
        baseline.fit(None, labels[train_idx])
        pred_labels = baseline.predict(np.zeros(true_labels.shape[0]))

        fold_ids = np.full(test_idx.shape[0], cur_fold, dtype=float)
        stacked = np.vstack((fold_ids, test_idx, true_labels, pred_labels))
        per_fold_predictions.append(stacked.T)

        classification_result[cur_fold, :] = get_classification_result(
            cur_fold, true_labels, pred_labels)

    return np.vstack(per_fold_predictions), classification_result
开发者ID:herbertchen1,项目名称:SciTail,代码行数:32,代码来源:classify_mode.py


示例3: get_scores

def get_scores(X, y):
    """Average SVC and dummy accuracies for every feature-subset size.

    For each subset size from 1 to ``X.shape[1]``, every combination of that
    many feature columns is evaluated over 40 stratified shuffle splits
    (5% held out). Accuracies of the grid-searched SVC (train and test) and
    of a most-frequent-class ``DummyClassifier`` (test) are pooled over all
    combinations and splits of that size and averaged.

    Returns ``(stest, strain, sdummy)``: three lists with one mean accuracy
    per subset size, in increasing size order.
    """
    nfolds = 40
    cv = StratifiedShuffleSplit(y, n_iter=nfolds, test_size=.05)
    dumb = DummyClassifier(strategy='most_frequent')
    clf = svm.SVC(class_weight='auto')
    param_dist = {"C": [.1, 1, 10],
                  "kernel": ['rbf', 'linear', 'poly']
                  }
    search = GridSearchCV(clf, param_grid=param_dist,
                          scoring='mean_absolute_error')
    stest, strain, sdummy = [], [], []
    n_features = X.shape[1]
    for subset_size in range(1, n_features + 1):
        test_scores, train_scores, dummy_scores = [], [], []
        # Walk through every feature combination of the current size.
        for my_feats in itertools.combinations(range(n_features), subset_size):
            # The column index only depends on the combination, so build it
            # once instead of once per split.
            idx = np.array(my_feats)
            for train, test in cv:
                y_train, y_test = y[train], y[test]
                X_train, X_test = X[train, :], X[test, :]

                # NOTE(review): hyper-parameters are searched on all feature
                # columns, while the estimator is then refit and scored on
                # the selected subset only -- confirm this is intended.
                search.fit(X_train, y_train)
                clf = search.best_estimator_

                X_train_sub = X_train[:, idx]
                X_test_sub = X_test[:, idx]

                clf.fit(X_train_sub, y_train)
                train_scores.append(accuracy_score(clf.predict(X_train_sub), y_train))
                test_scores.append(accuracy_score(clf.predict(X_test_sub), y_test))
                dumb.fit(X_train_sub, y_train)
                dummy_scores.append(accuracy_score(dumb.predict(X_test_sub), y_test))
        sdummy.append(np.mean(dummy_scores))
        strain.append(np.mean(train_scores))
        stest.append(np.mean(test_scores))
    return stest, strain, sdummy
开发者ID:gsudre,项目名称:research_code,代码行数:33,代码来源:classify_rest_ica.py


示例4: _run_dummy_detection

def _run_dummy_detection(x_train, x_test, y_train, y_test):
    """Fit a most-frequent-class dummy classifier and print its test score.

    Progress messages and the value of ``clf.score(x_test, y_test)`` are
    written to standard output; nothing is returned.
    """
    clf = DummyClassifier(strategy='most_frequent')

    # Parenthesised single-argument print calls give identical output on
    # Python 2 and are valid syntax on Python 3.
    print("Training Dummy...")
    clf.fit(x_train, y_train)
    print("Predicting Test Set...")
    print("Score for test set: {}".format(clf.score(x_test, y_test)))
开发者ID:Brok-Bucholtz,项目名称:Ultrasound-Nerve-Segmentation,代码行数:7,代码来源:run.py


示例5: test_dtype_of_classifier_probas

def test_dtype_of_classifier_probas(strategy):
    """``predict_proba`` returns float64 values for the given strategy."""
    targets = [0, 2, 1, 1]
    features = np.zeros(4)

    clf = DummyClassifier(strategy=strategy, random_state=0, constant=0)
    fitted = clf.fit(features, targets)
    probabilities = fitted.predict_proba(features)

    assert probabilities.dtype == np.float64
开发者ID:allefpablo,项目名称:scikit-learn,代码行数:7,代码来源:test_dummy.py


示例6: test_dummy_classifier_on_nan_value

def test_dummy_classifier_on_nan_value():
    """A default DummyClassifier fits and predicts on a NaN-only feature."""
    # Use the canonical lowercase spelling: the ``np.NaN`` alias was removed
    # in NumPy 2.0, whereas ``np.nan`` exists in every NumPy release.
    X = [[np.nan]]
    y = [1]
    y_expected = [1]
    clf = DummyClassifier()
    clf.fit(X, y)
    y_pred = clf.predict(X)
    assert_array_equal(y_pred, y_expected)
开发者ID:NelleV,项目名称:scikit-learn,代码行数:8,代码来源:test_dummy.py


示例7: test_most_frequent_strategy

def test_most_frequent_strategy():
    """The most_frequent strategy always predicts the majority label (1)."""
    y = [1, 2, 1, 1]
    X = [[0], [0], [0], [0]]  # feature values are ignored

    model = DummyClassifier(strategy="most_frequent", random_state=0)
    model.fit(X, y)

    predicted = model.predict(X)
    expected = np.ones(len(X))
    assert_array_equal(predicted, expected)
    _check_predict_proba(model, X, y)
开发者ID:RONNCC,项目名称:scikit-learn,代码行数:8,代码来源:test_dummy.py


示例8: test_constant_strategy_multioutput

def test_constant_strategy_multioutput():
    """The constant strategy predicts ``[1, 0]`` for every two-output row."""
    X = [[0], [0], [0], [0]]  # feature values are ignored
    y = np.array([[2, 3], [1, 3], [2, 3], [2, 0]])

    model = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    model.fit(X, y)

    # Expected prediction: a column of ones next to a column of zeros.
    n_samples = len(X)
    ones_column = np.ones((n_samples, 1))
    zeros_column = np.zeros((n_samples, 1))
    expected = np.hstack([ones_column, zeros_column])

    assert_array_equal(model.predict(X), expected)
    _check_predict_proba(model, X, y)
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:10,代码来源:test_dummy.py


示例9: test_dummy_classifier_on_3D_array

def test_dummy_classifier_on_3D_array():
    """A default DummyClassifier accepts a 3-D array of strings as X."""
    features = np.array([[['foo']], [['bar']], [['baz']]])
    targets = [2, 2, 2]
    expected_labels = [2, 2, 2]
    expected_probas = [[1], [1], [1]]

    model = DummyClassifier()
    model.fit(features, targets)
    predicted_labels = model.predict(features)
    predicted_probas = model.predict_proba(features)

    assert_array_equal(predicted_labels, expected_labels)
    assert_array_equal(predicted_probas, expected_probas)
开发者ID:aniryou,项目名称:scikit-learn,代码行数:11,代码来源:test_dummy.py


示例10: test_constant_strategy_sparse_target

def test_constant_strategy_sparse_target():
    """With a sparse y, the constant strategy returns sparse ``[1, 0]`` rows."""
    X = [[0]] * 5  # feature values are ignored
    dense_targets = np.array([[0, 1], [4, 0], [1, 1], [1, 4], [1, 1]])
    y = sp.csc_matrix(dense_targets)

    n_samples = len(X)

    model = DummyClassifier(strategy="constant", random_state=0, constant=[1, 0])
    model.fit(X, y)
    prediction = model.predict(X)

    assert_true(sp.issparse(prediction))
    # Expected prediction: a column of ones next to a column of zeros.
    expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
    assert_array_equal(prediction.toarray(), expected)
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:11,代码来源:test_dummy.py


示例11: test_uniform_strategy

def test_uniform_strategy():
    """The uniform strategy predicts labels 1 and 2 about equally often."""
    X = [[0]] * 4  # feature values are ignored
    y = [1, 2, 1, 1]
    model = DummyClassifier(strategy="uniform", random_state=0)
    model.fit(X, y)

    # Predict on a larger sample so the label frequencies can be compared.
    X = [[0]] * 500
    predicted = model.predict(X)
    frequencies = np.bincount(predicted) / float(len(X))
    for label in (1, 2):
        assert_almost_equal(frequencies[label], 0.5, decimal=1)
    _check_predict_proba(model, X, y)
开发者ID:Aerlinger,项目名称:scikit-learn,代码行数:12,代码来源:test_dummy.py


示例12: test_stratified_strategy

def test_stratified_strategy():
    """The stratified strategy predicts labels at their training frequency."""
    X = [[0]] * 5  # feature values are ignored
    y = [1, 2, 1, 1, 2]
    model = DummyClassifier(strategy="stratified", random_state=0)
    model.fit(X, y)

    # Predict on a larger sample so the label frequencies can be compared.
    X = [[0]] * 1000
    predicted = model.predict(X)
    frequencies = np.bincount(predicted) / float(len(X))
    # Label 1 makes up 3 of the 5 training targets, label 2 the other 2.
    for label, expected in ((1, 3. / 5), (2, 2. / 5)):
        assert_almost_equal(frequencies[label], expected, decimal=1)
    _check_predict_proba(model, X, y)
开发者ID:Jetafull,项目名称:scikit-learn,代码行数:12,代码来源:test_dummy.py


示例13: test_most_frequent_and_prior_strategy_multioutput

def test_most_frequent_and_prior_strategy_multioutput():
    """prior and most_frequent both predict ``[1, 0]`` for two-output y."""
    X = [[0], [0], [0], [0]]  # feature values are ignored
    y = np.array([[1, 0], [2, 0], [1, 0], [1, 3]])

    # Expected prediction: a column of ones next to a column of zeros.
    n_samples = len(X)
    expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])

    for strategy in ("prior", "most_frequent"):
        model = DummyClassifier(strategy=strategy, random_state=0)
        model.fit(X, y)
        assert_array_equal(model.predict(X), expected)
        _check_predict_proba(model, X, y)
        _check_behavior_2d(model)
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:12,代码来源:test_dummy.py


示例14: test_most_frequent_and_prior_strategy_sparse_target

def test_most_frequent_and_prior_strategy_sparse_target():
    """most_frequent and prior return sparse ``[1, 0]`` rows for sparse y."""
    X = [[0]] * 5  # feature values are ignored
    dense_targets = np.array([[1, 0], [1, 3], [4, 0], [0, 1], [1, 0]])
    y = sp.csc_matrix(dense_targets)

    # Expected prediction: a column of ones next to a column of zeros.
    column_shape = (len(X), 1)
    y_expected = np.hstack([np.ones(column_shape), np.zeros(column_shape)])

    for strategy in ("most_frequent", "prior"):
        model = DummyClassifier(strategy=strategy, random_state=0)
        model.fit(X, y)

        prediction = model.predict(X)
        assert_true(sp.issparse(prediction))
        assert_array_equal(prediction.toarray(), y_expected)
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:13,代码来源:test_dummy.py


示例15: main

def main(training_set, language, gold_standard, gazetteer):
    """Grid-search a LinearSVC and optionally score it on a gold standard.

    Each row of ``training_set`` is parsed as JSON and fed to a
    ``FactExtractorFeatureExtractor``; the resulting features drive a 10-fold
    grid search over ``C`` and ``multi_class`` scored with weighted F1.
    When ``gold_standard`` is given, its rows are processed the same way and
    the weighted F1 of both a stratified ``DummyClassifier`` and the grid
    search's best model is logged. Nothing is returned.
    """
    if gazetteer:
        gazetteer = reverse_gazetteer(json.load(gazetteer))
    else:
        gazetteer = {}

    logger.info('Building training set')
    extractor = FactExtractorFeatureExtractor(language)
    for line in training_set:
        record = json.loads(line)
        extractor.process_sentence(record['sentence'], record['fes'],
                                   add_unknown=True, gazetteer=gazetteer)

    logger.info('Finalizing training set')
    x, y = extractor.get_features()

    logger.info('Searching for the best model parameters')
    parameter_grid = [{
        'C': [0.01, 0.1, 1.0, 10.0, 100.0, 1000.0],
        'multi_class': ['ovr', 'crammer_singer'],
    }]
    search = GridSearchCV(LinearSVC(), param_grid=parameter_grid,
                          scoring='f1_weighted', cv=10)
    search.fit(x, y)

    logger.info('The best model (weighted-averaged F1 of %.4f) has parameters %s',
                search.best_score_, search.best_params_)

    if not gold_standard:
        logger.info('Skipping gold standard evaluation')
        return

    logger.info('Evaluating on the gold standard')
    for line in gold_standard:
        record = json.loads(line)
        extractor.process_sentence(record['sentence'], record['fes'])
    x_gold, y_gold = extractor.get_features()

    # Baseline: a stratified dummy model fit on the training features.
    baseline = DummyClassifier(strategy='stratified')
    baseline.fit(x, y)

    baseline_f1 = metrics.f1_score(y_gold, baseline.predict(x_gold), average='weighted')
    logger.info('Dummy model has a weighted-averaged F1 on the gold standard of %.4f',
                baseline_f1)

    best_f1 = metrics.f1_score(y_gold, search.predict(x_gold), average='weighted')
    logger.info('Best model has a weighted-averaged F1 on the gold standard of %.4f',
                best_f1)
开发者ID:clear-datacenter,项目名称:StrepHit,代码行数:50,代码来源:model_selection.py


示例16: find_best_dummy_classification

def find_best_dummy_classification(X, y, test_size=0.3, random_state=0, thresh=0.5, target_names=None, n=1):
    """Try all dummy models and rank them by Matthews correlation.

    ``X`` is flattened to two dimensions and split once into train and test
    parts. In each of ``n`` rounds, every ``DummyClassifier`` strategy and
    then every ``DummyRegressor`` strategy is fit on the training part and
    evaluated on the test part. Targets and predictions are binarised with
    ``> thresh`` before the Matthews correlation and classification report
    are computed; ``score`` is the model's own ``score`` on the raw values.

    Returns ``(df, best)``: a DataFrame with one row per fitted model sorted
    by ``matthews_corrcoef`` in descending order, and the first row of that
    frame as a dict.
    """
    X = X.reshape((len(X), -1))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # (label prefix, model class, strategy) for every dummy model, listed in
    # evaluation order: classifiers first, then regressors.
    candidates = [
        ('classifier_', DummyClassifier, strategy)
        for strategy in ['most_frequent', 'uniform', 'prior', 'stratified']
    ] + [
        ('regressor_', DummyRegressor, strategy)
        for strategy in ['mean', 'median']
    ]

    # Both classifier and regressor outputs are compared after thresholding.
    y_test_binary = y_test > thresh

    dummy_scores = []
    for _ in range(n):
        for prefix, model_cls, strategy in candidates:
            clf = model_cls(strategy=strategy)
            clf.fit(X_train, y_train)
            y_pred = clf.predict(X_test)
            score = clf.score(X_test, y_test)

            y_pred_binary = y_pred > thresh
            mcc = sklearn.metrics.matthews_corrcoef(y_test_binary, y_pred_binary)
            report = parse_classification_report(
                sklearn.metrics.classification_report(
                    y_test_binary, y_pred_binary, target_names=target_names))

            # Built from pairs so the key order is fixed on every Python
            # version (keyword arguments are unordered before 3.6).
            dummy_scores.append(
                collections.OrderedDict([
                    ('strategy', prefix + strategy),
                    ('matthews_corrcoef', mcc),
                    ('score', score),
                    ('report', report),
                ])
            )

    df = pd.DataFrame(dummy_scores)
    df = df.sort_values('matthews_corrcoef', ascending=False)
    return df, df.iloc[0].to_dict()
开发者ID:kinect59,项目名称:satellite_leak_detection,代码行数:50,代码来源:analysis.py


示例17: run_ML_leave_one_subject_out

def run_ML_leave_one_subject_out(config, filename, question, clf, cols, return_arr=None, return_index=-1):
    """Compare ``clf`` with two dummy baselines under leave-one-subject-out.

    Data is loaded from ``config['DATA_DIRECTORY']`` and split with
    ``leave_one_subject_out`` on the ``'User'`` column. For every split
    ``clf``, a most-frequent dummy and a stratified dummy are fit on the
    training part and scored on the testing part; the scores are averaged
    over all splits and rounded to two decimals.

    When ``return_index`` is -1 the tuple ``(score, score_dummy_mf,
    score_dummy_sf)`` is returned. Otherwise the tuple is stored in
    ``return_arr[return_index]`` and the function returns ``None``.
    """
    working_directory = config['DATA_DIRECTORY']
    data_X, data_y = load_data(working_directory, filename, cols, question)
    data = leave_one_subject_out(data_X, data_y, 'User')
    score = 0
    score_dummy_mf = 0
    score_dummy_sf = 0
    # ``strategy`` is passed by keyword: scikit-learn releases that make
    # estimator constructor arguments keyword-only reject the positional form.
    dummy_clf_mf = DummyClassifier(strategy='most_frequent')
    dummy_clf_sf = DummyClassifier(strategy='stratified')
    for (training_X, training_y), (testing_X, testing_y) in data:
        clf.fit(training_X, training_y)
        dummy_clf_mf.fit(training_X, training_y)
        dummy_clf_sf.fit(training_X, training_y)

        single_score = clf.score(testing_X, testing_y)
        single_score_dummy_mf = dummy_clf_mf.score(testing_X, testing_y)
        single_score_dummy_sf = dummy_clf_sf.score(testing_X, testing_y)

        score = score + single_score.mean()
        score_dummy_mf = score_dummy_mf + single_score_dummy_mf.mean()
        score_dummy_sf = score_dummy_sf + single_score_dummy_sf.mean()
    # Average over the number of splits.
    score = round(float(score / len(data)), 2)
    score_dummy_mf = round(float(score_dummy_mf / len(data)), 2)
    score_dummy_sf = round(float(score_dummy_sf / len(data)), 2)
    if return_index == -1:
        return score, score_dummy_mf, score_dummy_sf
    else:
        return_arr[return_index] = (score, score_dummy_mf, score_dummy_sf)
开发者ID:GurraB,项目名称:REU2018,代码行数:34,代码来源:main.py


示例18: test_uniform_strategy_sparse_target_warning

def test_uniform_strategy_sparse_target_warning():
    """Uniform strategy on sparse y warns and predicts labels uniformly.

    Fitting must emit a ``UserWarning`` containing "the uniform strategy
    would not save memory". For each output column, labels 1, 2 and 4 must
    each make up roughly a third of 500 predictions.
    """
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[2, 1], [2, 2], [1, 4], [4, 2], [1, 1]]))

    clf = DummyClassifier(strategy="uniform", random_state=0)
    assert_warns_message(UserWarning, "the uniform strategy would not save memory", clf.fit, X, y)

    X = [[0]] * 500
    y_pred = clf.predict(X)

    # A float literal keeps this a true division on Python 2 as well, where
    # ``1 / 3`` is 0 unless ``from __future__ import division`` is active.
    expected = 1. / 3
    for k in range(y.shape[1]):
        p = np.bincount(y_pred[:, k]) / float(len(X))
        for label in (1, 2, 4):
            assert_almost_equal(p[label], expected, decimal=1)
开发者ID:jonathanwoodard,项目名称:scikit-learn,代码行数:15,代码来源:test_dummy.py


示例19: run_regression

def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    """Fit an SGD logistic classifier and a dummy baseline; print accuracies.

    NumPy's global random seed is set to 1 first. Both models are fit on the
    training embeddings. Printed, in order: the SGD classifier's test
    accuracy, its train accuracy, and the dummy classifier's test accuracy,
    each preceded by a heading line. Nothing is returned.
    """
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import accuracy_score

    baseline = DummyClassifier()
    baseline.fit(train_embeds, train_labels)
    classifier = SGDClassifier(loss="log", n_jobs=55)
    classifier.fit(train_embeds, train_labels)

    # (heading, model, inputs, true labels) in the order they are reported.
    reports = (
        ("Test scores", classifier, test_embeds, test_labels),
        ("Train scores", classifier, train_embeds, train_labels),
        ("Random baseline", baseline, test_embeds, test_labels),
    )
    for heading, model, embeds, labels in reports:
        print(heading)
        print(accuracy_score(labels, model.predict(embeds)))
开发者ID:hammadhaleem,项目名称:FastGCN,代码行数:15,代码来源:train_batch_multiRank_inductive_reddit_onelayer.py


示例20: main

def main(training_set, language, gold_standard, gazetteer, n_folds, n_jobs,
         scoring, output, test, word2vec_model, independent_lus):
    """Search models and parameters, dump the winner, optionally evaluate it.

    A ``MultimodelGridSearchCV`` is run over the models from ``get_models``
    and the training sets from ``get_training_sets``. The best model and its
    training metadata are logged and dumped to ``output`` with joblib. When
    ``gold_standard`` is given, its rows are pushed through the winning
    extractor and the score of a stratified ``DummyClassifier`` and of the
    best model on those features is logged. Nothing is returned.
    """
    logger.info('Searching for the best model and parameters')

    training_sets = get_training_sets(training_set, language, gazetteer, word2vec_model, independent_lus)
    models = get_models(test)

    search = MultimodelGridSearchCV(*models, cv=n_folds, n_jobs=n_jobs,
                                    scoring=Scorer(scoring, True))
    best_data, best_score, best_params, best_model = search.fit(training_sets)
    x_tr, y_tr, best_training_meta = best_data

    logger.info('Evaluation Results')
    logger.info('  Best model: %s', best_model.__class__.__name__)
    logger.info('  Score: %f', best_score)
    logger.info('  Parameters: %s', best_params)
    logger.info('  Gazetteer: %s', best_training_meta['gazetteer'])
    logger.info('  Extractor: %s', best_training_meta['extractor_cls'].__name__)
    logger.info('  Extractor args: %s', best_training_meta['extractor_args'])

    joblib.dump((best_model, best_training_meta), output)
    logger.info("Done, dumped model to '%s'", output)

    if not gold_standard:
        logger.info('Skipping gold standard evaluation')
        return

    logger.info('Evaluating on the gold standard')

    # Reuse the extractor and gazetteer that belong to the winning model.
    extractor = best_training_meta['extractor']
    gazetteer = best_training_meta['gazetteer']

    extractor.start()
    for line in gold_standard:
        record = json.loads(line)
        extractor.process_sentence(record['sentence'], record['lu'], record['fes'],
                                   add_unknown=False, gazetteer=gazetteer)
    x_gold, y_gold = extractor.get_features(refit=False)

    # Baseline: a stratified dummy model fit on the best training set.
    baseline = DummyClassifier(strategy='stratified')
    baseline.fit(x_tr, y_tr)

    baseline_score = Scorer(scoring, True)(baseline, x_gold, y_gold)
    logger.info('Dummy model has a weighted-averaged F1 on the gold standard of %.4f',
                baseline_score)

    best_model_score = Scorer(scoring, True)(best_model, x_gold, y_gold)
    logger.info('Best model has a weighted-averaged F1 on the gold standard of %.4f',
                best_model_score)
开发者ID:Wikidata,项目名称:StrepHit,代码行数:48,代码来源:model_selection.py



注:本文中的sklearn.dummy.DummyClassifier类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python dummy.DummyRegressor类代码示例发布时间:2022-05-27
下一篇:
Python discriminant_analysis.QuadraticDiscriminantAnalysis类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap