• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python model_selection.learning_curve函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.model_selection.learning_curve函数的典型用法代码示例。如果您正苦于以下问题:Python learning_curve函数的具体用法?Python learning_curve怎么用?Python learning_curve使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了learning_curve函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_learning_curve

def test_learning_curve():
    n_samples = 30
    n_splits = 3
    X, y = make_classification(n_samples=n_samples, n_features=1,
                               n_informative=1, n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(n_samples * ((n_splits - 1) / n_splits))
    for shuffle_train in [False, True]:
        with warnings.catch_warnings(record=True) as w:
            train_sizes, train_scores, test_scores = learning_curve(
                estimator, X, y, cv=KFold(n_splits=n_splits),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_equal(train_scores.shape, (10, 3))
        assert_equal(test_scores.shape, (10, 3))
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))

        # Test a custom cv splitter that can iterate only once
        with warnings.catch_warnings(record=True) as w:
            train_sizes2, train_scores2, test_scores2 = learning_curve(
                estimator, X, y,
                cv=OneTimeSplitter(n_splits=n_splits, n_samples=n_samples),
                train_sizes=np.linspace(0.1, 1.0, 10),
                shuffle=shuffle_train)
        if len(w) > 0:
            raise RuntimeError("Unexpected warning: %r" % w[0].message)
        assert_array_almost_equal(train_scores2, train_scores)
        assert_array_almost_equal(test_scores2, test_scores)
开发者ID:RomainBrault,项目名称:scikit-learn,代码行数:34,代码来源:test_validation.py


示例2: test_learning_curve_with_shuffle

def test_learning_curve_with_shuffle():
    # Following test case was designed this way to verify the code
    # changes made in pull request: #7506.
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [11, 12], [13, 14], [15, 16],
                 [17, 18], [19, 20], [7, 8], [9, 10], [11, 12], [13, 14],
                 [15, 16], [17, 18]])
    y = np.array([1, 1, 1, 2, 3, 4, 1, 1, 2, 3, 4, 1, 2, 3, 4])
    groups = np.array([1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 4, 4, 4, 4])
    # Splits on these groups fail without shuffle as the first iteration
    # of the learning curve doesn't contain label 4 in the training set.
    estimator = PassiveAggressiveClassifier(shuffle=False)

    cv = GroupKFold(n_splits=2)
    train_sizes_batch, train_scores_batch, test_scores_batch = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2)
    assert_array_almost_equal(train_scores_batch.mean(axis=1),
                              np.array([0.75, 0.3, 0.36111111]))
    assert_array_almost_equal(test_scores_batch.mean(axis=1),
                              np.array([0.36111111, 0.25, 0.25]))
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=cv, n_jobs=1,
                  train_sizes=np.linspace(0.3, 1.0, 3), groups=groups)

    train_sizes_inc, train_scores_inc, test_scores_inc = learning_curve(
        estimator, X, y, cv=cv, n_jobs=1, train_sizes=np.linspace(0.3, 1.0, 3),
        groups=groups, shuffle=True, random_state=2,
        exploit_incremental_learning=True)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
开发者ID:RomainBrault,项目名称:scikit-learn,代码行数:31,代码来源:test_validation.py


示例3: plot_learning_curve

 def plot_learning_curve(self, estimator, title, X, y, ylim=None, cv=None,
                         n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5),
                         filename=None):
     
     plt.figure()
     plt.title(title)
     if ylim is not None:
         plt.ylim(*ylim)
     plt.xlabel("Training examples")
     plt.ylabel("Score")
     train_sizes, train_scores, test_scores = learning_curve(
         estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
     train_scores_mean = np.mean(train_scores, axis=1)
     train_scores_std = np.std(train_scores, axis=1)
     test_scores_mean = np.mean(test_scores, axis=1)
     test_scores_std = np.std(test_scores, axis=1)
     plt.grid()
 
     plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                      train_scores_mean + train_scores_std, alpha=0.1,
                      color="r")
     plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                      test_scores_mean + test_scores_std, alpha=0.1, color="g")
     plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
              label="Training score")
     plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
              label="Cross-validation score")
 
     plt.legend(loc="best")
     
     if filename != None:
         plt.savefig(filename)
     return plt
开发者ID:rbaxter1,项目名称:CS7641,代码行数:33,代码来源:plot_helper.py


示例4: plot

def plot(estimator, title, X, y, ylim=None, cv=None,
         n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    plt.figure()
    plt.title(title)
    if ylim is not None: plt.ylim(*ylim)
    plt.xlabel(u'Veri nokta sayısı')
    plt.ylabel(u"Hata")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores = 1.0-train_scores
    test_scores = 1.0-test_scores
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label=u'Eğitim hatası')
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label=u'Test hatası')
    plt.legend(loc="best")
    return plt
开发者ID:burakbayramli,项目名称:classnotes,代码行数:28,代码来源:lcurve.py


示例5: plot_learning_curve

def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, train_sizes=np.linspace(.05, 1., 20), verbose=0, plot=True):
    # 画出 data 在某模型上的 learning curve.
    # estimator: 你用的分类器
    # title: 表格的标题
    # X: 输入的 feature, numpy 类型
    # y: 输入的 target vector
    # ylim: tuple 格式的(ymin, ymax), 设定图像中纵坐标的最低点和最高点
    # cv: 做 cross-validation 的时候, 数据分成的份数, 其中一份作为 cv 集, 其余 n-1 份作为 training(默认为 3 份)
    # n_jobs: 并行的的任务数(默认 1)
    train_sizes, train_scores, test_scores = learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, verbose=verbose)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    if plot:
        plt.figure()
        plt.title(title)
        if ylim is not None: plt.ylim(*ylim)
        plt.xlabel("训练样本数")
        plt.ylabel("得分")
        plt.gca().invert_yaxis()
        plt.grid()
        plt.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=0.1, color="b")
        plt.fill_between(train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=0.1, color="r")
        plt.plot(train_sizes, train_scores_mean, 'o-', color="b", label="训练集上得分")
        plt.plot(train_sizes, test_scores_mean, 'o-', color="r", label="交叉验证集上得分")
        plt.legend(loc="best")
        plt.draw()
        plt.show()
        plt.gca().invert_yaxis()
    midpoint = ((train_scores_mean[-1] + train_scores_std[-1]) + (test_scores_mean[-1] - test_scores_std[-1])) / 2
    diff = (train_scores_mean[-1] + train_scores_std[-1]) - (test_scores_mean[-1] - test_scores_std[-1])
    return midpoint, diff
开发者ID:coder352,项目名称:shellscript,代码行数:33,代码来源:l6_Kaggle-Titanic_LogisticRegression_二分类-多属性间关系分析-缺失值处理-特征工程-模型融合.py


示例6: plot_learning_curve

def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    Generate a simple plot of the test and traning learning curve. Taken
    from sklearn website.

    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    title : string
        Title for the chart.

    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape (n_samples) or (n_samples, n_features), optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    ylim : tuple, shape (ymin, ymax), optional
        Defines minimum and maximum yvalues plotted.

    cv : integer, cross-validation generator, optional
        If an integer is passed, it is the number of folds (defaults to 3).
        Specific cross-validation objects can be passed, see
        sklearn.cross_validation module for the list of possible objects

    train_sizes : sizes to test over.

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    plt.show()
开发者ID:daniaki,项目名称:ppi_wrangler,代码行数:60,代码来源:plotting.py


示例7: plot_learning_curve

 def plot_learning_curve(self):
     # Plot the learning curve
     plt.figure(figsize=(9, 6))
     train_sizes, train_scores, test_scores = learning_curve(
         self.model, X=self.X_train, y=self.y_train,
         cv=3, scoring='neg_mean_squared_error')
     self.plot_learning_curve_helper(train_sizes, train_scores, test_scores, 'Learning Curve')
     plt.show()
开发者ID:wblx1024,项目名称:NLP-JD,代码行数:8,代码来源:train.py


示例8: test_learning_curve

    def test_learning_curve(self):
        digits = datasets.load_digits()
        df = pdml.ModelFrame(digits)

        result = df.learning_curve.learning_curve(df.naive_bayes.GaussianNB())
        expected = ms.learning_curve(nb.GaussianNB(), digits.data, digits.target)

        self.assertEqual(len(result), 3)
        self.assert_numpy_array_almost_equal(result[0], expected[0])
        self.assert_numpy_array_almost_equal(result[1], expected[1])
        self.assert_numpy_array_almost_equal(result[2], expected[2])
开发者ID:sinhrks,项目名称:pandas-ml,代码行数:11,代码来源:test_model_selection.py


示例9: plot_learning_curve

def plot_learning_curve(est, X, y):
    training_set_size, train_scores, test_scores = learning_curve(
        est, X, y, train_sizes=np.linspace(.1, 1, 20), cv=KFold(20, shuffle=True, random_state=1))
    estimator_name = est.__class__.__name__
    line = plt.plot(training_set_size, train_scores.mean(axis=1), '--',
                    label="training " + estimator_name)
    plt.plot(training_set_size, test_scores.mean(axis=1), '-',
             label="test " + estimator_name, c=line[0].get_color())
    plt.xlabel('Training set size')
    plt.ylabel('Score (R^2)')
    plt.ylim(0, 1.1)
开发者ID:ABcDexter,项目名称:introduction_to_ml_with_python,代码行数:11,代码来源:plot_ridge.py


示例10: test_learning_curve_unsupervised

def test_learning_curve_unsupervised():
    X, _ = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
开发者ID:447327642,项目名称:scikit-learn,代码行数:12,代码来源:test_validation.py


示例11: learning_curve

 def learning_curve(self, graphs, targets,
                    cv=5, n_steps=10, start_fraction=0.1):
     """learning_curve."""
     graphs, targets = paired_shuffle(graphs, targets)
     x = self.transform(graphs)
     train_sizes = np.linspace(start_fraction, 1.0, n_steps)
     scoring = 'roc_auc'
     train_sizes, train_scores, test_scores = learning_curve(
         self.model, x, targets,
         cv=cv, train_sizes=train_sizes,
         scoring=scoring)
     return train_sizes, train_scores, test_scores
开发者ID:fabriziocosta,项目名称:EDeN,代码行数:12,代码来源:estimator.py


示例12: test_learning_curve_batch_and_incremental_learning_are_equal

def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
开发者ID:447327642,项目名称:scikit-learn,代码行数:21,代码来源:test_validation.py


示例13: main

def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("data_dir")
    parser.add_argument('--method','-m',type=int,default=0,choices=range(5),
        help=
        """chose methods from:
                0:linear_svc
                1:logistic regression
                2:naive bayes
                3:decision  tree
                4:ExtraTreesClassifier
        """)
    args= parser.parse_args()

    silent_feature_vector,threshold_feature_vector,threshold_vector,silent_classification_vector\
        = load_data_set(args.data_dir)
    regr = linear_model.LinearRegression()
    clf = get_classifier(args.method)
    
    #regr_train_sizes = gene_train_sizes(len(threshold_feature_vector))        
    #clf_train_sizes = gene_train_sizes(len(silent_feature_vector)) 
    regr_train_sizes = [0.3,0.6,1.0]
    clf_train_sizes = [0.3,0.6,1.0]

    print "cross validation:"
    regr_train_sizes, regr_train_scores, regr_valid_scores =\
        learning_curve(regr, threshold_feature_vector, threshold_vector, train_sizes=regr_train_sizes, cv=5)
    
    clf_train_sizes, clf_train_scores, clf_valid_scores =\
        learning_curve(clf, silent_feature_vector, silent_classification_vector, train_sizes=clf_train_sizes, cv=5)

    print "Thresholding:"
    print regr_train_scores
    print regr_valid_scores

    print "-"*20

    print "Classification:"
    print clf_train_scores
    print clf_valid_scores
开发者ID:lukuang,项目名称:2016-rts,代码行数:40,代码来源:plt_learning_curve.py


示例14: test_learning_curve_with_boolean_indices

def test_learning_curve_with_boolean_indices():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    cv = KFold(n_folds=3)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
开发者ID:447327642,项目名称:scikit-learn,代码行数:13,代码来源:test_validation.py


示例15: ModelLearning

def ModelLearning(X, y):
    """ Calculates the performance of several models with varying sizes of training data.
        The learning and validation scores for each model are then plotted. """
    
    # Create 10 cross-validation sets for training and testing
    cv = ShuffleSplit(n_splits = 10, test_size = 0.2, random_state = 0)


    # Generate the training set sizes increasing by 50
    train_sizes = np.rint(np.linspace(1, X.shape[0]*0.8 - 1, 9)).astype(int)

    # Create the figure window
    fig = pl.figure(figsize=(10,7))

    # Create three different models based on max_depth
    for k, depth in enumerate([1,3,6,10]):
        
        # Create a Decision tree regressor at max_depth = depth
        regressor = DecisionTreeRegressor(max_depth = depth)

        # Calculate the training and testing scores
        sizes, train_scores, valid_scores = learning_curve(regressor, X, y, \
            cv = cv, train_sizes = train_sizes, scoring = 'r2')
        
        # Find the mean and standard deviation for smoothing
        train_std = np.std(train_scores, axis = 1)
        train_mean = np.mean(train_scores, axis = 1)
        valid_std = np.std(valid_scores, axis = 1)
        valid_mean = np.mean(valid_scores, axis = 1)

        # Subplot the learning curve 
        ax = fig.add_subplot(2, 2, k+1)
        ax.plot(sizes, train_mean, 'o-', color = 'r', label = 'Training Score')
        ax.plot(sizes, valid_mean, 'o-', color = 'g', label = 'Validation Score')
        ax.fill_between(sizes, train_mean - train_std, \
            train_mean + train_std, alpha = 0.15, color = 'r')
        ax.fill_between(sizes, valid_mean - valid_std, \
            valid_mean + valid_std, alpha = 0.15, color = 'g')
        
        # Labels
        ax.set_title('max_depth = %s'%(depth))
        ax.set_xlabel('Number of Training Points')
        ax.set_ylabel('r2_score')
        ax.set_xlim([0, X.shape[0]*0.8])
        ax.set_ylim([-0.05, 1.05])
    
    # Visual aesthetics
    ax.legend(bbox_to_anchor=(1.05, 2.05), loc='lower left', borderaxespad = 0.)
    fig.suptitle('Decision Tree Regressor Learning Performances', fontsize = 16, y = 1.03)
    fig.tight_layout()
    fig.show()
开发者ID:lssxfy123,项目名称:PythonStudy,代码行数:51,代码来源:visuals.py


示例16: test_learning_curve_incremental_learning

def test_learning_curve_incremental_learning():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)
    for shuffle_train in [False, True]:
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, exploit_incremental_learning=True,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=shuffle_train)
        assert_array_equal(train_sizes, np.linspace(2, 20, 10))
        assert_array_almost_equal(train_scores.mean(axis=1),
                                  np.linspace(1.9, 1.0, 10))
        assert_array_almost_equal(test_scores.mean(axis=1),
                                  np.linspace(0.1, 1.0, 10))
开发者ID:RomainBrault,项目名称:scikit-learn,代码行数:14,代码来源:test_validation.py


示例17: __calc_learning_curve

    def __calc_learning_curve(self, algorithm):
        estimator = algorithm.estimator
        train_sizes, train_scores, test_scores = learning_curve(
            estimator,
            self.data.X,
            self.data.y,
            cv=self.cv,
            scoring=self.scoring,
            n_jobs=self.n_jobs)  # parallel run in cross validation
        train_scores_mean = np.mean(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)

        return {'x': train_sizes, 'y_train': train_scores_mean,
                'y_cv': test_scores_mean}
开发者ID:canard0328,项目名称:malss,代码行数:14,代码来源:malss.py


示例18: test_learning_curve_implementation

def test_learning_curve_implementation():
    """
    Test to ensure that the learning curve results match scikit-learn
    """

    # This test is different from the other tests which just use regression data.
    # The reason is that we want this test to fail in case our implementation
    # diverges from the scikit-learn implementation. This test essentially
    # serves as a regression test as well.

    # Load in the digits data set
    digits = load_digits()
    X, y = digits.data, digits.target

    # get the learning curve results from scikit-learn for this data
    cv_folds = 10
    random_state = 123456789
    cv = ShuffleSplit(n_splits=cv_folds, test_size=0.2, random_state=random_state)
    estimator = MultinomialNB()
    train_sizes = np.linspace(.1, 1.0, 5)
    train_sizes1, train_scores1, test_scores1 = learning_curve(estimator,
                                                               X,
                                                               y,
                                                               cv=cv,
                                                               train_sizes=train_sizes,
                                                               scoring='accuracy')

    # get the features from this data into a FeatureSet instance we can use
    # with the SKLL API
    feature_names = ['f{:02}'.format(n) for n in range(X.shape[1])]
    features = []
    for row in X:
        features.append(dict(zip(feature_names, row)))
    fs = FeatureSet('train', features=features, labels=y, ids=list(range(X.shape[0])))

    # we don't want to filter out any features since scikit-learn
    # does not do that either
    learner = Learner('MultinomialNB', min_feature_count=0)
    (train_scores2,
     test_scores2,
     train_sizes2) = learner.learning_curve(fs,
                                            cv_folds=cv_folds,
                                            train_sizes=train_sizes,
                                            metric='accuracy')

    assert np.all(train_sizes1 == train_sizes2)
    assert np.allclose(train_scores1, train_scores2)
    assert np.allclose(test_scores1, test_scores2)
开发者ID:EducationalTestingService,项目名称:skll,代码行数:48,代码来源:test_output.py


示例19: plot_learning_curve

def plot_learning_curve(mod, X, y, cv, n_jobs, title, ax=None, invert=True):
    '''
    Generates a simple plot of test & training learning curves.
    Inspired from https://github.com/cs109/a-2017/blob/master/Sections/Standard/section_9_student.ipynb
    and from lecture/section.
    
    Inputs:
    -----------------------------------------------------------------
     mod: model for which learning curve must be plotted
     X: predictor data 
     y: true labels
     cv: number cross validation iterations
     n_jobs: number of cores (-1 for all available)
     ax: optional matplotlib Axes object on which to plot
    
    Outputs:
    -----------------------------------------------------------------
     None: plotted learning curves
    '''
    plt.style.use('seaborn-whitegrid')
    
    train_sizes, train_scores, test_scores = learning_curve(mod, X=X, y=y_train.values.ravel(), cv=20, n_jobs=-1)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    
    if ax == None: fig, ax = plt.subplots(figsize=(12, 7))
    if invert: ax.invert_yaxis()
        
    ax.plot(train_sizes, train_scores_mean, 'o-', color='r', label='training score')
    ax.plot(train_sizes, test_scores_mean, 'o-', color='g', label='test score')
    ax.set_xlabel('Training Examples')
    ax.set_ylabel('Score')
    ax.set_title(title)
    ax.grid(alpha=0.5)
    sns.despine(bottom=True, left=True)
    ax.legend(loc='best')
    ax.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    ax.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    
    return None
开发者ID:paulmattheww,项目名称:Original-Projects,代码行数:46,代码来源:plot_learning_curve.py


示例20: test_learning_curve_verbose

def test_learning_curve_verbose():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        train_sizes, train_scores, test_scores = \
            learning_curve(estimator, X, y, cv=3, verbose=1)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert("[learning_curve]" in out)
开发者ID:447327642,项目名称:scikit-learn,代码行数:17,代码来源:test_validation.py



注:本文中的sklearn.model_selection.learning_curve函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python model_selection.train_test_split函数代码示例发布时间:2022-05-27
下一篇:
Python model_selection.cross_val_score函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap