Python experiments.run_configuration function: code examples


This article collects typical usage examples of the Python function skll.experiments.run_configuration. If you are wondering what run_configuration does, how to call it, or what real-world uses of it look like, the curated examples below should help.



A total of 20 code examples of run_configuration are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Python code examples.
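For orientation before the examples, here is a minimal sketch of the typical call pattern: build (or fill in) a SKLL configuration file and pass its path to run_configuration. The sketch mirrors the config structure used in Example 13 below; the experiment name, directory names, and feature set name are placeholders, and the exact set of supported config options depends on your SKLL version.

# Minimal sketch of driving a SKLL experiment with run_configuration.
# Paths, the experiment name, and the featureset name are placeholders.
import json
from configparser import ConfigParser

from skll.experiments import run_configuration

cfg_dict = {"General": {"task": "train",
                        "experiment_name": "my_experiment"},
            "Input": {"train_location": "train",
                      "featuresets": json.dumps([["my_features"]]),
                      "learners": json.dumps(["LogisticRegression"]),
                      "suffix": ".jsonlines"},
            "Tuning": {"grid_search": "False"},
            "Output": {"models": "models",
                       "log": "output"}}

# write the config file in INI format, as Example 13 does
cfg = ConfigParser()
for section_name, section_dict in cfg_dict.items():
    cfg.add_section(section_name)
    for key, val in section_dict.items():
        cfg.set(section_name, key, val)

with open("my_experiment.cfg", "w") as config_file:
    cfg.write(config_file)

# run the experiment described by the config file; quiet=True suppresses
# progress output, as in most of the test examples below
run_configuration("my_experiment.cfg", quiet=True)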

Example 1: test_folds_file_logging_num_folds

def test_folds_file_logging_num_folds():
    """
    Test when using `folds_file`, log shows number of folds and appropriate warning.
    """
    # Run experiment
    suffix = '.jsonlines'
    train_path = join(_my_dir, 'train', 'f0{}'.format(suffix))

    config_path = fill_in_config_paths_for_single_file(join(_my_dir,
                                                            "configs",
                                                            "test_folds_file"
                                                            ".template.cfg"),
                                                       train_path,
                                                       None)
    run_configuration(config_path, quiet=True)

    # Check experiment log output
    with open(join(_my_dir,
                   'output',
                   'test_folds_file_logging.log')) as f:
        cv_file_pattern = re.compile('Specifying "folds_file" overrides both explicit and default "num_cv_folds".')
        matches = re.findall(cv_file_pattern, f.read())
        assert_equal(len(matches), 1)

    # Check job log output
    with open(join(_my_dir,
                   'output',
                   'test_folds_file_logging_train_f0.'
                   'jsonlines_LogisticRegression.log')) as f:
        cv_folds_pattern = re.compile("(Task: cross_validate\n)(.+)(Cross-validating \([0-9]+ folds\))")
        matches = re.findall(cv_folds_pattern, f.read())
        assert_equal(len(matches), 1)
Author: EducationalTestingService, Project: skll, Lines: 32, Source: test_cv.py


Example 2: check_specified_cv_folds

def check_specified_cv_folds(numeric_ids):
    make_cv_folds_data(numeric_ids)

    # test_cv_folds1.cfg has prespecified folds and should have ~50% accuracy
    # test_cv_folds2.cfg doesn't have prespecified folds and >95% accuracy
    for experiment_name, test_func, grid_size in [('test_cv_folds1',
                                                   lambda x: x < 0.6,
                                                   3),
                                                  ('test_cv_folds2',
                                                   lambda x: x > 0.95,
                                                   10)]:
        config_template_file = '{}.template.cfg'.format(experiment_name)
        config_template_path = os.path.join(_my_dir, 'configs',
                                            config_template_file)
        config_path = os.path.join(_my_dir,
                                   fill_in_config_paths(config_template_path))

        # Modify config file to change ids_to_floats depending on numeric_ids
        # setting
        with open(config_path, 'r+') as config_template_file:
            lines = config_template_file.readlines()
            config_template_file.seek(0)
            config_template_file.truncate()
            for line in lines:
                if line.startswith('ids_to_floats='):
                    if numeric_ids:
                        line = 'ids_to_floats=true\n'
                    else:
                        line = 'ids_to_floats=false\n'
                config_template_file.write(line)

        run_configuration(config_path, quiet=True)
        result_filename = ('{}_test_cv_folds_LogisticRegression.' +
                           'results').format(experiment_name)
        with open(os.path.join(_my_dir, 'output', result_filename)) as f:
            # check held out scores
            outstr = f.read()
            score = float(SCORE_OUTPUT_RE.search(outstr).groups()[-1])
            assert test_func(score)

            grid_score_matches = GRID_RE.findall(outstr)
            assert len(grid_score_matches) == grid_size
            for match_str in grid_score_matches:
                assert test_func(float(match_str))

    # try the same tests for just training (and specifying the folds for the
    # grid search)
    dirpath = os.path.join(_my_dir, 'train')
    suffix = '.jsonlines'
    featureset = ['test_cv_folds']
    examples = _load_featureset(dirpath, featureset, suffix, quiet=True)
    clf = Learner('LogisticRegression', probability=True)
    cv_folds = _load_cv_folds(os.path.join(_my_dir, 'train',
                                           'test_cv_folds.csv'))
    grid_search_score = clf.train(examples, grid_search_folds=cv_folds,
                                  grid_objective='accuracy', grid_jobs=1)
    assert grid_search_score < 0.6
    grid_search_score = clf.train(examples, grid_search_folds=5,
                                  grid_objective='accuracy', grid_jobs=1)
    assert grid_search_score > 0.95
Author: wavelets, Project: skll, Lines: 60, Source: test_skll.py


Example 3: test_scaling

def test_scaling():
    '''
    Test to validate whether feature scaling works
    '''
    make_scaling_data()

    # run the experiment without scaling
    config_template_path = os.path.join(_my_dir, 'configs', 'test_scaling_without.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # now run the version with scaling
    config_template_path = os.path.join(_my_dir, 'configs', 'test_scaling_with.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # make sure that the result with and without scaling aren't the same
    with open(os.path.join(_my_dir, 'output', 'without_scaling_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        row = list(reader)[0]
        without_scaling_score = row['score']
        without_scaling_scaling_value = row['feature_scaling']

    with open(os.path.join(_my_dir, 'output', 'with_scaling_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        row = list(reader)[0]
        with_scaling_score = row['score']
        with_scaling_scaling_value = row['feature_scaling']

    assert_not_equal(without_scaling_score, with_scaling_score)
    eq_(without_scaling_scaling_value, 'none')
    eq_(with_scaling_scaling_value, 'both')
Author: wavelets, Project: skll, Lines: 34, Source: test_skll.py


Example 4: test_regression1

def test_regression1():
    '''
    This is a bit of a contrived test, but it should fail
    if anything drastic happens to the regression code.
    '''

    y = make_regression_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_regression1.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    config_template_path = "test_regression1.cfg"

    run_configuration(os.path.join(_my_dir, config_path), quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_regression1_test_regression1_RescaledRidge.results')) as f:
        # check held out scores
        outstr = f.read()
        score = float(SCORE_OUTPUT_RE.search(outstr).groups()[-1])
        assert score > 0.7

    with open(os.path.join(_my_dir, 'output', 'test_regression1_test_regression1_RescaledRidge.predictions'), 'r') as f:
        reader = csv.reader(f, dialect='excel-tab')
        next(reader)
        pred = [float(row[1]) for row in reader]

        assert np.min(pred) >= np.min(y)
        assert np.max(pred) <= np.max(y)

        assert abs(np.mean(pred) - np.mean(y)) < 0.1
        assert abs(np.std(pred) - np.std(y)) < 0.1
Author: wavelets, Project: skll, Lines: 31, Source: test_skll.py


Example 5: test_ablation_cv_feature_hasher

def test_ablation_cv_feature_hasher():
    """
    Test if ablation works with cross-validate and feature_hasher
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs',
                                'test_ablation_feature_hasher.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=1)

    # read in the summary file and make sure it has
    # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 154)

    # make sure there are 7 ablated featuresets * 2 learners = 14 results files
    num_result_files = len(glob.glob(join(_my_dir, 'output',
                                          ('ablation_cv_feature_hasher_'
                                           '*.results'))))
    eq_(num_result_files, 14)
Author: BK-University, Project: skll, Lines: 26, Source: test_ablation.py


Example 6: test_learning_curve_output

def test_learning_curve_output():
    """
    Test learning curve output for experiment with metrics option
    """

    # Test to validate learning curve output
    make_learning_curve_data()

    config_template_path = join(_my_dir, 'configs', 'test_learning_curve.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    # run the learning curve experiment
    run_configuration(config_path, quiet=True)
    outprefix = 'test_learning_curve'

    # make sure that the TSV file is created with the right columns
    output_tsv_path = join(_my_dir, 'output', '{}_summary.tsv'.format(outprefix))
    ok_(exists(output_tsv_path))
    with open(output_tsv_path, 'r') as tsvf:
        r = csv.reader(tsvf, dialect=csv.excel_tab)
        header = next(r)
        # make sure we have the expected number of columns
        eq_(len(header), 11)
        num_rows = len(list(r))
        # we should have 2 featuresets x 3 learners x 2 objectives x 5 (default)
        # training sizes = 60 rows
        eq_(num_rows, 60)

    # make sure that the two PNG files (one per featureset) are created
    # if the requirements are satisfied
    if _HAVE_SEABORN:
        for featureset_name in ["test_learning_curve1", "test_learning_curve2"]:
            ok_(exists(join(_my_dir,
                            'output',
                            '{}_{}.png'.format(outprefix, featureset_name))))
Author: EducationalTestingService, Project: skll, Lines: 35, Source: test_output.py


Example 7: test_train_file_test_file_ablation

def test_train_file_test_file_ablation():
    """
    Test that specifying ablation with train and test file is ignored
    """
    # Create data files
    make_single_file_featureset_data()

    # Run experiment
    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_single_file"
                                                            ".template.cfg"),
                                                       join(_my_dir, 'train',
                                                            'train_single_file'
                                                            '.jsonlines'),
                                                       join(_my_dir, 'test',
                                                            'test_single_file.'
                                                            'jsonlines'))
    run_configuration(config_path, quiet=True, ablation=None)

    # check that we see the message that ablation was ignored in the experiment log
    # Check experiment log output
    with open(join(_my_dir,
                   'output',
                   'train_test_single_file.log')) as f:
        cv_file_pattern = re.compile('Not enough featuresets for ablation. Ignoring.')
        matches = re.findall(cv_file_pattern, f.read())
        eq_(len(matches), 1)
Author: EducationalTestingService, Project: skll, Lines: 27, Source: test_classification.py


Example 8: test_ablation_cv_feature_hasher_all_combos_sampler

def test_ablation_cv_feature_hasher_all_combos_sampler():
    """
    Test to validate whether ablation works with cross-validate
    and feature_hasher
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs', ('test_ablation_feature_'
                                                     'hasher_sampler.template'
                                                     '.cfg'))
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(glob.glob(join(_my_dir, 'output',
                                          ('ablation_cv_feature_hasher_'
                                           '*results'))))
    eq_(num_result_files, 20)
Author: BK-University, Project: skll, Lines: 29, Source: test_ablation.py


Example 9: test_predict_on_subset_with_existing_model

def test_predict_on_subset_with_existing_model():
    """
    Test generating predictions on subset with existing model
    """
    # Create data files
    make_single_file_featureset_data()

    # train and save a model on the training file
    train_fs = NDJReader.for_path(join(_my_dir, 'train', 'train_single_file.jsonlines')).read()
    learner = Learner('RandomForestClassifier')
    learner.train(train_fs, grid_search=True, grid_objective="accuracy")
    model_filename = join(_my_dir, 'output', ('train_test_single_file_train_train_'
                                              'single_file.jsonlines_test_test_single'
                                              '_file_subset.jsonlines_RandomForestClassifier'
                                              '.model'))

    learner.save(model_filename)

    # Run experiment
    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_single_file_saved_subset"
                                                            ".template.cfg"),
                                                       join(_my_dir, 'train', 'train_single_file.jsonlines'),
                                                       join(_my_dir, 'test',
                                                            'test_single_file_subset.'
                                                            'jsonlines'))
    run_configuration(config_path, quiet=True, overwrite=False)

    # Check results
    with open(join(_my_dir, 'output', ('train_test_single_file_train_train_'
                                       'single_file.jsonlines_test_test_single'
                                       '_file_subset.jsonlines_RandomForestClassifier'
                                       '.results.json'))) as f:
        result_dict = json.load(f)[0]
    assert_almost_equal(result_dict['accuracy'], 0.7333333)
Author: EducationalTestingService, Project: skll, Lines: 35, Source: test_classification.py


Example 10: test_ablation_cv_feature_hasher_all_combos

def test_ablation_cv_feature_hasher_all_combos():
    """
    Test ablation all-combos + cross-validation + feature hashing
    """

    config_template_path = join(_my_dir,
                                'configs',
                                'test_ablation_feature_hasher_all_combos.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    #    10 ablated featuresets
    #      * (10 folds + 1 average line)
    #      * 2 learners
    #    = 220 lines in total
    with open(join(_my_dir,
                   'output',
                   'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(glob(join(_my_dir,
                                     'output',
                                     'ablation_cv_feature_hasher_all_combos*.results')))
    eq_(num_result_files, 20)
Author: EducationalTestingService, Project: skll, Lines: 30, Source: test_ablation.py


Example 11: test_train_file_test_file

def test_train_file_test_file():
    """
    Test that train_file and test_file experiments work
    """
    # Create data files
    make_single_file_featureset_data()

    # Run experiment
    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_single_file"
                                                            ".template.cfg"),
                                                       join(_my_dir, 'train',
                                                            'train_single_file'
                                                            '.jsonlines'),
                                                       join(_my_dir, 'test',
                                                            'test_single_file.'
                                                            'jsonlines'))
    run_configuration(config_path, quiet=True)

    # Check results
    with open(join(_my_dir, 'output', ('train_test_single_file_train_train_'
                                       'single_file.jsonlines_test_test_single'
                                       '_file.jsonlines_RandomForestClassifier'
                                       '.results.json'))) as f:
        result_dict = json.load(f)[0]

    assert_almost_equal(result_dict['score'], 0.925)
Author: MechCoder, Project: skll, Lines: 27, Source: test_classification.py


Example 12: test_custom_learner_model_loading

def test_custom_learner_model_loading():
    num_labels = 10

    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_model_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_model_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    # run the configuration that trains the custom model and saves it
    cfgfile = 'test_model_save_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # save the predictions from disk into memory
    # and delete the predictions file
    outprefix = 'test_model_custom_learner'
    pred_file = join(_my_dir, 'output',
                     '{}_{}_CustomLogisticRegressionWrapper'
                     '.predictions'.format(outprefix,
                                           outprefix))
    preds1 = read_predictions(pred_file)
    os.unlink(pred_file)

    # run the configuration that loads the saved model
    # and generates the predictions again
    cfgfile = 'test_model_load_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, overwrite=False, quiet=True)

    # load the newly generated predictions
    preds2 = read_predictions(pred_file)

    # make sure that they are the same as before
    assert_array_equal(preds1, preds2)
Author: BK-University, Project: skll, Lines: 54, Source: test_custom_learner.py


Example 13: train_rst_parsing_model

def train_rst_parsing_model(working_path, model_path, parameter_settings):
    '''
    parameter_settings is a dict of scikit-learn hyperparameter settings
    '''

    C_value = parameter_settings['C']
    working_subdir = os.path.join(working_path, 'C{}'.format(C_value))
    assert not os.path.exists(working_subdir)
    os.makedirs(working_subdir)

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    learner_name = 'LogisticRegression'
    fixed_parameters = [{'random_state': 123456789, 'penalty': 'l1',
                         'C': C_value}]

    # Make the SKLL config file.
    cfg_dict = {"General": {"task": "train",
                            "experiment_name": "rst_parsing"},
                "Input": {"train_location": working_path,
                          "ids_to_floats": "False",
                          "featuresets": json.dumps([["rst_parsing"]]),
                          "featureset_names": json.dumps(["all_feats"]),
                          "suffix": '.jsonlines',
                          "fixed_parameters": json.dumps(fixed_parameters),
                          "learners": json.dumps([learner_name])},
                "Tuning": {"feature_scaling": "none",
                           "grid_search": "False",
                           "min_feature_count": "1"},
                "Output": {"probability": "True",
                           "models": model_path,
                           "log": working_subdir}
               }

    # write config file
    cfg_path = os.path.join(working_subdir, 'rst_parsing.cfg')
    cfg = ConfigParser()
    for section_name, section_dict in list(cfg_dict.items()):
        cfg.add_section(section_name)
        for key, val in section_dict.items():
            cfg.set(section_name, key, val)

    assert not os.path.exists(cfg_path)
    with open(cfg_path, 'w') as config_file:
        cfg.write(config_file)

    # run SKLL
    run_configuration(cfg_path)

    # make the model smaller/faster
    minimize_model(model_path,
                   'rst_parsing_all_feats_LogisticRegression.model')
Author: BinbinBian, Project: discourse-parsing, Lines: 53, Source: tune_rst_parser.py


Example 14: test_class_map

def test_class_map():
    make_class_map_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_class_map.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_class_map_test_class_map_LogisticRegression.results')) as f:
        outstr = f.read()
        logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    assert_almost_equal(logistic_result_score, 0.5)
Author: wavelets, Project: skll, Lines: 13, Source: test_skll.py


Example 15: test_sparse_predict

def test_sparse_predict():
    '''
    Test to validate whether predict works with sparse data
    '''
    make_sparse_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_sparse.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_sparse_test_sparse_LogisticRegression.results')) as f:
        outstr = f.read()
        logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    assert_almost_equal(logistic_result_score, 0.5)
Author: wavelets, Project: skll, Lines: 16, Source: test_skll.py


Example 16: test_summary

def test_summary():
    '''
    Test to validate summary file scores
    '''
    make_summary_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_summary.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(os.path.join(_my_dir, 'output', 'test_summary_test_summary_LogisticRegression.results')) as f:
        outstr = f.read()
        logistic_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    with open(os.path.join(_my_dir, 'output', 'test_summary_test_summary_MultinomialNB.results')) as f:
        outstr = f.read()
        naivebayes_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    with open(os.path.join(_my_dir, 'output', 'test_summary_test_summary_SVC.results')) as f:
        outstr = f.read()
        svm_result_score = float(SCORE_OUTPUT_RE.search(outstr).groups()[0])

    with open(os.path.join(_my_dir, 'output', 'test_summary_summary.tsv'), 'r') as f:
        reader = csv.DictReader(f, dialect='excel-tab')

        for row in reader:
            # each learner's row in the summary file should have 18 fields,
            # and all of these except results_table
            # should be printed (though some columns will be blank).
            eq_(len(row), 18)
            assert row['model_params']
            assert row['grid_score']
            assert row['score']

            if row['learner_name'] == 'LogisticRegression':
                logistic_summary_score = float(row['score'])
            elif row['learner_name'] == 'MultinomialNB':
                naivebayes_summary_score = float(row['score'])
            elif row['learner_name'] == 'SVC':
                svm_summary_score = float(row['score'])

    for result_score, summary_score, learner_name in [(logistic_result_score, logistic_summary_score, 'LogisticRegression'), (naivebayes_result_score, naivebayes_summary_score, 'MultinomialNB'), (svm_result_score, svm_summary_score, 'SVC')]:
        yield check_summary_score, result_score, summary_score, learner_name
Author: wavelets, Project: skll, Lines: 44, Source: test_skll.py


Example 17: test_cross_validate_task

def test_cross_validate_task():
    """
    Test that 10-fold cross_validate experiments work.
    Test that fold ids get correctly saved.
    """

    # Run experiment
    suffix = '.jsonlines'
    train_path = join(_my_dir, 'train', 'f0{}'.format(suffix))

    config_path = fill_in_config_paths_for_single_file(join(_my_dir, "configs",
                                                            "test_save_cv_folds"
                                                            ".template.cfg"),
                                                       train_path,
                                                       None)
    run_configuration(config_path, quiet=True)

    # Check final average results
    with open(join(_my_dir, 'output', 'test_save_cv_folds_train_f0.' +
                                      'jsonlines_LogisticRegression.results.json')) as f:
        result_dict = json.load(f)[10]

    assert_almost_equal(result_dict['score'], 0.517)

    # Check that the fold ids were saved correctly
    expected_skll_ids = {}
    examples = _load_featureset(train_path, '', suffix, quiet=True)
    kfold = StratifiedKFold(examples.labels, n_folds=10)
    for fold_num, (_, test_indices) in enumerate(kfold):
        for index in test_indices:
            expected_skll_ids[examples.ids[index]] = fold_num

    skll_fold_ids = {}
    with open(join(_my_dir, 'output', 'test_save_cv_folds_skll_fold_ids.csv')) as f:
        reader = csv.DictReader(f)
        for row in reader:
            skll_fold_ids[row['id']] = row['cv_test_fold']

    # convert the dictionary to strings (sorted by key) for quick comparison
    skll_fold_ids_str = ''.join('{}{}'.format(key, val) for key, val in sorted(skll_fold_ids.items()))
    expected_skll_ids_str = ''.join('{}{}'.format(key, val) for key, val in sorted(expected_skll_ids.items()))

    assert_equal(skll_fold_ids_str, expected_skll_ids_str)
Author: ChristianGeng, Project: skll, Lines: 43, Source: test_cv.py


Example 18: test_learning_curve_plots

def test_learning_curve_plots():
    """
    Test learning curve plots for experiment with metrics option
    """

    # Test to validate learning curve output
    make_learning_curve_data()

    config_template_path = join(_my_dir, 'configs', 'test_learning_curve.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    # run the learning curve experiment
    run_configuration(config_path, quiet=True)
    outprefix = 'test_learning_curve'

    # make sure that the two PNG files (one per featureset) are created
    for featureset_name in ["test_learning_curve1", "test_learning_curve2"]:
        ok_(exists(join(_my_dir,
                        'output',
                        '{}_{}.png'.format(outprefix, featureset_name))))
Author: EducationalTestingService, Project: skll, Lines: 20, Source: test_output.py


Example 19: test_predict

def test_predict():
    '''
    This tests whether predict task runs.
    '''

    make_regression_data()

    config_template_path = os.path.join(_my_dir, 'configs', 'test_predict.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(os.path.join(_my_dir, config_path), quiet=True)

    with open(os.path.join(_my_dir, 'test', 'test_regression1.jsonlines')) as test_file:
        inputs = [x for x in test_file]
        assert len(inputs) == 1000

    with open(os.path.join(_my_dir, 'output', 'test_predict_test_regression1_RescaledRidge.predictions')) as outfile:
        reader = csv.DictReader(outfile, dialect=csv.excel_tab)
        predictions = [x['prediction'] for x in reader]
        assert len(predictions) == len(inputs)
Author: wavelets, Project: skll, Lines: 20, Source: test_skll.py


Example 20: test_logistic_custom_learner

def test_logistic_custom_learner():
    num_labels = 10

    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_logistic_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_logistic_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    cfgfile = 'test_logistic_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = 'test_logistic_custom_learner'
    preds = read_predictions(join(_my_dir, 'output',
                                  ('{}_{}_CustomLogisticRegressionWrapper'
                                   '_predictions.tsv'.format(outprefix,
                                                             outprefix))))

    expected = read_predictions(join(_my_dir, 'output',
                                     ('{}_{}_LogisticRegression_predictions.tsv'
                                      .format(outprefix, outprefix))))

    assert_array_equal(preds, expected)
Author: EducationalTestingService, Project: skll, Lines: 41, Source: test_custom_learner.py



Note: The skll.experiments.run_configuration examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective developers, and copyright remains with the original authors; please refer to each project's license before distributing or using the code. Do not reproduce this article without permission.

