• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python base.dataset_wizard函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中mvpa.datasets.base.dataset_wizard函数的典型用法代码示例。如果您正苦于以下问题:Python dataset_wizard函数的具体用法?Python dataset_wizard怎么用?Python dataset_wizard使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了dataset_wizard函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_correct_dimensions_order

    def test_correct_dimensions_order(self, clf):
        """To check if known/present Classifiers are working properly
        with samples being first dimension. Started to worry about
        possible problems while looking at sg where samples are 2nd
        dimension
        """
        # specially crafted dataset -- if dimensions are flipped over
        # the same storage, problem becomes unseparable. Like in this case
        # incorrect order of dimensions lead to equal samples [0, 1, 0]
        traindatas = [
            dataset_wizard(samples=np.array([ [0, 0, 1.0],
                                        [1, 0, 0] ]), targets=[0, 1]),
            dataset_wizard(samples=np.array([ [0, 0.0],
                                      [1, 1] ]), targets=[0, 1])]

        clf.ca.change_temporarily(enable_ca = ['training_stats'])
        for traindata in traindatas:
            clf.train(traindata)
            self.failUnlessEqual(clf.ca.training_stats.percent_correct, 100.0,
                "Classifier %s must have 100%% correct learning on %s. Has %f" %
                (`clf`, traindata.samples, clf.ca.training_stats.percent_correct))

            # and we must be able to predict every original sample thus
            for i in xrange(traindata.nsamples):
                sample = traindata.samples[i,:]
                predicted = clf.predict([sample])
                self.failUnlessEqual([predicted], traindata.targets[i],
                    "We must be able to predict sample %s using " % sample +
                    "classifier %s" % `clf`)
        clf.ca.reset_changed_temporarily()
开发者ID:esc,项目名称:PyMVPA,代码行数:30,代码来源:test_clf.py


示例2: test_feature_selection_classifier

    def test_feature_selection_classifier(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector

        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()
        # should give lowest weight to the feature with highest index
        sens_ana_rev = SillySensitivityAnalyzer(mult=-1)

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        feat_sel_rev = SensitivityBasedFeatureSelection(sens_ana_rev,
            FixedNElementTailSelector(1))

        samples = np.array([ [0, 0, -1], [1, 0, 1], [-1, -1, 1],
                            [-1, 0, 1], [1, -1, 1] ])

        testdata3 = dataset_wizard(samples=samples, targets=1)
        # dummy train data so proper mapper gets created
        traindata = dataset_wizard(samples=np.array([ [0, 0, -1], [1, 0, 1] ]),
                            targets=[1, 2])

        # targets
        res110 = [1, 1, 1, -1, -1]
        res011 = [-1, 1, -1, 1, -1]

        # first classifier -- 0th feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel,
                    enable_ca=['feature_ids'])

        self.clf_sign.ca.change_temporarily(enable_ca=['estimates'])
        clf011.train(traindata)

        self.failUnlessEqual(clf011.predict(testdata3.samples), res011)
        # just silly test if we get values assigned in the 'ProxyClassifier'
        self.failUnless(len(clf011.ca.estimates) == len(res110),
                        msg="We need to pass values into ProxyClassifier")
        self.clf_sign.ca.reset_changed_temporarily()

        self.failUnlessEqual(clf011.mapper._oshape, (2,))
        "Feature selection classifier had to be trained on 2 features"

        # first classifier -- last feature should be discarded
        clf011 = FeatureSelectionClassifier(self.clf_sign, feat_sel_rev)
        clf011.train(traindata)
        self.failUnlessEqual(clf011.predict(testdata3.samples), res110)
开发者ID:esc,项目名称:PyMVPA,代码行数:50,代码来源:test_clf.py


示例3: test_coarsen_chunks

    def test_coarsen_chunks(self):
        """Just basic testing for now"""
        chunks = [1,1,2,2,3,3,4,4]
        ds = dataset_wizard(samples=np.arange(len(chunks)).reshape(
            (len(chunks),1)), targets=[1]*8, chunks=chunks)
        coarsen_chunks(ds, nchunks=2)
        chunks1 = coarsen_chunks(chunks, nchunks=2)
        self.failUnless((chunks1 == ds.chunks).all())
        self.failUnless((chunks1 == np.asarray([0,0,0,0,1,1,1,1])).all())

        ds2 = dataset_wizard(samples=np.arange(len(chunks)).reshape(
            (len(chunks),1)), targets=[1]*8, chunks=range(len(chunks)))
        coarsen_chunks(ds2, nchunks=2)
        self.failUnless((chunks1 == ds.chunks).all())
开发者ID:B-Rich,项目名称:PyMVPA,代码行数:14,代码来源:test_datasetfx.py


示例4: test_feature_selection_classifier_with_regression

    def test_feature_selection_classifier_with_regression(self):
        from mvpa.featsel.base import \
             SensitivityBasedFeatureSelection
        from mvpa.featsel.helpers import \
             FixedNElementTailSelector
        if sample_clf_reg is None:
            # none regression was found, so nothing to test
            return
        # should give lowest weight to the feature with lowest index
        sens_ana = SillySensitivityAnalyzer()

        # corresponding feature selections
        feat_sel = SensitivityBasedFeatureSelection(sens_ana,
            FixedNElementTailSelector(1, mode='discard'))

        # now test with regression-based classifier. The problem is
        # that it is determining predictions twice from values and
        # then setting the values from the results, which the second
        # time is set to predictions.  The final outcome is that the
        # values are actually predictions...
        dat = dataset_wizard(samples=np.random.randn(4, 10),
                      targets=[-1, -1, 1, 1])
        clf_reg = FeatureSelectionClassifier(sample_clf_reg, feat_sel)
        clf_reg.train(dat)
        _ = clf_reg.predict(dat.samples)
        self.failIf((np.array(clf_reg.ca.estimates)
                     - clf_reg.ca.predictions).sum()==0,
                    msg="Values were set to the predictions in %s." %
                    sample_clf_reg)
开发者ID:esc,项目名称:PyMVPA,代码行数:29,代码来源:test_clf.py


示例5: dumb_feature_binary_dataset

def dumb_feature_binary_dataset():
    """Very simple binary (2 labels) dataset
    """
    data = [
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 1],
        [3, 0],
        [3, 1],
        [4, 0],
        [4, 1],
        [5, 0],
        [5, 1],
        [6, 0],
        [6, 1],
        [7, 0],
        [7, 1],
        [8, 0],
        [8, 1],
        [9, 0],
        [9, 1],
        [10, 0],
        [10, 1],
        [11, 0],
        [11, 1],
        [12, 0],
        [12, 1],
    ]
    regs = ([0] * 12) + ([1] * 12)

    return dataset_wizard(samples=np.array(data), targets=regs, chunks=range(len(regs)))
开发者ID:arokem,项目名称:PyMVPA,代码行数:32,代码来源:data_generators.py


示例6: dumb_feature_dataset

def dumb_feature_dataset():
    """Create a very simple dataset with 2 features and 3 labels
    """
    data = [
        [1, 0],
        [1, 1],
        [2, 0],
        [2, 1],
        [3, 0],
        [3, 1],
        [4, 0],
        [4, 1],
        [5, 0],
        [5, 1],
        [6, 0],
        [6, 1],
        [7, 0],
        [7, 1],
        [8, 0],
        [8, 1],
        [9, 0],
        [9, 1],
        [10, 0],
        [10, 1],
        [11, 0],
        [11, 1],
        [12, 0],
        [12, 1],
    ]
    regs = ([1] * 8) + ([2] * 8) + ([3] * 8)

    return dataset_wizard(samples=np.array(data), targets=regs, chunks=range(len(regs)))
开发者ID:arokem,项目名称:PyMVPA,代码行数:32,代码来源:data_generators.py


示例7: linear1d_gaussian_noise

def linear1d_gaussian_noise(size=100, slope=0.5, intercept=1.0, x_min=-2.0, x_max=3.0, sigma=0.2):
    """A straight line with some Gaussian noise.
    """
    x = np.linspace(start=x_min, stop=x_max, num=size)
    noise = np.random.randn(size) * sigma
    y = x * slope + intercept + noise
    return dataset_wizard(samples=x[:, None], targets=y)
开发者ID:arokem,项目名称:PyMVPA,代码行数:7,代码来源:data_generators.py


示例8: test_origid_handling

def test_origid_handling():
    ds = dataset_wizard(np.atleast_2d(np.arange(35)).T)
    ds.init_origids('both')
    ok_(ds.nsamples == 35)
    assert_equal(len(np.unique(ds.sa.origids)), 35)
    assert_equal(len(np.unique(ds.fa.origids)), 1)
    selector = [3, 7, 10, 15]
    subds = ds[selector]
    assert_array_equal(subds.sa.origids, ds.sa.origids[selector])

    # Now if we request new origids if they are present we could
    # expect different behavior
    assert_raises(ValueError, subds.init_origids, 'both', mode='raises')
    sa_origids = subds.sa.origids.copy()
    fa_origids = subds.fa.origids.copy()
    for s in ('both', 'samples', 'features'):
        assert_raises(RuntimeError, subds.init_origids, s, mode='raise')
        subds.init_origids(s, mode='existing')
        # we should have the same origids as before
        assert_array_equal(subds.sa.origids, sa_origids)
        assert_array_equal(subds.fa.origids, fa_origids)

    # Lets now change, which should be default behavior
    subds.init_origids('both')
    assert_equal(len(sa_origids), len(subds.sa.origids))
    assert_equal(len(fa_origids), len(subds.fa.origids))
    # values should change though
    ok_((sa_origids != subds.sa.origids).any())
    ok_((fa_origids != subds.fa.origids).any())
开发者ID:geeragh,项目名称:PyMVPA,代码行数:29,代码来源:test_datasetng.py


示例9: test_idhash

def test_idhash():
    ds = dataset_wizard(np.arange(12).reshape((4, 3)),
                 targets=1, chunks=1)
    origid = ds.idhash
    #XXX BUG -- no assurance that labels would become an array... for now -- do manually
    ds.targets = np.array([3, 1, 2, 3])   # change all labels
    ok_(origid != ds.idhash,
                    msg="Changing all targets should alter dataset's idhash")

    origid = ds.idhash

    z = ds.targets[1]
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z = ds.chunks
    assert_equal(origid, ds.idhash,
                 msg="Accessing shouldn't change idhash")
    z[2] = 333
    ok_(origid != ds.idhash,
        msg="Changing value in attribute should change idhash")

    origid = ds.idhash
    ds.samples[1, 1] = 1000
    ok_(origid != ds.idhash,
        msg="Changing value in data should change idhash")

    origid = ds.idhash
    orig_labels = ds.targets #.copy()
    ds.permute_targets()
    ok_(origid != ds.idhash,
        msg="Permutation also changes idhash")

    ds.targets = orig_labels
    ok_(origid == ds.idhash,
        msg="idhash should be restored after reassigning orig targets")
开发者ID:geeragh,项目名称:PyMVPA,代码行数:35,代码来源:test_datasetng.py


示例10: pure_multivariate_signal

def pure_multivariate_signal(patterns, signal2noise = 1.5, chunks=None, targets=[0, 1]):
    """ Create a 2d dataset with a clear multivariate signal, but no
    univariate information.

    ::

      %%%%%%%%%
      % O % X %
      %%%%%%%%%
      % X % O %
      %%%%%%%%%
    """

    # start with noise
    data = np.random.normal(size=(4*patterns, 2))

    # add signal
    data[:2*patterns, 1] += signal2noise

    data[2*patterns:4*patterns, 1] -= signal2noise
    data[:patterns, 0] -= signal2noise
    data[2*patterns:3*patterns, 0] -= signal2noise
    data[patterns:2*patterns, 0] += signal2noise
    data[3*patterns:4*patterns, 0] += signal2noise

    # two conditions
    regs = np.array((targets[0:1] * patterns) + (targets[1:2] * 2 * patterns) + (targets[0:1] * patterns))

    if chunks is None:
        chunks = range(len(data))
    return dataset_wizard(samples=data, targets=regs, chunks=chunks)
开发者ID:esc,项目名称:PyMVPA,代码行数:31,代码来源:data_generators.py


示例11: test_aggregation

    def test_aggregation(self):
        data = dataset_wizard(np.arange( 20 ).reshape((4, 5)), targets=1, chunks=1)

        ag_data = aggregate_features(data, np.mean)

        ok_(ag_data.nsamples == 4)
        ok_(ag_data.nfeatures == 1)
        assert_array_equal(ag_data.samples[:, 0], [2, 7, 12, 17])
开发者ID:B-Rich,项目名称:PyMVPA,代码行数:8,代码来源:test_datasetfx.py


示例12: setUp

    def setUp(self):
        self.clf_sign = SameSignClassifier()
        self.clf_less1 = Less1Classifier()

        # simple binary dataset
        self.data_bin_1 = dataset_wizard(
            samples=[[0,0],[-10,-1],[1,0.1],[1,-1],[-1,1]],
            targets=[1, 1, 1, -1, -1], # labels
            chunks=[0, 1, 2,  2, 3])  # chunks
开发者ID:esc,项目名称:PyMVPA,代码行数:9,代码来源:test_clf.py


示例13: test_str

def test_str():
    args = ( np.arange(12, dtype=np.int8).reshape((4, 3)),
             range(4),
             [1, 1, 2, 2])
    for iargs in range(1, len(args)):
        ds = dataset_wizard(*(args[:iargs]))
        ds_s = str(ds)
        ok_(ds_s.startswith('<Dataset: [email protected]'))
        ok_(ds_s.endswith('>'))
开发者ID:geeragh,项目名称:PyMVPA,代码行数:9,代码来源:test_datasetng.py


示例14: test_mergeds2

def test_mergeds2():
    """Test composition of new datasets by addition of existing ones
    """
    data = dataset_wizard([range(5)], targets=1, chunks=1)

    assert_array_equal(data.UT, [1])

    # simple sequence has to be a single pattern
    assert_equal(data.nsamples, 1)
    # check correct pattern layout (1x5)
    assert_array_equal(data.samples, [[0, 1, 2, 3, 4]])

    # check for single labels and origin
    assert_array_equal(data.targets, [1])
    assert_array_equal(data.chunks, [1])

    # now try adding pattern with wrong shape
    assert_raises(DatasetError,
                  data.append,
                  dataset_wizard(np.ones((2,3)), targets=1, chunks=1))

    # now add two real patterns
    dss = datasets['uni2large'].samples
    data.append(dataset_wizard(dss[:2, :5], targets=2, chunks=2))
    assert_equal(data.nfeatures, 5)
    assert_array_equal(data.targets, [1, 2, 2])
    assert_array_equal(data.chunks, [1, 2, 2])

    # test automatic origins
    data.append(dataset_wizard(dss[3:5, :5], targets=3, chunks=[0, 1]))
    assert_array_equal(data.chunks, [1, 2, 2, 0, 1])

    # test unique class labels
    assert_array_equal(data.UT, [1, 2, 3])

    # test wrong label length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5], targets=[ 1, 2, 3 ],
                                         chunks=2)

    # test wrong origin length
    assert_raises(ValueError, dataset_wizard, dss[:4, :5], targets=[ 1, 2, 3, 4 ],
                                         chunks=[ 2, 2, 2 ])
开发者ID:geeragh,项目名称:PyMVPA,代码行数:42,代码来源:test_datasetng.py


示例15: test_invar_features_removal

    def test_invar_features_removal(self):
        r = np.random.normal(size=(3,1))
        ds = dataset_wizard(samples=np.hstack((np.zeros((3,2)), r)),
                     targets=1)

        self.failUnless(ds.nfeatures == 3)

        dsc = remove_invariant_features(ds)

        self.failUnless(dsc.nfeatures == 1)
        self.failUnless((dsc.samples == r).all())
开发者ID:B-Rich,项目名称:PyMVPA,代码行数:11,代码来源:test_datasetfx.py


示例16: sin_modulated

def sin_modulated(n_instances, n_features, flat=False, noise=0.4):
    """ Generate a (quite) complex multidimensional non-linear dataset

    Used for regression testing. In the data label is a sin of a x^2 +
    uniform noise
    """
    if flat:
        data = np.arange(0.0, 1.0, 1.0 / n_instances) * np.pi
        data.resize(n_instances, n_features)
    else:
        data = np.random.rand(n_instances, n_features) * np.pi
    label = np.sin((data ** 2).sum(1)).round()
    label += np.random.rand(label.size) * noise
    return dataset_wizard(samples=data, targets=label)
开发者ID:arokem,项目名称:PyMVPA,代码行数:14,代码来源:data_generators.py


示例17: test_arrayattributes

def test_arrayattributes():
    samples = np.arange(12).reshape((4, 3))
    labels = range(4)
    chunks = [1, 1, 2, 2]
    ds = dataset_wizard(samples, labels, chunks)

    for a in (ds.samples, ds.targets, ds.chunks):
        ok_(isinstance(a, np.ndarray))

    ds.targets = labels
    ok_(isinstance(ds.targets, np.ndarray))

    ds.chunks = chunks
    ok_(isinstance(ds.chunks, np.ndarray))
开发者ID:geeragh,项目名称:PyMVPA,代码行数:14,代码来源:test_datasetng.py


示例18: test_combined_samplesfeature_selection

def test_combined_samplesfeature_selection():
    data = dataset_wizard(np.arange(20).reshape((4, 5)).view(myarray),
                   targets=[1,2,3,4],
                   chunks=[5,6,7,8])

    # array subclass survives
    ok_(isinstance(data.samples, myarray))

    ok_(data.nsamples == 4)
    ok_(data.nfeatures == 5)
    sel = data[[0, 3], [1, 2]]
    ok_(sel.nsamples == 2)
    ok_(sel.nfeatures == 2)
    assert_array_equal(sel.targets, [1, 4])
    assert_array_equal(sel.chunks, [5, 8])
    assert_array_equal(sel.samples, [[1, 2], [16, 17]])
    # array subclass survives
    ok_(isinstance(sel.samples, myarray))


    # should yield the same result if done sequentially
    sel2 = data[:, [1, 2]]
    sel2 = sel2[[0, 3]]
    assert_array_equal(sel.samples, sel2.samples)
    ok_(sel2.nsamples == 2)
    ok_(sel2.nfeatures == 2)
    # array subclass survives
    ok_(isinstance(sel.samples, myarray))


    assert_raises(ValueError, data.__getitem__, (1, 2, 3))

    # test correct behavior when selecting just single rows/columns
    single = data[0]
    ok_(single.nsamples == 1)
    ok_(single.nfeatures == 5)
    assert_array_equal(single.samples, [[0, 1, 2, 3, 4]])
    single = data[:, 0]
    ok_(single.nsamples == 4)
    ok_(single.nfeatures == 1)
    assert_array_equal(single.samples, [[0], [5], [10], [15]])
    single = data[1, 1]
    ok_(single.nsamples == 1)
    ok_(single.nfeatures == 1)
    assert_array_equal(single.samples, [[6]])
    # array subclass survives
    ok_(isinstance(single.samples, myarray))
开发者ID:geeragh,项目名称:PyMVPA,代码行数:47,代码来源:test_datasetng.py


示例19: chirp_linear

def chirp_linear(n_instances, n_features=4, n_nonbogus_features=2, data_noise=0.4, noise=0.1):
    """ Generates simple dataset for linear regressions

    Generates chirp signal, populates n_nonbogus_features out of
    n_features with it with different noise level and then provides
    signal itself with additional noise as labels
    """
    x = np.linspace(0, 1, n_instances)
    y = np.sin((10 * np.pi * x ** 2))

    data = np.random.normal(size=(n_instances, n_features)) * data_noise
    for i in xrange(n_nonbogus_features):
        data[:, i] += y[:]

    labels = y + np.random.normal(size=(n_instances,)) * noise

    return dataset_wizard(samples=data, targets=labels)
开发者ID:arokem,项目名称:PyMVPA,代码行数:17,代码来源:data_generators.py


示例20: test_samplesgroup_mapper

def test_samplesgroup_mapper():
    data = np.arange(24).reshape(8,3)
    labels = [0, 1] * 4
    chunks = np.repeat(np.array((0,1)),4)

    # correct results
    csamples = [[3, 4, 5], [6, 7, 8], [15, 16, 17], [18, 19, 20]]
    clabels = [0, 1, 0, 1]
    cchunks = [0, 0, 1, 1]

    ds = dataset_wizard(samples=data, targets=labels, chunks=chunks)
    # add some feature attribute -- just to check
    ds.fa['checker'] = np.arange(3)
    ds.init_origids('samples')

    m = mean_group_sample(['targets', 'chunks'])
    mds = m.forward(ds)
    assert_array_equal(mds.samples, csamples)
    # FAs should simply remain the same
    assert_array_equal(mds.fa.checker, np.arange(3))

    # now without grouping
    m = mean_sample()
    # forwarding just the samples should yield the same result
    assert_array_equal(m.forward(ds.samples),
                       m.forward(ds).samples)

    # directly apply to dataset
    # using untrained mapper
    m = mean_group_sample(['targets', 'chunks'])
    mapped = ds.get_mapped(m)

    assert_equal(mapped.nsamples, 4)
    assert_equal(mapped.nfeatures, 3)
    assert_array_equal(mapped.samples, csamples)
    assert_array_equal(mapped.targets, clabels)
    assert_array_equal(mapped.chunks, cchunks)
    # make sure origids get regenerated
    assert_array_equal([s.count('+') for s in mapped.sa.origids], [1] * 4)

    # disbalanced dataset -- lets remove 0th sample so there is no target
    # 0 in 0th chunk
    ds_ = ds[[0, 1, 3, 5]]
    mapped = ds_.get_mapped(m)
    ok_(len(mapped) == 3)
    ok_(not None in mapped.sa.origids)
开发者ID:esc,项目名称:PyMVPA,代码行数:46,代码来源:test_fxmapper.py



注:本文中的mvpa.datasets.base.dataset_wizard函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python base.Dataset类代码示例发布时间:2022-05-27
下一篇:
Python externals.exists函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap