• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python _minhash.MinHash类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sourmash._minhash.MinHash的典型用法代码示例。如果您正苦于以下问题：Python MinHash类的具体用法？Python MinHash怎么用？Python MinHash使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了MinHash类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_set_abundance

def test_set_abundance():
    """set_abundances() must raise RuntimeError without abundance tracking."""
    sketch = MinHash(20, 10, track_abundance=False)
    new_abundances = {1: 3, 2: 4}

    with pytest.raises(RuntimeError) as excinfo:
        sketch.set_abundances(new_abundances)

    # The first exception argument carries the human-readable hint.
    message = excinfo.value.args[0]
    assert "track_abundance=True when constructing" in message
开发者ID:dib-lab,项目名称:sourmash,代码行数:7,代码来源:test__minhash.py


示例2: test_mh_asymmetric_merge

def test_mh_asymmetric_merge(track_abundance):
    """Merge two MinHashes with different ``num`` in both directions.

    The merged sketch is expected to keep the length of the left-hand
    operand, and comparing sketches of different size must raise
    ``TypeError`` until the larger one has been downsampled.
    """
    # larger sketch (num=20): even numbers below 40
    big = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 40, 2):
        big.add_hash(hashval)

    # smaller sketch (num=10): multiples of four below 80
    small = MinHash(10, 10, track_abundance=track_abundance)
    for hashval in range(0, 80, 4):
        small.add_hash(hashval)

    big_merged = big.merge(small)
    small_merged = small.merge(big)

    assert len(big) == 20
    assert len(small) == 10
    assert len(big_merged) == len(big)
    assert len(small_merged) == len(small)

    # sketches of different size are not comparable without downsampling
    with pytest.raises(TypeError):
        small_merged.compare(big)

    big_down = big.downsample_n(small_merged.num)
    print(big_down.get_mins())
    print(small_merged.get_mins())
    assert small_merged.compare(big_down) == 1.0

    big_merged_down = big_merged.downsample_n(small.num)
    assert big_merged_down.compare(small) == 1.0
开发者ID:dib-lab,项目名称:sourmash,代码行数:30,代码来源:test__minhash.py


示例3: test_no_downsample_scaled_if_n

def test_no_downsample_scaled_if_n(track_abundance):
    """A MinHash created with a fixed ``num`` must refuse downsample_scaled().

    The sketch is built with num=2 (no scaled/max_hash), so asking for a
    scaled downsample is expected to raise ``ValueError`` with an
    explanatory message.
    """
    mh = MinHash(2, 4, track_abundance=track_abundance)
    with pytest.raises(ValueError) as excinfo:
        mh.downsample_scaled(100000000)

    # Check the exception's own message: ``excinfo`` is pytest's
    # ExceptionInfo wrapper, and its str() is not guaranteed to be the
    # message text (it is the wrapper's repr in recent pytest releases).
    assert 'cannot downsample a standard MinHash' in str(excinfo.value)
开发者ID:dib-lab,项目名称:sourmash,代码行数:7,代码来源:test__minhash.py


示例4: test_div_zero_contained

def test_div_zero_contained(track_abundance):
    """contained_by() with an empty MinHash must give 0, not divide by zero."""
    filled = MinHash(1, 4, track_abundance=track_abundance)
    # take the empty copy before anything is added to the original
    empty = filled.copy_and_clear()

    filled.add_sequence('ATGC')

    # check both directions: empty as argument and empty as receiver
    assert filled.contained_by(empty) == 0
    assert empty.contained_by(filled) == 0
开发者ID:dib-lab,项目名称:sourmash,代码行数:8,代码来源:test__minhash.py


示例5: test_basic_dna_bad

def test_basic_dna_bad(track_abundance):
    """Adding a sequence with a non-ACGT character must raise ValueError.

    'R' is not a valid DNA character here, so ``add_sequence('ATGR')`` is
    expected to fail with a message naming the offending k-mer.
    """
    mh = MinHash(1, 4, track_abundance=track_abundance)

    with pytest.raises(ValueError) as e:
        mh.add_sequence('ATGR')
    print(e)

    # Match against the raised exception's message; str() of the pytest
    # ExceptionInfo wrapper itself is not guaranteed to be that message.
    assert 'invalid DNA character in input k-mer: ATGR' in str(e.value)
开发者ID:dib-lab,项目名称:sourmash,代码行数:9,代码来源:test__minhash.py


示例6: test_reviving_minhash

def test_reviving_minhash():
    """Rebuild a max_hash-bounded MinHash hash by hash.

    Simulates reading a MinHash from disk: the sketch is created empty
    with explicit parameters and the stored hash values are re-added one
    at a time. The test passes if no call raises.
    """
    stored_hashes = (
        28945103950853965,
        74690756200987412,
        82962372765557409,
        93503551367950366,
        106923350319729608,
        135116761470196737,
        160165359281648267,
        162390811417732001,
        177939655451276972,
    )

    revived = MinHash(0, 21, max_hash=184467440737095520, seed=42,
                      track_abundance=False)

    for hashval in stored_hashes:
        revived.add_hash(hashval)
开发者ID:dib-lab,项目名称:sourmash,代码行数:10,代码来源:test__minhash.py


示例7: test_mh_inplace_concat_asymmetric

def test_mh_inplace_concat_asymmetric(track_abundance):
    """In-place concatenation (``+=``) of two MinHashes with different num.

    The result of ``x += y`` is expected to keep the length of ``x``.
    Comparing sketches with different ``num`` must raise ``TypeError``
    until the larger one has been downsampled.
    """
    # larger sketch (num=20): even numbers below 40
    a = MinHash(20, 10, track_abundance=track_abundance)
    for i in range(0, 40, 2):
        a.add_hash(i)

    # different size: 10
    b = MinHash(10, 10, track_abundance=track_abundance)
    for i in range(0, 80, 4):
        b.add_hash(i)

    # work on copies so that a and b themselves stay untouched
    c = a.__copy__()
    c += b

    d = b.__copy__()
    d += a

    assert len(a) == 20
    assert len(b) == 10
    assert len(c) == len(a)
    assert len(d) == len(b)

    # The comparison MUST fail: a bare try/except would let the test pass
    # silently if no exception were raised at all.
    with pytest.raises(TypeError) as exc:
        d.compare(a)
    assert 'must have same num' in str(exc.value)

    a = a.downsample_n(d.num)
    assert d.compare(a) == 1.0 # see: d += a, above.

    c = c.downsample_n(b.num)
    assert c.compare(b) == 0.5
开发者ID:dib-lab,项目名称:sourmash,代码行数:32,代码来源:test__minhash.py


示例8: test_minhash_abund_capacity_increase

def test_minhash_abund_capacity_increase():
    """Regression test for bug #319.

    The bug was a segfault caused by invalidation of std::vector
    iterators upon vector resizing. The test passes if adding the hashes
    completes without crashing.
    """
    # This should set capacity to 1000 - see the KmerMinHash constructor
    # call to 'reserve' when n > 0 for the specific parameter.
    sketch = MinHash(0, 10, track_abundance=True, max_hash=5000)

    # 1001 depends on the value passed to reserve (currently 1000);
    # hashes are added in descending order, 1001 down to 1.
    for hashval in reversed(range(1, 1002)):
        sketch.add_hash(hashval)
开发者ID:dib-lab,项目名称:sourmash,代码行数:11,代码来源:test__minhash.py


示例9: TimeMinHashSuite

class TimeMinHashSuite:
    """Timing routines for MinHash operations without abundance tracking.

    ``setup`` prepares an empty sketch (``self.mh``), a list of input
    sequences (``self.sequences``) and a sketch already filled from those
    sequences (``self.populated_mh``); each ``time_*`` method repeats one
    operation in a loop.
    """

    def setup(self):
        """Create the empty sketch, load the sequences, fill a second sketch."""
        self.mh = MinHash(500, 21, track_abundance=False)
        self.sequences = load_sequences(get_test_data('ecoli.genes.fna')) * 10

        self.populated_mh = MinHash(500, 21, track_abundance=False)
        for sequence in self.sequences:
            self.populated_mh.add_sequence(sequence)

    def time_add_sequence(self):
        """Add every loaded sequence to the initially empty sketch."""
        sketch = self.mh
        for sequence in self.sequences:
            sketch.add_sequence(sequence)

    def time_get_mins(self):
        """Call get_mins() 500 times on the populated sketch."""
        sketch = self.populated_mh
        for _ in range(500):
            sketch.get_mins()

    def time_add_hash(self):
        """Add the integers 0..9999 as hashes to the initially empty sketch."""
        sketch = self.mh
        for hashval in range(10000):
            sketch.add_hash(hashval)

    def time_compare(self):
        """Compare self.mh against the populated sketch 500 times."""
        sketch, populated = self.mh, self.populated_mh
        for _ in range(500):
            sketch.compare(populated)

    def time_count_common(self):
        """Call count_common() against the populated sketch 500 times."""
        sketch, populated = self.mh, self.populated_mh
        for _ in range(500):
            sketch.count_common(populated)

    def time_merge(self):
        """Merge the populated sketch into self.mh 500 times."""
        sketch, populated = self.mh, self.populated_mh
        for _ in range(500):
            sketch.merge(populated)

    def time_copy(self):
        """Copy the populated sketch 500 times."""
        populated = self.populated_mh
        for _ in range(500):
            populated.__copy__()

    def time_concat(self):
        """Apply ``+=`` with the populated sketch 500 times."""
        # Use a local name so that ``+=`` rebinds the local only and never
        # reassigns the ``self.mh`` attribute.
        sketch = self.mh
        populated = self.populated_mh
        for _ in range(500):
            sketch += populated
开发者ID:dib-lab,项目名称:sourmash,代码行数:53,代码来源:benchmarks.py


示例10: test_size_limit

def test_size_limit(track_abundance):
    """A MinHash with num=3 keeps only the three smallest hashes."""
    sketch = MinHash(3, 4, track_abundance=track_abundance)
    for hashval in (10, 20, 30):
        sketch.add_hash(hashval)
    assert sketch.get_mins() == [10, 20, 30]

    # a smaller hash should push the largest one (30) off the end
    sketch.add_hash(5)
    assert sketch.get_mins() == [5, 10, 20]
开发者ID:dib-lab,项目名称:sourmash,代码行数:9,代码来源:test__minhash.py


示例11: test_minhash_abund_add

def test_minhash_abund_add():
    """Regression test for part of bug #319.

    The bug was a segfault caused by invalidation of std::vector
    iterators upon vector resizing; in this case there was also a bug in
    inserting into the middle of mins when scaled was set. Hashes are
    added in descending order and the number of stored mins must grow by
    one each time.
    """
    sketch = MinHash(0, 10, track_abundance=True, max_hash=5000)

    descending = range(10, 0, -1)
    for expected_len, hashval in enumerate(descending, start=1):
        sketch.add_hash(hashval)
        assert len(sketch.get_mins()) == expected_len
        print(len(sketch.get_mins()))
开发者ID:dib-lab,项目名称:sourmash,代码行数:13,代码来源:test__minhash.py


示例12: test_abundance_simple

def test_abundance_simple():
    """Adding the same 5-mer twice raises its abundance from 1 to 2."""
    expected_hash = 2110480117637990133
    sketch = MinHash(20, 5, False, track_abundance=True)

    # each pass adds the same k-mer once more; the hash list stays the same
    for expected_count in (1, 2):
        sketch.add_sequence('AAAAA')
        assert sketch.get_mins() == [expected_hash]
        assert sketch.get_mins(with_abundance=True) == {
            expected_hash: expected_count
        }
开发者ID:dib-lab,项目名称:sourmash,代码行数:10,代码来源:test__minhash.py


示例13: test_consume_lowercase

def test_consume_lowercase(track_abundance):
    """A lowercase sequence must sketch identically to its uppercase form."""
    sequence = 'TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA'

    from_lower = MinHash(20, 10, track_abundance=track_abundance)
    from_upper = MinHash(20, 10, track_abundance=track_abundance)

    from_lower.add_sequence(sequence.lower())
    from_upper.add_sequence(sequence)

    # every pairing, including each sketch with itself, must be identical
    assert from_lower.compare(from_upper) == 1.0
    assert from_upper.compare(from_upper) == 1.0
    assert from_upper.compare(from_lower) == 1.0
    assert from_lower.compare(from_lower) == 1.0
开发者ID:dib-lab,项目名称:sourmash,代码行数:11,代码来源:test__minhash.py


示例14: test_mh_copy_and_clear_with_max_hash

def test_mh_copy_and_clear_with_max_hash(track_abundance):
    """copy_and_clear() on a max_hash sketch keeps parameters, drops hashes."""
    original = MinHash(0, 10, track_abundance=track_abundance, max_hash=20)
    for hashval in range(0, 40, 2):
        original.add_hash(hashval)

    cleared = original.copy_and_clear()

    # configuration is carried over unchanged...
    assert original.ksize == cleared.ksize
    assert cleared.num == original.num
    assert cleared.max_hash == 20
    assert not cleared.is_protein
    assert cleared.track_abundance == track_abundance
    assert cleared.seed == original.seed

    # ...but the new sketch holds no hashes
    assert len(cleared.get_mins()) == 0

    assert original.scaled == cleared.scaled
    assert cleared.scaled != 0
开发者ID:dib-lab,项目名称:sourmash,代码行数:16,代码来源:test__minhash.py


示例15: test_pickle_scaled

def test_pickle_scaled(track_abundance):
    """A scaled MinHash must survive a pickle round trip unchanged."""
    original = MinHash(0, 10, track_abundance=track_abundance,
                       scaled=922337203685477632)
    for hashval in range(0, 40, 2):
        original.add_hash(hashval)

    serialized = pickle.dumps(original)
    restored = pickle.loads(serialized)

    assert original.ksize == restored.ksize
    assert restored.num == original.num
    assert restored.max_hash == original.max_hash
    assert restored.max_hash == 20
    assert not restored.is_protein
    assert restored.track_abundance == track_abundance
    assert restored.seed == original.seed

    # same number of retained hashes on both sides (expected: 11)
    assert len(restored.get_mins()) == len(original.get_mins())
    assert len(restored.get_mins()) == 11

    assert original.scaled == restored.scaled
    assert restored.scaled != 0
开发者ID:dib-lab,项目名称:sourmash,代码行数:17,代码来源:test__minhash.py


示例16: test_mh_count_common

def test_mh_count_common(track_abundance):
    """count_common() is symmetric and counts the shared hashes."""
    evens = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 40, 2):
        evens.add_hash(hashval)

    fours = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 80, 4):
        fours.add_hash(hashval)

    # the multiples of four below 40 are present in both sketches
    assert evens.count_common(fours) == 10
    assert fours.count_common(evens) == 10
开发者ID:dib-lab,项目名称:sourmash,代码行数:11,代码来源:test__minhash.py


示例17: TimeMinAbundanceSuite

class TimeMinAbundanceSuite(TimeMinHashSuite):
    """Same timing routines as the parent, with abundance tracking enabled.

    Adds two abundance-specific timings on top of the inherited ones.
    """

    def setup(self):
        """Run the parent setup, then replace both sketches with
        abundance-tracking ones."""
        TimeMinHashSuite.setup(self)
        self.mh = MinHash(500, 21, track_abundance=True)

        self.populated_mh = MinHash(500, 21, track_abundance=True)
        for sequence in self.sequences:
            self.populated_mh.add_sequence(sequence)

    def time_get_mins_abundance(self):
        """Call get_mins(with_abundance=True) 500 times on the populated sketch."""
        populated = self.populated_mh
        for _ in range(500):
            populated.get_mins(with_abundance=True)

    def time_set_abundances(self):
        """Call set_abundances() 500 times with the populated sketch's values."""
        # fetch the values once, outside the timed loop body
        abundances = self.populated_mh.get_mins(with_abundance=True)
        target = self.mh
        for _ in range(500):
            target.set_abundances(abundances)
开发者ID:dib-lab,项目名称:sourmash,代码行数:19,代码来源:benchmarks.py


示例18: test_minhash_abund_merge_flat_2

def test_minhash_abund_merge_flat_2():
    """Merge a sketch without abundance into one that tracks abundance.

    This targets a segfault caused by trying to merge a signature with
    abundance and a signature without abundance. The test passes if the
    merge call returns without crashing.
    """
    a = MinHash(0, 10, track_abundance=True, max_hash=5000)
    b = MinHash(0, 10, max_hash=5000)

    for i in range(0, 10, 2):
        a.add_hash(i)

    # Use this loop's own variable: the original added the stale ``i``
    # left over from the loop above, so ``b`` received the hash 8 four
    # times instead of 0, 3, 6 and 9.
    for j in range(0, 10, 3):
        b.add_hash(j)

    a.merge(b)
开发者ID:dib-lab,项目名称:sourmash,代码行数:14,代码来源:test__minhash.py


示例19: test_mh_subtract

def test_mh_subtract(track_abundance):
    """subtract_mins() on two identically configured MinHashes.

    The expected result is the set of hashes present in the first sketch
    but not in the second.
    """
    evens = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 40, 2):
        evens.add_hash(hashval)

    fours = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 80, 4):
        fours.add_hash(hashval)

    # even numbers below 40 that are not multiples of four: 2, 6, ..., 38
    expected = set(range(2, 40, 4))
    assert evens.subtract_mins(fours) == expected
开发者ID:dib-lab,项目名称:sourmash,代码行数:11,代码来源:test__minhash.py


示例20: test_mh_merge_check_length

def test_mh_merge_check_length(track_abundance):
    """Merging two num=20 MinHashes yields a sketch holding exactly 20 mins."""
    evens = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 40, 2):
        evens.add_hash(hashval)

    fours = MinHash(20, 10, track_abundance=track_abundance)
    for hashval in range(0, 80, 4):
        fours.add_hash(hashval)

    merged = evens.merge(fours)
    merged_mins = merged.get_mins()
    assert len(merged_mins) == 20
开发者ID:dib-lab,项目名称:sourmash,代码行数:11,代码来源:test__minhash.py



注:本文中的sourmash._minhash.MinHash类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python changes.AutoChanges类代码示例发布时间:2022-05-27
下一篇:
Python sourcetree.SourceTree类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap