• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python skbio.DistanceMatrix类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了 Python 中 skbio.DistanceMatrix 的典型用法代码示例。如果您正苦于以下问题:Python DistanceMatrix 类的具体用法?Python DistanceMatrix 怎么用?Python DistanceMatrix 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了DistanceMatrix类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_from_iterable_validate_equal_valid_data

 def test_from_iterable_validate_equal_valid_data(self):
     """from_iterable gives equal results with validate=True and False."""
     def abs_diff(a, b):
         # Absolute difference between the two items.
         return abs(b - a)

     # Each call gets its own fresh generator over 0..3.
     with_validation = DistanceMatrix.from_iterable(
         (x for x in range(4)), abs_diff, validate=True)
     without_validation = DistanceMatrix.from_iterable(
         (x for x in range(4)), abs_diff, validate=False)

     self.assertEqual(with_validation, without_validation)
开发者ID:jakereps,项目名称:scikit-bio,代码行数:8,代码来源:test_base.py


示例2: test_from_file_with_file_path

    def test_from_file_with_file_path(self):
        """Should identify the filepath correctly and parse from it."""
        # Parsing the bad fixture file must raise the format-specific error.
        with self.assertRaises(DissimilarityMatrixFormatError):
            DistanceMatrix.from_file(self.bad_dm_fp)

        obs = DistanceMatrix.from_file(self.dm_3x3_fp)
        self.assertEqual(self.dm_3x3, obs)
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(obs, DistanceMatrix)
开发者ID:nbresnick,项目名称:scikit-bio,代码行数:10,代码来源:test_distance.py


示例3: setUp

    def setUp(self):
        """Build the 1x1, 2x2 and 3x3 DistanceMatrix fixtures."""
        super(DistanceMatrixTests, self).setUp()

        # NOTE(review): the dm_*_data attributes are not assigned in this
        # method; presumably the parent setUp provides them -- confirm.
        self.dm_1x1 = DistanceMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_3x3 = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3]

        # One condensed-form array per matrix, in the same order as self.dms.
        condensed_1x1 = np.array([])
        condensed_2x2 = np.array([0.123])
        condensed_3x3 = np.array([0.01, 4.2, 12.0])
        self.dm_condensed_forms = [condensed_1x1, condensed_2x2,
                                   condensed_3x3]
开发者ID:adamrp,项目名称:scikit-bio,代码行数:10,代码来源:test_base.py


示例4: test_to_series_4x4

 def test_to_series_4x4(self):
     """Compare to_series() of a 4x4 matrix against a fixed pandas Series."""
     ids = ['a', 'b', 'c', 'd']
     data = [
         [0, 0.25, 0.75, 0.75],
         [0.25, 0.0, 0.5, 0.5],
         [0.75, 0.5, 0.0, 0.0],
         [0.75, 0.5, 0.0, 0.0],
     ]
     dm = DistanceMatrix(data, ids)
     observed = dm.to_series()

     # Expected (row id, column id) labels and the value at each label.
     expected_index = [
         ('a', 'b'), ('a', 'c'), ('a', 'd'),
         ('b', 'a'), ('b', 'c'), ('b', 'd'),
         ('c', 'a'), ('c', 'b'),
         ('d', 'a'), ('d', 'b'),
     ]
     expected_values = [0.25, 0.75, 0.75, 0.25, 0.5, 0.5, 0.75, 0.5, 0.75,
                        0.5]
     expected = pd.Series(expected_values, index=expected_index)

     assert_series_almost_equal(observed, expected)
开发者ID:virginiasaulnier,项目名称:scikit-bio,代码行数:11,代码来源:test_base.py


示例5: progressive_msa_and_tree

def progressive_msa_and_tree(sequences,
                             pairwise_aligner,
                             metric=kmer_distance,
                             guide_tree=None,
                             display_aln=False,
                             display_tree=False):
    """ Perform progressive msa of sequences and build a UPGMA tree

    Parameters
    ----------
    sequences : skbio.SequenceCollection
        The sequences to be aligned.
    pairwise_aligner : function
        Function that should be used to perform the pairwise alignments,
        for example skbio.alignment.global_pairwise_align_nucleotide. Must
        support skbio.Sequence objects or skbio.TabularMSA objects
        as input.
    metric : function, optional
        Function that returns a single distance value when given a pair of
        skbio.Sequence objects. It is used to build the guide tree if one
        is not provided, and always to build the output tree from the
        alignment.
    guide_tree : skbio.TreeNode, optional
        The tree that should be used to guide the alignment process.
    display_aln : bool, optional
        Print the alignment before returning.
    display_tree : bool, optional
        Draw a dendrogram of the output tree before returning.

    Returns
    -------
    tuple
        ``(msa, msa_tree)``: the value returned by ``progressive_msa`` and
        the tree built from it with ``TreeNode.from_linkage_matrix``.

    """
    if guide_tree is None:
        # No guide tree supplied: derive one from pairwise distances between
        # the unaligned sequences.
        guide_dm = DistanceMatrix.from_iterable(
            sequences, metric=metric, key='id')
        guide_lm = average(guide_dm.condensed_form())
        guide_tree = TreeNode.from_linkage_matrix(guide_lm, guide_dm.ids)

    msa = progressive_msa(sequences, guide_tree,
                          pairwise_aligner=pairwise_aligner)

    if display_aln:
        print(msa)

    # Build the output tree from distances computed over the alignment.
    msa_dm = DistanceMatrix.from_iterable(msa, metric=metric, key='id')
    msa_lm = average(msa_dm.condensed_form())
    msa_tree = TreeNode.from_linkage_matrix(msa_lm, msa_dm.ids)
    if display_tree:
        print("\nOutput tree:")
        # dendrogram is called for its drawing side effect only; its return
        # value was previously bound to an unused local.
        dendrogram(msa_lm, labels=msa_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black', leaf_font_size=24)
    return msa, msa_tree
开发者ID:lsl5,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:53,代码来源:__init__.py


示例6: test_fsvd

    def test_fsvd(self):
        """Exercise pcoa with method="fsvd" against "eigh" and edge cases."""
        # Three independent copies of the same fixture matrix.
        dm1, dm2, dm3 = (
            DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
            for _ in range(3))
        compare_opts = dict(ignore_directionality=True,
                            ignore_method_names=True)

        # Test eigh vs. fsvd pcoa and inplace parameter
        eigh_results = pcoa(dm1, method="eigh", number_of_dimensions=3,
                            inplace=False)
        fsvd_results = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                            inplace=False)
        fsvd_inplace_results = pcoa(dm2, method="fsvd",
                                    number_of_dimensions=3, inplace=True)

        assert_ordination_results_equal(fsvd_results, eigh_results,
                                        **compare_opts)
        assert_ordination_results_equal(fsvd_results, fsvd_inplace_results,
                                        **compare_opts)

        # Test number_of_dimensions edge cases: 0 must give the same result
        # as explicitly asking for as many dimensions as the matrix has rows.
        zero_dim_results = pcoa(dm3, method="fsvd", number_of_dimensions=0,
                                inplace=False)
        full_dim_results = pcoa(dm3, method="fsvd",
                                number_of_dimensions=dm3.data.shape[0],
                                inplace=False)
        assert_ordination_results_equal(zero_dim_results, full_dim_results,
                                        **compare_opts)

        # Both methods must reject too many dimensions and negative ones.
        dim_too_large = dm1.data.shape[0] + 10
        for method in ("fsvd", "eigh"):
            with self.assertRaises(ValueError):
                pcoa(dm2, method=method, number_of_dimensions=dim_too_large)

            with self.assertRaises(ValueError):
                pcoa(dm2, method=method, number_of_dimensions=-1)

        # FSVD on the larger fixture without an explicit dimension count is
        # expected to emit a RuntimeWarning.
        dm_big = DistanceMatrix.read(get_data_path('PCoA_sample_data_12dim'))
        with self.assertWarnsRegex(RuntimeWarning,
                                   "no value for number_of_dimensions"):
            pcoa(dm_big, method="fsvd", number_of_dimensions=0)
开发者ID:thermokarst,项目名称:scikit-bio,代码行数:52,代码来源:test_principal_coordinate_analysis.py


示例7: distmat_corr

def distmat_corr(truthfile, distfile, reps=3, corrstat=spearman):
    '''Return corrstat applied to the condensed truth and distance matrices.

    Both files are read as DistanceMatrix objects. The truth matrix is passed
    through sample_matrix_to_runs(truthmat, reps) first, then both matrices
    are reordered to the same sorted ID list before being condensed.
    '''
    observed = DistanceMatrix.read(distfile)
    expected = sample_matrix_to_runs(DistanceMatrix.read(truthfile), reps)

    ids = sorted(observed.ids)
    t_ids = sorted(expected.ids)
    # Both matrices must describe exactly the same set of IDs.
    assert ids == t_ids, (ids, t_ids)

    # Filtering by the sorted list puts both matrices in the same ID order.
    dist_vector = observed.filter(ids).condensed_form()
    truth_vector = expected.filter(ids).condensed_form()
    return corrstat(truth_vector, dist_vector)
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:13,代码来源:calc_rho.py


示例8: test_to_series_4x4

    def test_to_series_4x4(self):
        """to_series() of a 4x4 matrix yields the six upper-triangle values."""
        ids = ['a', 'b', 'c', 'd']
        data = [
            [0.0, 0.2, 0.3, 0.4],
            [0.2, 0.0, 0.5, 0.6],
            [0.3, 0.5, 0.0, 0.7],
            [0.4, 0.6, 0.7, 0.0],
        ]
        observed = DistanceMatrix(data, ids).to_series()

        # One entry per unordered ID pair, labelled (row id, column id).
        pair_labels = pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'),
                                ('b', 'c'), ('b', 'd'), ('c', 'd')])
        expected = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                             index=pair_labels)

        assert_series_almost_equal(observed, expected)
开发者ID:RNAer,项目名称:scikit-bio,代码行数:13,代码来源:test_base.py


示例9: test_fsvd_inplace

    def test_fsvd_inplace(self):
        """With inplace=True, "fsvd" and "eigh" pcoa results must agree."""
        # Separate copies of the fixture, one per in-place pcoa call.
        eigh_dm = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        fsvd_dm = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))

        eigh_results = pcoa(eigh_dm, method="eigh", number_of_dimensions=3,
                            inplace=True)
        fsvd_results = pcoa(fsvd_dm, method="fsvd", number_of_dimensions=3,
                            inplace=True)

        assert_ordination_results_equal(fsvd_results, eigh_results,
                                        ignore_directionality=True,
                                        ignore_method_names=True)
开发者ID:thermokarst,项目名称:scikit-bio,代码行数:13,代码来源:test_principal_coordinate_analysis.py


示例10: setUp

    def setUp(self):
        """Create the distance matrices and load the expected result tables."""
        self.minx = DistanceMatrix([[0, 1, 2], [1, 0, 3], [2, 3, 0]])
        self.miny = DistanceMatrix([[0, 2, 7], [2, 0, 6], [7, 6, 0]])
        self.minz = DistanceMatrix([[0, 0.5, 0.25],
                                    [0.5, 0, 0.1],
                                    [0.25, 0.1, 0]])
        self.min_dms = (self.minx, self.miny, self.minz)

        # Versions of self.minx and self.minz (above) that each have an extra
        # ID on the end.
        self.x_extra = DistanceMatrix([[0, 1, 2, 7],
                                       [1, 0, 3, 2],
                                       [2, 3, 0, 4],
                                       [7, 2, 4, 0]], ['0', '1', '2', 'foo'])
        self.z_extra = DistanceMatrix([[0, 0.5, 0.25, 3],
                                       [0.5, 0, 0.1, 24],
                                       [0.25, 0.1, 0, 5],
                                       [3, 24, 5, 0]], ['0', '1', '2', 'bar'])

        # Load expected results. The p-value column (column index 3) is read
        # as a string because the in-memory results format p-values as
        # strings with a fixed number of decimal places. Without this
        # converter the column would load as floats and the frames would not
        # compare equal.
        p_val_conv = {3: str}

        def load_expected(filename):
            # Tab-separated table indexed by its first two columns.
            return pd.read_csv(get_data_path(filename), sep='\t',
                               index_col=(0, 1), converters=p_val_conv)

        self.exp_results_minimal = load_expected(
            'pwmantel_exp_results_minimal.txt')
        self.exp_results_minimal_with_labels = load_expected(
            'pwmantel_exp_results_minimal_with_labels.txt')
        self.exp_results_duplicate_dms = load_expected(
            'pwmantel_exp_results_duplicate_dms.txt')
        self.exp_results_na_p_value = load_expected(
            'pwmantel_exp_results_na_p_value.txt')
        self.exp_results_too_few_permutations = load_expected(
            'pwmantel_exp_results_too_few_permutations.txt')
        self.exp_results_reordered_distance_matrices = load_expected(
            'pwmantel_exp_results_reordered_distance_matrices.txt')
开发者ID:nbresnick,项目名称:scikit-bio,代码行数:51,代码来源:test_mantel.py


示例11: bioenv

def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    """Run skbio's bioenv on numeric metadata and render an HTML report.

    Parameters
    ----------
    output_dir : str
        Directory passed to ``q2templates.render`` as the output location.
    distance_matrix : skbio.DistanceMatrix
        Distance matrix to analyse. It is filtered (non-strictly) to the
        samples that remain in the metadata after cleaning.
    metadata : qiime2.Metadata
        Sample metadata. Only numeric, non-constant columns are used, and
        rows containing NaN in those columns are dropped.
    """
    def _to_numeric_or_unchanged(column):
        # ``pd.to_numeric(..., errors='ignore')`` is deprecated since pandas
        # 2.2; catching the conversion failure gives the same result (the
        # column is returned untouched when it cannot be parsed).
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    # convert metadata to numeric values where applicable, drop the non-numeric
    # values, and then drop samples that contain NaNs
    df = metadata.to_dataframe()
    df = df.apply(_to_numeric_or_unchanged)

    # filter categorical columns
    pre_filtered_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    filtered_categorical_cols = pre_filtered_cols - set(df.columns)

    # filter 0 variance numerical columns
    pre_filtered_cols = set(df.columns)
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = pre_filtered_cols - set(df.columns)

    # filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the filtering
    # so that information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = skbio.stats.distance.bioenv(distance_matrix, df)
    result = result.to_html(classes='table table-striped table-hover').replace(
        'border="1"', 'border="0"')

    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    # The filtered column names come from sets, whose iteration order is
    # arbitrary; sort them so the rendered report is deterministic.
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'filtered_categorical_cols': ', '.join(
            sorted(filtered_categorical_cols)),
        'filtered_zero_variance_cols': ', '.join(
            sorted(filtered_zero_variance_cols)),
        'result': result})
开发者ID:jairideout,项目名称:diversity,代码行数:35,代码来源:_visualizer.py


示例12: effect_size

def effect_size(mappings, alphas, betas, output, jobs, permutations,
                overwrite, na_values):
    """Load the input tables and run _process_column on each generated item.

    The mapping files are always loaded. If ``betas`` is truthy, the beta
    files are read as DistanceMatrix objects and processed in parallel with
    joblib; otherwise the alpha files are loaded and processed sequentially.
    """
    def _read_table(path):
        # Tab-separated file with every column read as a string.
        return pd.read_csv(path, sep='\t', dtype=str, na_values=na_values)

    # As we can have multiple mapping, alpha or beta files, each group is
    # stored as a dictionary keyed by file path.
    mappings = {f: _read_table(f) for f in mappings}
    for mapping_df in mappings.values():
        mapping_df.set_index('#SampleID', inplace=True)

    if betas:
        betas = {f: DistanceMatrix.read(f) for f in betas}

        with joblib.parallel.Parallel(n_jobs=jobs, verbose=100) as par:
            tasks = (
                joblib.delayed(_process_column)(
                    bf, c, fname, finfo, alphas, betas, permutations)
                for bf, c, fname, finfo in _generate_betas(
                    betas, mappings, permutations, output, overwrite))
            par(tasks)
        return

    alphas = {f: _read_table(f) for f in alphas}
    for alpha_df in alphas.values():
        alpha_df.set_index('#SampleID', inplace=True)

    for af, c, fname, finfo in _generate_alphas(alphas, mappings,
                                                output, overwrite):
        _process_column(af, c, fname, finfo, alphas, betas, permutations)
开发者ID:antgonza,项目名称:evident,代码行数:27,代码来源:effect_size.py


示例13: setUp

    def setUp(self):
        """Compute the fixture ordination and load the biplot descriptors."""
        # Crawford dataset for unweighted UniFrac
        dm_fp = get_data_path('PCoA_sample_data_3')
        crawford_dm = DistanceMatrix.read(dm_fp)
        self.ordination = pcoa(crawford_dm)

        # The table is indexed by its 'Taxon' column and then transposed.
        descriptors_fp = get_data_path('PCoA_biplot_descriptors')
        descriptors_by_taxon = pd.read_table(descriptors_fp,
                                             index_col='Taxon')
        self.descriptors = descriptors_by_taxon.T
开发者ID:thermokarst,项目名称:scikit-bio,代码行数:7,代码来源:test_principal_coordinate_analysis.py


示例14: aln_distmat

def aln_distmat(alignment, reps=3):
    '''Return a DistanceMatrix of pairwise hamming distances for an MSA.

    The alignment is read as a TabularMSA of DNA sequences and re-indexed
    with minter="id"; the resulting index supplies the matrix keys.
    NOTE(review): ``reps`` is accepted but not used in this function.
    '''
    msa = TabularMSA.read(alignment, constructor=DNA)
    msa.reassign_index(minter="id")

    # The metric is applied to each sequence's ``values``, not to the
    # sequence objects themselves.
    seq_values = [seq.values for seq in msa]
    return DistanceMatrix.from_iterable(seq_values, metric=hamming,
                                        keys=msa.index)
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:7,代码来源:alndist.py


示例15: test_confirm_betadispr_results

    def test_confirm_betadispr_results(self):
        """Check permdisp on the moving pictures data against fixed values."""
        mp_dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv'))
        mp_mf = pd.read_csv(get_data_path('moving_pictures_mf.tsv'), sep='\t')
        mp_mf.set_index('#SampleID', inplace=True)

        # First call uses permdisp's default test; second asks for 'centroid'.
        obs_default = permdisp(mp_dm, mp_mf, column='BodySite')
        obs_centroid = permdisp(mp_dm, mp_mf, column='BodySite',
                                test='centroid')

        exp_ind = ['method name', 'test statistic name', 'sample size',
                   'number of groups', 'test statistic', 'p-value',
                   'number of permutations']

        def expected_series(test_statistic):
            # The two expected results differ only in the test statistic.
            values = ['PERMDISP', 'F-value', 33, 4, test_statistic, 0.001,
                      999]
            return pd.Series(data=values, index=exp_ind, dtype='object',
                             name='PERMDISP results')

        exp_default = expected_series(10.1956)
        exp_centroid = expected_series(17.4242)

        self.assert_series_equal(exp_default, obs_default)
        self.assert_series_equal(exp_centroid, obs_centroid)
开发者ID:ElDeveloper,项目名称:biolopy,代码行数:25,代码来源:test_permdisp.py


示例16: guide_tree_from_sequences

def guide_tree_from_sequences(sequences,
                              metric=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying metric to sequences

    Parameters
    ----------
    sequences : list of skbio.Sequence objects (or subclasses)
      The sequences to be represented in the resulting guide tree.
    metric : function
      Function that returns a single distance value when given a pair of
      skbio.Sequence objects.
    display_tree : bool, optional
      Draw a dendrogram of the tree before returning.

    Returns
    -------
    The object returned by ``to_tree`` for the linkage matrix.
    NOTE(review): this was documented as skbio.TreeNode; if ``to_tree`` is
    scipy.cluster.hierarchy.to_tree the result is a scipy ClusterNode
    instead -- confirm against the file's imports.

    """
    guide_dm = DistanceMatrix.from_iterable(
        sequences, metric=metric, key='id')
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # dendrogram is called for its drawing side effect only; its return
        # value was previously bound to an unused local.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
开发者ID:lsl5,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:28,代码来源:__init__.py


示例17: test_simple

    def test_simple(self):
        """pcoa on the sample fixture matches the stored expected results."""
        sample_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                      'PC.593', 'PC.355', 'PC.607', 'PC.634']
        axis_labels = ['PC%d' % i for i in range(1, 10)]

        # Expected eigenvalues and proportions, one per axis label.
        eigvals = pd.Series(
            [0.51236726, 0.30071909, 0.26791207, 0.20898868,
             0.19169895, 0.16054235,  0.15017696,  0.12245775,
             0.0],
            index=axis_labels)
        # Expected sample coordinates come from a stored text fixture.
        samples = pd.DataFrame(
            np.loadtxt(get_data_path('exp_PCoAEigenResults_site')),
            index=sample_ids, columns=axis_labels)
        proportion_explained = pd.Series(
            [0.2675738328, 0.157044696, 0.1399118638,
             0.1091402725, 0.1001110485,
             0.0838401162, 0.0784269939,
             0.0639511764, 0.0],
            index=axis_labels)

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=eigvals,
            samples=samples,
            proportion_explained=proportion_explained)

        observed_results = pcoa(
            DistanceMatrix.read(get_data_path('PCoA_sample_data_3')))

        assert_ordination_results_equal(observed_results, expected_results,
                                        ignore_directionality=True)
开发者ID:ebolyen,项目名称:scikit-bio,代码行数:27,代码来源:test_principal_coordinate_analysis.py


示例18: get_spearmans

def get_spearmans(distfile, truth):
    """Return the ``correlation`` of stats.spearmanr(truth, distfile matrix).

    The matrix read from ``distfile`` is reordered by sorted ID and condensed;
    ``truth`` is condensed as given.
    NOTE(review): ``truth`` is not reordered here, so it presumably must
    already be in sorted-ID order -- verify against the caller.
    """
    loaded = DistanceMatrix.read(distfile)
    ordered = loaded.filter(sorted(loaded.ids))

    dist_vector = ordered.condensed_form()
    truth_vector = truth.condensed_form()
    return stats.spearmanr(truth_vector, dist_vector).correlation
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:8,代码来源:treedist.py


示例19: setup

    def setup(self):
        """Load the sample distance matrix and build the PCoA fixture."""
        # The legacy 'U' mode flag raises ValueError on Python 3.11+, where
        # it was removed. Default text mode already applies universal
        # newline handling on Python 3.
        with open(get_data_path('PCoA_sample_data_3')) as lines:
            dist_matrix = DistanceMatrix.from_file(lines)

        self.ordination = PCoA(dist_matrix)

        self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
                    'PC.355', 'PC.607', 'PC.634']
开发者ID:jwdebelius,项目名称:scikit-bio,代码行数:8,代码来源:test_ordination.py


示例20: test_from_iterable_validate_false_non_symmetric

 def test_from_iterable_validate_false_non_symmetric(self):
     """from_iterable with a signed-difference metric and validate=False.

     The result is expected to equal the symmetric matrix of absolute
     differences between the items 0..3.
     """
     def signed_diff(a, b):
         # Not symmetric: swapping the arguments flips the sign.
         return a - b

     observed = DistanceMatrix.from_iterable((x for x in range(4)),
                                             signed_diff,
                                             validate=False)
     expected = DistanceMatrix([[0, 1, 2, 3],
                                [1, 0, 1, 2],
                                [2, 1, 0, 1],
                                [3, 2, 1, 0]])
     self.assertEqual(observed, expected)
开发者ID:jakereps,项目名称:scikit-bio,代码行数:9,代码来源:test_base.py



注:本文中的skbio.DistanceMatrix类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python skbio.SequenceCollection类代码示例发布时间:2022-05-27
下一篇:
Python skbio.Alignment类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap