本文整理汇总了Python中skbio.DistanceMatrix类的典型用法代码示例。如果您正苦于以下问题:Python DistanceMatrix类的具体用法?Python DistanceMatrix怎么用?Python DistanceMatrix使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DistanceMatrix类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_from_iterable_validate_equal_valid_data
def test_from_iterable_validate_equal_valid_data(self):
    """Valid input must give equal matrices with and without validation."""
    def abs_diff(a, b):
        return abs(b - a)

    with_validation = DistanceMatrix.from_iterable(
        (x for x in range(4)), abs_diff, validate=True)
    without_validation = DistanceMatrix.from_iterable(
        (x for x in range(4)), abs_diff, validate=False)

    self.assertEqual(with_validation, without_validation)
开发者ID:jakereps,项目名称:scikit-bio,代码行数:8,代码来源:test_base.py
示例2: test_from_file_with_file_path
def test_from_file_with_file_path(self):
    """Check that ``from_file`` accepts a filepath and parses from it."""
    bad_path = self.bad_dm_fp
    good_path = self.dm_3x3_fp

    # The malformed fixture is expected to raise the format error.
    with self.assertRaises(DissimilarityMatrixFormatError):
        DistanceMatrix.from_file(bad_path)

    parsed = DistanceMatrix.from_file(good_path)
    self.assertEqual(self.dm_3x3, parsed)
    self.assertTrue(isinstance(parsed, DistanceMatrix))
开发者ID:nbresnick,项目名称:scikit-bio,代码行数:10,代码来源:test_distance.py
示例3: setUp
def setUp(self):
    """Create the 1x1, 2x2 and 3x3 DistanceMatrix fixtures.

    The raw ``dm_*_data`` attributes are read from ``self`` after the
    parent ``setUp`` has run.
    """
    super(DistanceMatrixTests, self).setUp()

    one = DistanceMatrix(self.dm_1x1_data, ['a'])
    two = DistanceMatrix(self.dm_2x2_data, ['a', 'b'])
    three = DistanceMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

    self.dm_1x1 = one
    self.dm_2x2 = two
    self.dm_3x3 = three
    self.dms = [one, two, three]

    # One array per fixture, in the same order as ``self.dms``
    # (presumably the expected condensed form of each matrix).
    empty = np.array([])
    single = np.array([0.123])
    triple = np.array([0.01, 4.2, 12.0])
    self.dm_condensed_forms = [empty, single, triple]
开发者ID:adamrp,项目名称:scikit-bio,代码行数:10,代码来源:test_base.py
示例4: test_to_series_4x4
def test_to_series_4x4(self):
    """Compare ``to_series`` on a 4x4 matrix with a hand-built series."""
    ids = ['a', 'b', 'c', 'd']
    rows = [
        [0, 0.25, 0.75, 0.75],
        [0.25, 0.0, 0.5, 0.5],
        [0.75, 0.5, 0.0, 0.0],
        [0.75, 0.5, 0.0, 0.0],
    ]
    observed = DistanceMatrix(rows, ids).to_series()

    expected_values = [0.25, 0.75, 0.75, 0.25, 0.5, 0.5, 0.75, 0.5, 0.75, 0.5]
    expected_pairs = [
        ('a', 'b'), ('a', 'c'), ('a', 'd'),
        ('b', 'a'), ('b', 'c'), ('b', 'd'),
        ('c', 'a'), ('c', 'b'),
        ('d', 'a'), ('d', 'b'),
    ]
    expected = pd.Series(expected_values, index=expected_pairs)

    assert_series_almost_equal(observed, expected)
开发者ID:virginiasaulnier,项目名称:scikit-bio,代码行数:11,代码来源:test_base.py
示例5: progressive_msa_and_tree
def progressive_msa_and_tree(sequences,
                             pairwise_aligner,
                             metric=kmer_distance,
                             guide_tree=None,
                             display_aln=False,
                             display_tree=False):
    """Perform a progressive MSA of sequences and build a UPGMA tree.

    Parameters
    ----------
    sequences : skbio.SequenceCollection
        The sequences to be aligned.
    pairwise_aligner : function
        Function that should be used to perform the pairwise alignments,
        for example skbio.alignment.global_pairwise_align_nucleotide. Must
        support skbio.Sequence objects or skbio.TabularMSA objects
        as input.
    metric : function, optional
        Function that returns a single distance value when given a pair of
        skbio.Sequence objects. It is used to build the guide tree when one
        is not provided, and to build the output tree from the alignment.
    guide_tree : skbio.TreeNode, optional
        The tree that should be used to guide the alignment process. When
        ``None``, one is built from ``sequences`` with ``metric``.
    display_aln : bool, optional
        Print the alignment before returning.
    display_tree : bool, optional
        Draw the tree (via ``dendrogram``) before returning.

    Returns
    -------
    msa
        The alignment returned by ``progressive_msa``.
    skbio.TreeNode
        Tree built with ``TreeNode.from_linkage_matrix`` from the distances
        between the aligned sequences.
    """
    if guide_tree is None:
        guide_dm = DistanceMatrix.from_iterable(
            sequences, metric=metric, key='id')
        guide_lm = average(guide_dm.condensed_form())
        guide_tree = TreeNode.from_linkage_matrix(guide_lm, guide_dm.ids)

    msa = progressive_msa(sequences, guide_tree,
                          pairwise_aligner=pairwise_aligner)
    if display_aln:
        print(msa)

    msa_dm = DistanceMatrix.from_iterable(msa, metric=metric, key='id')
    msa_lm = average(msa_dm.condensed_form())
    msa_tree = TreeNode.from_linkage_matrix(msa_lm, msa_dm.ids)

    if display_tree:
        print("\nOutput tree:")
        # The call is made for its drawing side effect only; the returned
        # value was never used, so it is no longer bound to a local.
        dendrogram(msa_lm, labels=msa_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black', leaf_font_size=24)
    return msa, msa_tree
开发者ID:lsl5,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:53,代码来源:__init__.py
示例6: test_fsvd
def test_fsvd(self):
    """Compare fsvd PCoA with eigh PCoA and probe number_of_dimensions."""
    dm1, dm2, dm3 = (
        DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        for _ in range(3)
    )
    loose_compare = dict(ignore_directionality=True,
                         ignore_method_names=True)

    # eigh vs. fsvd, then fsvd with and without the inplace flag.
    # The in-place call on dm2 must stay after the out-of-place one.
    via_eigh = pcoa(dm1, method="eigh", number_of_dimensions=3,
                    inplace=False)
    via_fsvd = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                    inplace=False)
    via_fsvd_inplace = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                            inplace=True)
    assert_ordination_results_equal(via_fsvd, via_eigh, **loose_compare)
    assert_ordination_results_equal(via_fsvd, via_fsvd_inplace,
                                    **loose_compare)

    # number_of_dimensions=0 is compared against passing the full size.
    zero_dims = pcoa(dm3, method="fsvd", number_of_dimensions=0,
                     inplace=False)
    all_dims = pcoa(dm3, method="fsvd",
                    number_of_dimensions=dm3.data.shape[0],
                    inplace=False)
    assert_ordination_results_equal(zero_dims, all_dims, **loose_compare)

    # Too-large and negative dimension counts must be rejected by both
    # methods (same order as before: fsvd first, then eigh).
    dim_too_large = dm1.data.shape[0] + 10
    for method in ("fsvd", "eigh"):
        for bad_dims in (dim_too_large, -1):
            with self.assertRaises(ValueError):
                pcoa(dm2, method=method, number_of_dimensions=bad_dims)

    dm_big = DistanceMatrix.read(get_data_path('PCoA_sample_data_12dim'))
    with self.assertWarnsRegex(RuntimeWarning,
                               "no value for number_of_dimensions"):
        pcoa(dm_big, method="fsvd", number_of_dimensions=0)
开发者ID:thermokarst,项目名称:scikit-bio,代码行数:52,代码来源:test_principal_coordinate_analysis.py
示例7: distmat_corr
def distmat_corr(truthfile, distfile, reps=3, corrstat=spearman):
    '''Return ``corrstat`` applied to the two condensed distance matrices.

    The truth matrix is first passed through ``sample_matrix_to_runs`` with
    ``reps``; both matrices must then carry the same set of ids.
    '''
    observed = DistanceMatrix.read(distfile)
    reference = sample_matrix_to_runs(DistanceMatrix.read(truthfile), reps)

    ids = sorted(observed.ids)
    t_ids = sorted(reference.ids)
    assert ids == t_ids, (ids, t_ids)

    # Filter both matrices by the same sorted id list so entries line up.
    dist = observed.filter(ids).condensed_form()
    truth = reference.filter(ids).condensed_form()
    return corrstat(truth, dist)
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:13,代码来源:calc_rho.py
示例8: test_to_series_4x4
def test_to_series_4x4(self):
    """``to_series`` on a 4x4 matrix should match the six listed pairs."""
    ids = ['a', 'b', 'c', 'd']
    rows = [
        [0.0, 0.2, 0.3, 0.4],
        [0.2, 0.0, 0.5, 0.6],
        [0.3, 0.5, 0.0, 0.7],
        [0.4, 0.6, 0.7, 0.0],
    ]
    observed = DistanceMatrix(rows, ids).to_series()

    pair_index = pd.Index([
        ('a', 'b'), ('a', 'c'), ('a', 'd'),
        ('b', 'c'), ('b', 'd'), ('c', 'd'),
    ])
    expected = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7], index=pair_index)

    assert_series_almost_equal(observed, expected)
开发者ID:RNAer,项目名称:scikit-bio,代码行数:13,代码来源:test_base.py
示例9: test_fsvd_inplace
def test_fsvd_inplace(self):
    """In-place eigh and in-place fsvd PCoA should give matching results."""
    sample_path = 'PCoA_sample_data_3'
    eigh_input = DistanceMatrix.read(get_data_path(sample_path))
    fsvd_input = DistanceMatrix.read(get_data_path(sample_path))

    shared = dict(number_of_dimensions=3, inplace=True)
    expected = pcoa(eigh_input, method="eigh", **shared)
    observed = pcoa(fsvd_input, method="fsvd", **shared)

    assert_ordination_results_equal(observed, expected,
                                    ignore_directionality=True,
                                    ignore_method_names=True)
开发者ID:thermokarst,项目名称:scikit-bio,代码行数:13,代码来源:test_principal_coordinate_analysis.py
示例10: setUp
def setUp(self):
    """Build the small distance matrices and load expected result tables."""
    self.minx = DistanceMatrix([[0, 1, 2], [1, 0, 3], [2, 3, 0]])
    self.miny = DistanceMatrix([[0, 2, 7], [2, 0, 6], [7, 6, 0]])
    self.minz = DistanceMatrix([[0, 0.5, 0.25],
                                [0.5, 0, 0.1],
                                [0.25, 0.1, 0]])
    self.min_dms = (self.minx, self.miny, self.minz)

    # Copies of minx and minz with one additional trailing ID each.
    self.x_extra = DistanceMatrix([[0, 1, 2, 7],
                                   [1, 0, 3, 2],
                                   [2, 3, 0, 4],
                                   [7, 2, 4, 0]], ['0', '1', '2', 'foo'])
    self.z_extra = DistanceMatrix([[0, 0.5, 0.25, 3],
                                   [0.5, 0, 0.1, 24],
                                   [0.25, 0.1, 0, 5],
                                   [3, 24, 5, 0]], ['0', '1', '2', 'bar'])

    # The p-value column (index 3) is read as str: the in-memory results
    # format p-values as strings with a fixed number of decimals, and a
    # float column would make the frames compare unequal.
    p_val_conv = {3: str}

    def load_expected(filename):
        return pd.read_csv(get_data_path(filename), sep='\t',
                           index_col=(0, 1), converters=p_val_conv)

    self.exp_results_minimal = load_expected(
        'pwmantel_exp_results_minimal.txt')
    self.exp_results_minimal_with_labels = load_expected(
        'pwmantel_exp_results_minimal_with_labels.txt')
    self.exp_results_duplicate_dms = load_expected(
        'pwmantel_exp_results_duplicate_dms.txt')
    self.exp_results_na_p_value = load_expected(
        'pwmantel_exp_results_na_p_value.txt')
    self.exp_results_too_few_permutations = load_expected(
        'pwmantel_exp_results_too_few_permutations.txt')
    self.exp_results_reordered_distance_matrices = load_expected(
        'pwmantel_exp_results_reordered_distance_matrices.txt')
开发者ID:nbresnick,项目名称:scikit-bio,代码行数:51,代码来源:test_mantel.py
示例11: bioenv
def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    """Run bioenv on the numeric metadata columns and render an HTML report.

    Parameters
    ----------
    output_dir : str
        Directory handed to ``q2templates.render`` for the rendered index.
    distance_matrix : skbio.DistanceMatrix
        Distances between samples; filtered (non-strictly) to the samples
        that remain in the metadata after filtering.
    metadata : qiime2.Metadata
        Sample metadata; converted to a dataframe and reduced to numeric,
        NaN-free, non-constant columns before the analysis.
    """
    # Convert metadata to numeric values where applicable, drop the
    # non-numeric columns, and then drop samples that contain NaNs.
    df = metadata.to_dataframe()
    # NOTE(review): errors='ignore' is reported as deprecated in recent
    # pandas releases — confirm against the pinned pandas version.
    df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

    # Filter categorical columns.
    pre_filtered_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    filtered_categorical_cols = pre_filtered_cols - set(df.columns)

    # Filter zero-variance numerical columns.
    pre_filtered_cols = set(df.columns)
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = pre_filtered_cols - set(df.columns)

    # Filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived so that
    # information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = skbio.stats.distance.bioenv(distance_matrix, df)
    result = result.to_html(classes='table table-striped table-hover').replace(
        'border="1"', 'border="0"')

    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    # Sets have no stable iteration order, so the names are sorted before
    # joining to keep the rendered text identical from run to run.
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'filtered_categorical_cols': ', '.join(
            sorted(filtered_categorical_cols)),
        'filtered_zero_variance_cols': ', '.join(
            sorted(filtered_zero_variance_cols)),
        'result': result})
开发者ID:jairideout,项目名称:diversity,代码行数:35,代码来源:_visualizer.py
示例12: effect_size
def effect_size(mappings, alphas, betas, output, jobs, permutations,
                overwrite, na_values):
    """Load the input tables and run ``_process_column`` on each work item.

    With ``betas`` given, the distance matrices are read and the work items
    from ``_generate_betas`` are dispatched through a joblib ``Parallel``
    pool; otherwise the alpha tables are read and the work items from
    ``_generate_alphas`` are processed one by one.
    """
    def read_table(path):
        return pd.read_csv(path, sep='\t', dtype=str, na_values=na_values)

    # Several mapping, alpha or beta files may be given, so each kind is
    # kept as a dict keyed by the file path it was loaded from.
    loaded_mappings = {}
    for path in mappings:
        loaded_mappings[path] = read_table(path)
    mappings = loaded_mappings
    for frame in mappings.values():
        frame.set_index('#SampleID', inplace=True)

    if betas:
        loaded_betas = {}
        for path in betas:
            loaded_betas[path] = DistanceMatrix.read(path)
        betas = loaded_betas

        with joblib.parallel.Parallel(n_jobs=jobs, verbose=100) as par:
            work_items = _generate_betas(
                betas, mappings, permutations, output, overwrite)
            par(joblib.delayed(_process_column)(
                    bf, c, fname, finfo, alphas, betas, permutations)
                for bf, c, fname, finfo in work_items)
    else:
        loaded_alphas = {}
        for path in alphas:
            loaded_alphas[path] = read_table(path)
        alphas = loaded_alphas
        for frame in alphas.values():
            frame.set_index('#SampleID', inplace=True)

        work_items = _generate_alphas(alphas, mappings, output, overwrite)
        for af, c, fname, finfo in work_items:
            _process_column(af, c, fname, finfo, alphas, betas, permutations)
开发者ID:antgonza,项目名称:evident,代码行数:27,代码来源:effect_size.py
示例13: setUp
def setUp(self):
    """Compute the PCoA ordination and load the biplot descriptor table."""
    # Crawford dataset for unweighted UniFrac
    matrix = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
    self.ordination = pcoa(matrix)

    # The table is indexed by 'Taxon' on disk and stored transposed.
    descriptors_fp = get_data_path('PCoA_biplot_descriptors')
    by_taxon = pd.read_table(descriptors_fp, index_col='Taxon')
    self.descriptors = by_taxon.T
开发者ID:thermokarst,项目名称:scikit-bio,代码行数:7,代码来源:test_principal_coordinate_analysis.py
示例14: aln_distmat
def aln_distmat(alignment, reps=3):
    '''Calculate pairwise distances from a MSA of genomes.

    ``alignment`` is read as a DNA ``TabularMSA`` and the ``hamming`` metric
    is applied to every pair of sequences. ``reps`` is accepted but is not
    used by this function.
    '''
    msa = TabularMSA.read(alignment, constructor=DNA)
    # Re-key the alignment by each sequence's "id" so that the resulting
    # matrix is labelled with those ids.
    msa.reassign_index(minter="id")
    seq_values = [seq.values for seq in msa]
    return DistanceMatrix.from_iterable(seq_values, metric=hamming,
                                        keys=msa.index)
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:7,代码来源:alndist.py
示例15: test_confirm_betadispr_results
def test_confirm_betadispr_results(self):
    """Check ``permdisp`` output on the moving-pictures data.

    The default test and the 'centroid' test are each compared against a
    hard-coded expected results series.
    """
    mp_dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv'))
    mp_mf = pd.read_csv(get_data_path('moving_pictures_mf.tsv'), sep='\t')
    mp_mf.set_index('#SampleID', inplace=True)

    observed_default = permdisp(mp_dm, mp_mf, column='BodySite')
    observed_centroid = permdisp(mp_dm, mp_mf, column='BodySite',
                                 test='centroid')

    field_names = ['method name', 'test statistic name', 'sample size',
                   'number of groups', 'test statistic', 'p-value',
                   'number of permutations']

    def expected_series(values):
        return pd.Series(data=values, index=field_names, dtype='object',
                         name='PERMDISP results')

    expected_default = expected_series(
        ['PERMDISP', 'F-value', 33, 4, 10.1956, 0.001, 999])
    expected_centroid = expected_series(
        ['PERMDISP', 'F-value', 33, 4, 17.4242, 0.001, 999])

    self.assert_series_equal(expected_default, observed_default)
    self.assert_series_equal(expected_centroid, observed_centroid)
开发者ID:ElDeveloper,项目名称:biolopy,代码行数:25,代码来源:test_permdisp.py
示例16: guide_tree_from_sequences
def guide_tree_from_sequences(sequences,
                              metric=kmer_distance,
                              display_tree=False):
    """Build a UPGMA tree by applying metric to sequences.

    Parameters
    ----------
    sequences : list of skbio.Sequence objects (or subclasses)
        The sequences to be represented in the resulting guide tree.
    metric : function
        Function that returns a single distance value when given a pair of
        skbio.Sequence objects.
    display_tree : bool, optional
        Draw the tree (via ``dendrogram``) before returning.

    Returns
    -------
    tree
        Whatever ``to_tree`` returns for the linkage matrix.
        NOTE(review): this was previously documented as skbio.TreeNode, but
        the value is produced by ``to_tree`` rather than
        ``TreeNode.from_linkage_matrix`` — confirm the actual type.
    """
    guide_dm = DistanceMatrix.from_iterable(
        sequences, metric=metric, key='id')
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # The call is made for its drawing side effect only; the returned
        # value was never used, so it is no longer bound to a local.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
开发者ID:lsl5,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:28,代码来源:__init__.py
示例17: test_simple
def test_simple(self):
    """PCoA on the sample matrix should match the stored expected results."""
    eigvals = [0.51236726, 0.30071909, 0.26791207, 0.20898868,
               0.19169895, 0.16054235, 0.15017696, 0.12245775,
               0.0]
    proportion_explained = [0.2675738328, 0.157044696, 0.1399118638,
                            0.1091402725, 0.1001110485,
                            0.0838401162, 0.0784269939,
                            0.0639511764, 0.0]
    sample_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                  'PC.593', 'PC.355', 'PC.607', 'PC.634']
    axis_labels = ['PC%d' % n for n in range(1, 10)]

    eigvals_series = pd.Series(eigvals, index=axis_labels)
    site_coords = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))
    samples_frame = pd.DataFrame(site_coords, index=sample_ids,
                                 columns=axis_labels)
    proportion_series = pd.Series(proportion_explained, index=axis_labels)

    expected_results = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=eigvals_series,
        samples=samples_frame,
        proportion_explained=proportion_series)

    observed = pcoa(DistanceMatrix.read(get_data_path('PCoA_sample_data_3')))

    assert_ordination_results_equal(observed, expected_results,
                                    ignore_directionality=True)
开发者ID:ebolyen,项目名称:scikit-bio,代码行数:27,代码来源:test_principal_coordinate_analysis.py
示例18: get_spearmans
def get_spearmans(distfile, truth):
    """Return the Spearman correlation between ``truth`` and a matrix file.

    The matrix read from ``distfile`` is reordered by its sorted ids before
    its condensed form is compared with the condensed form of ``truth``.
    """
    loaded = DistanceMatrix.read(distfile)
    reordered = loaded.filter(sorted(loaded.ids))
    observed = reordered.condensed_form()
    expected = truth.condensed_form()
    return stats.spearmanr(expected, observed).correlation
开发者ID:kdmurray91,项目名称:kwip-experiments,代码行数:8,代码来源:treedist.py
示例19: setup
def setup(self):
    """Load the sample distance matrix, run PCoA on it and store its ids."""
    # The former 'U' mode flag was removed in Python 3.11 and makes open()
    # raise ValueError there; default text mode already translates
    # newlines on Python 3.
    with open(get_data_path('PCoA_sample_data_3')) as lines:
        dist_matrix = DistanceMatrix.from_file(lines)

    self.ordination = PCoA(dist_matrix)
    self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
                'PC.355', 'PC.607', 'PC.634']
开发者ID:jwdebelius,项目名称:scikit-bio,代码行数:8,代码来源:test_ordination.py
示例20: test_from_iterable_validate_false_non_symmetric
def test_from_iterable_validate_false_non_symmetric(self):
    """Without validation, a non-symmetric metric must yield this matrix."""
    expected_rows = [
        [0, 1, 2, 3],
        [1, 0, 1, 2],
        [2, 1, 0, 1],
        [3, 2, 1, 0],
    ]
    exp = DistanceMatrix(expected_rows)

    def signed_diff(a, b):
        # Not symmetric: swapping the arguments flips the sign.
        return a - b

    res = DistanceMatrix.from_iterable((x for x in range(4)),
                                       signed_diff,
                                       validate=False)
    self.assertEqual(res, exp)
开发者ID:jakereps,项目名称:scikit-bio,代码行数:9,代码来源:test_base.py
注:本文中的skbio.DistanceMatrix类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论