本文整理汇总了Python中qiime.filter.sample_ids_from_metadata_description函数的典型用法代码示例。如果您正苦于以下问题:Python sample_ids_from_metadata_description函数的具体用法?Python sample_ids_from_metadata_description怎么用?Python sample_ids_from_metadata_description使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了sample_ids_from_metadata_description函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Filter a distance matrix down to a specified set of samples.

    The samples to keep come from exactly one of three mutually exclusive
    sources: an OTU table (-t), a sample-id file (--sample_id_fp), or a
    mapping file plus a metadata description (-m and -s).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_f = open(opts.output_distance_matrix, 'w')

    if opts.otu_table_fp:
        # keep every sample present in the OTU table
        otu_table = load_table(opts.otu_table_fp)
        samples_to_keep = otu_table.ids()
    elif opts.sample_id_fp:
        samples_to_keep = \
            get_seqs_to_keep_lookup_from_seq_id_file(
                open(opts.sample_id_fp, 'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp, 'U'), opts.valid_states)
        except ValueError as e:
            # str(e) instead of e.message: BaseException.message was
            # deprecated in Python 2.6 and removed in Python 3.
            option_parser.error(str(e))
    else:
        option_parser.error('must pass either --sample_id_fp, -t, or -m and '
                            '-s')

    # note that negate gets a little weird here. The function we're calling
    # removes the specified samples from the distance matrix, but the other
    # QIIME filter scripts keep these samples specified. So, the interface of
    # this script is designed to keep the specified samples, and therefore
    # negate=True is passed to filter_samples_from_distance_matrix by default.
    d = filter_samples_from_distance_matrix(
        parse_distmat(
            open(opts.input_distance_matrix, 'U')),
        samples_to_keep,
        negate=not opts.negate)
    output_f.write(d)
    output_f.close()
开发者ID:AhmedAbdelfattah,项目名称:qiime,代码行数:34,代码来源:filter_distance_matrix.py
示例2: main
def main():
    """Filter a mapping file down to a specified set of sample IDs.

    Sample IDs to keep come from either a sample-id file
    (--sample_id_fp) or a mapping file plus a metadata description
    (--valid_states).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.input_fp
    out_mapping_fp = opts.output_fp
    valid_states = opts.valid_states

    if opts.sample_id_fp:
        valid_sample_ids = \
            get_seqs_to_keep_lookup_from_seq_id_file(
                open(opts.sample_id_fp, 'U'))
    elif mapping_fp and valid_states:
        valid_sample_ids = sample_ids_from_metadata_description(
            open(mapping_fp, 'U'), valid_states)
    else:
        # Previously this fell through with valid_sample_ids undefined,
        # producing a NameError below; fail with a clear message instead.
        option_parser.error('must pass either --sample_id_fp or '
                            '--valid_states')

    data, headers, _ = parse_mapping_file(open(mapping_fp, 'U'))

    # keep only the rows whose first column (SampleID) was selected
    good_mapping_file = [line for line in data
                         if line[0] in valid_sample_ids]

    lines = format_mapping_file(headers, good_mapping_file)
    fd = open(out_mapping_fp, 'w')
    fd.write(lines)
    fd.close()
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:26,代码来源:filter_mapping_file.py
示例3: main
def main():
    """Filter and optionally sort the samples of a PCoA coordinates file.

    Keeps only samples matching --valid_states in the mapping file, and,
    when --mapping_header_name is given, reorders the coordinates to
    follow the sort order of that mapping-file column.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    coords_fp = opts.input_coords
    mapping_fp = opts.mapping_fp
    output_fp = opts.output_fp
    valid_states = opts.valid_states
    negate = opts.negate
    mapping_header_name = opts.mapping_header_name

    coords_ids, coords, eigen_values, pct_exp = parse_coords(open(coords_fp, "U"))
    data, headers, _ = parse_mapping_file(open(mapping_fp, "U"))

    # NOTE(review): if --valid_states is not supplied, valid_coords_ids is
    # never assigned and the code below raises NameError — confirm the
    # script marks -s as required.
    if mapping_fp and valid_states:
        valid_sample_ids = sample_ids_from_metadata_description(
            open(mapping_fp, "U"), valid_states)
        valid_coords_ids, valid_coords = filter_sample_ids_from_coords(
            coords_ids, coords, valid_sample_ids, negate)

    if mapping_header_name:
        sorted_sample_ids = sort_sample_ids(data, headers, mapping_header_name)
        sorted_coord_ids, sorted_coords = sort_coords(
            valid_coords_ids, valid_coords, sorted_sample_ids)
        valid_coords_ids, valid_coords = sorted_coord_ids, sorted_coords

    lines = format_coords(valid_coords_ids, valid_coords, eigen_values, pct_exp)
    fd = open(output_fp, "w")
    fd.writelines(lines)
    # Bug fix: was `fd.close` (attribute access, no call) — the file was
    # never closed, so output could be left unflushed.
    fd.close()
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:28,代码来源:filter_coords_from_pcoa.py
示例4: silly_function
def silly_function(ui):
    # For each coloring value, write the subject IDs sharing that value to
    # a color_by_<value>.txt file and (unless suppressed) build per-subject
    # trajectory files by shelling out to external QIIME commands.
    #
    # NOTE(review): relies on many names from an enclosing scope
    # (coloring_values, mapping_fp, data, headers, subject_index,
    # output_path, opts, FILTER_CMD, CONVERSION_CMD, ...) — this is a
    # nested helper extracted from a larger Python 2 script.
    for c_value in ui.series(coloring_values):
        # sample IDs whose coloring_header_name column equals c_value
        sample_ids = sample_ids_from_metadata_description(open(mapping_fp, 'U'),
            '%s:%s' % (coloring_header_name, c_value))
        _headers, _data = filter_mapping_file(data, headers, sample_ids, True)
        # unique subject identifiers among the matching samples
        per_color_subject_values = list(set([row[subject_index] for row in _data]))

        fd = open(join(output_path, 'color_by_'+c_value+'.txt'), 'w')
        for s in ui.series(per_color_subject_values):
            fd.write('%s\n' % s)
        fd.close()

        if not suppress_trajectory_files:
            for s in ui.series(per_color_subject_values):
                filename = join(output_path, s+'.txt')
                if opts.verbose:
                    print 'Working on printing', filename

                # step 1: filter coords down to this subject, sorted by
                # sorting_category, via an external command
                COMMAND_CALL = FILTER_CMD % (coords_fp, mapping_fp,
                    '%s:%s' % (subject_header_name, s), filename,
                    sorting_category)
                o, e, r = qiime_system_call(COMMAND_CALL)
                if opts.verbose and e:
                    print 'Error happened on filtering step: \n%s' % e
                    continue

                # step 2: convert the filtered file in place
                COMMAND_CALL = CONVERSION_CMD % (filename, filename)
                o, e, r = qiime_system_call(COMMAND_CALL)
                if opts.verbose and e:
                    print 'Error happened on conversion step: \n%s' % e
                    continue # useless here but just in case
开发者ID:ElDeveloper,项目名称:apocaqiime,代码行数:33,代码来源:build_input_files_for_category.py
示例5: make_profiles_by_category
def make_profiles_by_category(mapping_fp, taxa_level, category):
    """ Creates a list of profiles for each unique value in the category
    Inputs:
        mapping_fp: filepath to the mapping file
        category: mapping file category to split data over
            defaults to HOST_SUBJECT_ID
    Returns a dictionary keyed by the values on that category and a list of
        profiles as values
    """
    # Parse the mapping file once to get the per-sample metadata dict
    map_f = open(mapping_fp, 'U')
    metadata, comments = parse_mapping_file_to_dict(map_f)
    map_f.close()

    if category == 'SampleID':
        # One profile per sample, keyed directly by sample ID
        return dict((sid, [make_profile_by_sid(metadata, sid, taxa_level)])
                    for sid in metadata)

    profiles = {}
    # Unique values observed for the requested category
    unique_values = set(metadata[sid][category] for sid in metadata)
    for value in unique_values:
        # Re-open the mapping file so the metadata-description lookup can
        # consume it from the start on every iteration
        map_f = open(mapping_fp, 'U')
        matching_sids = sample_ids_from_metadata_description(map_f,
                                                             category+":"+value)
        map_f.close()
        # All profiles for the samples that share this category value
        profiles[value] = [make_profile_by_sid(metadata, sid, taxa_level)
                           for sid in matching_sids]
    return profiles
开发者ID:squirrelo,项目名称:SCGM,代码行数:34,代码来源:profile.py
示例6: split_mapping_file_on_field
def split_mapping_file_on_field(mapping_f,
                                mapping_field,
                                column_rename_ids=None,
                                include_repeat_cols=True):
    """ split mapping file based on value in field

    Yields (value_as_filename_string, formatted_mapping_file) pairs, one
    per unique value observed in mapping_field.

    column_rename_ids: optional header name whose values become the new
        sample IDs in each split file (resolved to a column index here).
    Raises KeyError if column_rename_ids is not a header in the file.
    """
    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)

    mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    if column_rename_ids:
        try:
            column_rename_ids = mapping_headers.index(column_rename_ids)
        except ValueError:
            # Bug fix: the message previously interpolated mapping_field,
            # but the field that failed the lookup is column_rename_ids.
            raise KeyError("Field is not in mapping file (search is case " +
                           "and white-space sensitive). \n\tProvided field: " +
                           "%s. \n\tValid fields: %s" % (column_rename_ids,
                                                         ' '.join(mapping_headers)))

    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))

        # parse mapping file each time though the loop as filtering operates on
        # values
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)
        mapping_headers, mapping_data = filter_mapping_file(
            mapping_data,
            mapping_headers,
            sample_ids_to_keep,
            include_repeat_cols=include_repeat_cols,
            column_rename_ids=column_rename_ids)
        yield v_fp_str, format_mapping_file(mapping_headers, mapping_data)
开发者ID:TheSchwa,项目名称:qiime,代码行数:35,代码来源:split.py
示例7: split_otu_table_on_sample_metadata
def split_otu_table_on_sample_metadata(otu_table, mapping_f, mapping_field):
    """Yield one sub OTU table per unique value of mapping_field.

    Each yielded pair is (value_as_filename_string, filtered_table) where
    the table holds only the samples matching that value. Raises
    OTUTableSplitError when no value produces a non-empty table.
    """
    with errstate(empty='raise'):
        mapping_lines = list(mapping_f)
        n_tables_yielded = 0
        for value in get_mapping_values(mapping_lines, mapping_field):
            label = value.replace(' ', '_')
            keep_ids = sample_ids_from_metadata_description(
                mapping_lines, valid_states_str="%s:%s" % (mapping_field, value))
            try:
                # filtering cannot be inplace otherwise we lose data
                sub_table = otu_table.filter(
                    lambda values, id_, metadata: id_ in keep_ids,
                    axis='sample', inplace=False)
            except TableException:
                # every sample was filtered out; nothing to emit for this value
                continue
            n_tables_yielded += 1
            yield label, sub_table

        if not n_tables_yielded:
            raise OTUTableSplitError(
                "Could not split OTU tables! There are no matches between the "
                "sample identifiers in the OTU table and the mapping file.")
开发者ID:Honglongwu,项目名称:qiime,代码行数:29,代码来源:split.py
示例8: main
def main():
    # Filter samples from a BIOM OTU table based on metadata, per-sample
    # counts, and/or an explicit sample-id list; optionally write a
    # mapping file filtered to the surviving samples.
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    # --valid_states is meaningless without a mapping file to search
    if mapping_fp is None and valid_states is not None:
        option_parser.error("--mapping_fp must be provided if --valid_states " "is passed.")
    # at least one filtering criterion must be requested
    if not ((mapping_fp and valid_states) or min_count != 0 or not isinf(max_count) or sample_id_fp is not None):
        option_parser.error(
            "No filtering requested. Must provide either "
            "mapping_fp and valid states, min counts, "
            "max counts, or sample_id_fp (or some combination "
            "of those)."
        )
    # metadata-based and id-file-based selection are mutually exclusive
    if (mapping_fp and valid_states) and sample_id_fp:
        option_parser.error("Providing both --sample_id_fp and " "--mapping_fp/--valid_states is not supported.")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate" " output mapping file.")

    otu_table = load_table(opts.input_fp)

    negate_sample_id_fp = opts.negate_sample_id_fp
    if mapping_fp and valid_states:
        sample_ids_to_keep = sample_ids_from_metadata_description(open(mapping_fp, "U"), valid_states)
        # negation only applies to ids read from --sample_id_fp, not to
        # metadata-derived ids, so it is forced off on this path
        negate_sample_id_fp = False
    else:
        sample_ids_to_keep = otu_table.ids()

    if sample_id_fp is not None:
        # first whitespace-delimited token of each non-comment line is a
        # sample id; intersect with the ids selected so far
        o = open(sample_id_fp, "U")
        sample_id_f_ids = set([l.strip().split()[0] for l in o if not l.startswith("#")])
        o.close()
        sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(
        otu_table, sample_ids_to_keep, min_count, max_count, negate_ids_to_keep=negate_sample_id_fp
    )
    try:
        write_biom_table(filtered_otu_table, output_fp)
    except EmptyBIOMTableError:
        option_parser.error(
            "Filtering resulted in an empty BIOM table. " "This indicates that no samples remained after filtering."
        )

    # filter mapping file if requested
    if output_mapping_fp:
        mapping_data, mapping_headers, _ = parse_mapping_file(open(mapping_fp, "U"))
        mapping_headers, mapping_data = filter_mapping_file(mapping_data, mapping_headers, filtered_otu_table.ids())
        open(output_mapping_fp, "w").write(format_mapping_file(mapping_headers, mapping_data))
开发者ID:colinbrislawn,项目名称:qiime,代码行数:59,代码来源:filter_samples_from_otu_table.py
示例9: main
def main():
    # Filter samples from a BIOM OTU table based on metadata, per-sample
    # counts, and/or an explicit sample-id list; optionally write a
    # mapping file filtered to the surviving samples.
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_fp = opts.output_fp

    mapping_fp = opts.mapping_fp
    output_mapping_fp = opts.output_mapping_fp
    valid_states = opts.valid_states
    min_count = opts.min_count
    max_count = opts.max_count
    sample_id_fp = opts.sample_id_fp

    # at least one filtering criterion must be requested
    if not ((mapping_fp and valid_states) or
            min_count != 0 or
            not isinf(max_count) or
            sample_id_fp is not None):
        option_parser.error("No filtering requested. Must provide either "
                            "mapping_fp and valid states, min counts, "
                            "max counts, or sample_id_fp (or some combination "
                            "of those).")
    if output_mapping_fp and not mapping_fp:
        option_parser.error("Must provide input mapping file to generate"
                            " output mapping file.")

    otu_table = load_table(opts.input_fp)

    if mapping_fp and valid_states:
        # sample ids whose metadata matches the --valid_states description
        sample_ids_to_keep = sample_ids_from_metadata_description(
            open(mapping_fp, 'U'), valid_states)
    else:
        # default to keeping everything currently in the table
        sample_ids_to_keep = otu_table.ids()

    if sample_id_fp is not None:
        # first whitespace-delimited token of each non-comment line is a
        # sample id; intersect with the ids selected so far
        sample_id_f_ids = set([l.strip().split()[0]
                               for l in open(sample_id_fp, 'U') if not l.startswith('#')])
        sample_ids_to_keep = set(sample_ids_to_keep) & sample_id_f_ids

    filtered_otu_table = filter_samples_from_otu_table(otu_table,
                                                       sample_ids_to_keep,
                                                       min_count,
                                                       max_count)
    write_biom_table(filtered_otu_table, output_fp)

    # filter mapping file if requested
    if output_mapping_fp:
        mapping_data, mapping_headers, _ = parse_mapping_file(
            open(mapping_fp, 'U'))
        mapping_headers, mapping_data = \
            filter_mapping_file(
                mapping_data,
                mapping_headers,
                filtered_otu_table.ids())
        open(
            output_mapping_fp,
            'w').write(
            format_mapping_file(
                mapping_headers,
                mapping_data))
开发者ID:cmokeefe,项目名称:qiime,代码行数:59,代码来源:filter_samples_from_otu_table.py
示例10: get_seqs_to_keep_lookup_from_mapping_file
def get_seqs_to_keep_lookup_from_mapping_file(fasta_f, mapping_f, valid_states):
    """Return a lookup (dict keyed by seq id, values None) of sequences
    whose sample matches valid_states in the mapping file.

    A sequence belongs to a sample when the prefix of its id before the
    first underscore is one of the selected sample ids.
    """
    sample_ids = set(sample_ids_from_metadata_description(mapping_f,
                                                          valid_states))
    # comprehension replaces the original append loop with a useless
    # `else: continue`; membership test against a set is O(1)
    seqs_to_keep = [seq_id for seq_id, seq in parse_fasta(fasta_f)
                    if seq_id.split("_")[0] in sample_ids]
    # preserve the original return type: a dict with None values
    return dict.fromkeys(seqs_to_keep)
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:9,代码来源:filter_fasta.py
示例11: split_otu_table_on_sample_metadata
def split_otu_table_on_sample_metadata(otu_table_f, mapping_f, mapping_field):
    """Yield one formatted sub OTU table per unique value of mapping_field.

    Each yielded pair is (value_as_filename_string, formatted_biom_table)
    holding only the samples whose metadata matches that value.
    """
    mapping_lines = list(mapping_f)
    table = parse_biom_table(otu_table_f)

    for field_value in get_mapping_values(mapping_lines, mapping_field):
        ids_to_keep = sample_ids_from_metadata_description(
            mapping_lines,
            valid_states_str="%s:%s" % (mapping_field, field_value))
        try:
            sub_table = table.filterSamples(
                lambda values, id_, metadata: id_ in ids_to_keep)
        except TableException:
            # all samples are filtered out, so no otu table to write
            continue
        yield field_value.replace(' ', '_'), format_biom_table(sub_table)
开发者ID:TheSchwa,项目名称:qiime,代码行数:19,代码来源:split.py
示例12: main
def main():
    # Python 2 code: note the old-style "except ValueError, e" syntax and
    # the since-removed e.message attribute.
    # NOTE(review): this listing appears truncated by the example page —
    # the distance-matrix filtering/writing step that should follow the
    # sample selection is not shown.
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    output_f = open(opts.output_distance_matrix,'w')

    if opts.otu_table_fp:
        # keep every sample present in the OTU table
        otu_table = parse_biom_table(open(opts.otu_table_fp,'U'))
        samples_to_keep = otu_table.SampleIds
        #samples_to_keep = \
        # sample_ids_from_otu_table(open(opts.otu_table_fp,'U'))
    elif opts.sample_id_fp:
        samples_to_keep = \
            get_seqs_to_keep_lookup_from_seq_id_file(open(opts.sample_id_fp,'U'))
    elif opts.mapping_fp and opts.valid_states:
        try:
            # sample ids whose metadata matches the --valid_states description
            samples_to_keep = sample_ids_from_metadata_description(
                open(opts.mapping_fp,'U'),opts.valid_states)
        except ValueError, e:
            option_parser.error(e.message)
开发者ID:Jorge-C,项目名称:qiime,代码行数:19,代码来源:filter_distance_matrix.py
示例13: split_otu_table_on_sample_metadata
def split_otu_table_on_sample_metadata(otu_table, mapping_f, mapping_field):
    """Yield one sub OTU table per unique value of mapping_field.

    Each yielded pair is (value_as_filename_string, filtered_table) where
    the table holds only the samples matching that value; values that
    match no samples are skipped.
    """
    mapping_f = list(mapping_f)
    mapping_values = get_mapping_values(mapping_f, mapping_field)

    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        sample_ids_to_keep = sample_ids_from_metadata_description(
            mapping_f, valid_states_str="%s:%s" % (mapping_field, v))

        try:
            # filtering cannot be inplace otherwise we lose data.
            # Bug fix: sample IDs must be filtered on the 'sample' axis;
            # the original used axis='observation', which compared sample
            # identifiers against OTU (observation) IDs and so filtered
            # the wrong dimension.
            filtered_otu_table = otu_table.filter(
                lambda values, id_, metadata: id_ in sample_ids_to_keep,
                axis='sample', inplace=False)
        except TableException:
            # all samples are filtered out, so no otu table to write
            continue
        yield v_fp_str, filtered_otu_table
开发者ID:jrherr,项目名称:qiime,代码行数:20,代码来源:split.py
示例14: main
#.........这里部分代码省略.........
# If sequence reinstatement is requested, make sure all necessary options
# are specified
reinstatement_options_counter = 0
if reinstatement_stat_blank:
reinstatement_options_counter += 1
if reinstatement_stat_sample:
reinstatement_options_counter += 1
if reinstatement_differential:
reinstatement_options_counter += 1
if ((reinstatement_options_counter > 0) and
(reinstatement_options_counter < 3)):
option_parser.error("Must provide all of "
"reinstatement_stats_blank, "
"reinstatement_stat_sample, and "
"reinstatement_differential, or none.")
if ((reinstatement_options_counter == 3 and reinstatement_sample_number)
and not reinstatement_method):
option_parser.error("If providing sample number AND abundance criteria "
"for sequence reinstatement, must also provide "
"a method for combining results.")
if reinstatement_options_counter == 3 or reinstatement_sample_number:
reinstatement = True
else:
reinstatement = False
# get blank sample IDs from mapping file or sample ID list
if mapping_fp and valid_states:
blank_sample_ids = sample_ids_from_metadata_description(
open(mapping_fp, 'U'), valid_states)
blanks = True
elif blank_id_fp is not None:
blank_id_f = open(blank_id_fp, 'Ur')
blank_sample_ids = set([line.strip().split()[0]
for line in blank_id_f
if not line.startswith('#')])
blank_id_f.close()
blanks = True
else:
blanks = False
# Initialize output objets
output_dict = {}
contaminant_types = []
contamination_stats_dict = None
contamination_stats_header = None
corr_data_dict = None
# Do blank-based stats calculations, if not there check to make sure no
# blank-dependent methods are requested:
if blanks:
if prescreen_threshold:
low_contam_libraries = prescreen_libraries(unique_seq_biom,
blank_sample_ids,
removal_stat_sample,
removal_stat_blank,
removal_differential,
开发者ID:mortonjt,项目名称:decontaminate,代码行数:67,代码来源:decontaminate_unitary.py
示例15: format_vectors_to_js
def format_vectors_to_js(mapping_file_data, mapping_file_headers, coords_data,
                         coords_headers, connected_by_header,
                         sorted_by_header=None):
    """Write a string representing the vectors in a PCoA plot as javascript

    Inputs:
    mapping_file_data: contents of the mapping file
    mapping_file_headers: headers of the mapping file
    coords_data: coordinates of the PCoA plot in a numpy 2-D array or a list of
    numpy 2-D arrays for jackknifed input
    coords_headers: headers of the coords in the PCoA plot or a list of lists
    with the headers for jackknifed input
    connected_by_header: header of the mapping file that represents how the
    lines will be connected
    sorted_by_header: numeric-only header name to sort the samples in the
    vectors

    Output:
    js_vectors_string: string that represents the vectors in the shape of a
    javascript object

    Notes:
    If using jackknifed input, the coordinates and headers that will be used are
    the ones belonging to the master coords i. e. the first element.
    """
    js_vectors_string = []
    js_vectors_string.append('\nvar g_vectorPositions = new Array();\n')

    # identity comparison: "!= None" was unidiomatic and can misbehave with
    # objects overriding __ne__
    if connected_by_header is not None:
        # check if we are processing jackknifed input, if so just get the master
        # (isinstance replaces the fragile `type(x) == list` check)
        if isinstance(coords_data, list):
            coords_data = coords_data[0]
            coords_headers = coords_headers[0]

        columns_to_keep = ['SampleID', connected_by_header]

        # do not add None if sorted_by_header is None or empty
        if sorted_by_header:
            columns_to_keep.append(sorted_by_header)

        # reduce the amount of data by keeping the required fields only
        mapping_file_data, mapping_file_headers =\
            keep_columns_from_mapping_file(mapping_file_data,
                                           mapping_file_headers,
                                           columns_to_keep)

        # format the mapping file to use this with the filtering function
        mf_string = format_mapping_file(mapping_file_headers, mapping_file_data)

        index = mapping_file_headers.index(connected_by_header)
        connected_by = list(set([line[index] for line in mapping_file_data]))

        for category in connected_by:
            # convert to StringIO to for each iteration; else the object
            # won't be usable after the first iteration & you'll get an error
            sample_ids = sample_ids_from_metadata_description(
                StringIO(mf_string), '%s:%s' % (connected_by_header, category))

            # if there is a sorting header, sort the coords using these values
            if sorted_by_header:
                sorting_index = mapping_file_headers.index(sorted_by_header)
                to_sort = [line for line in mapping_file_data
                           if line[0] in sample_ids]

                # get the sorted sample ids from the sorted-reduced mapping file
                sample_ids = zip(*sorted(to_sort,
                                         key=lambda x: float(x[sorting_index])))[0]

            # each category value is a new vector
            js_vectors_string.append("g_vectorPositions['%s'] = new Array();\n"
                                     % (category))

            for s in sample_ids:
                index = coords_headers.index(s)
                # print the first three elements of each coord for each sample
                js_vectors_string.append("g_vectorPositions['%s']['%s'] = %s;\n"
                                         % (category, s,
                                            coords_data[index, :3].tolist()))

    return ''.join(js_vectors_string)
开发者ID:jessicalmetcalf,项目名称:emperor,代码行数:80,代码来源:format.py
示例16: main
#.........这里部分代码省略.........
y_file = StringIO(
filter_samples_from_distance_matrix((y_samples, y_distmtx), ignoring_from_y))
y_samples, y_distmtx = parse_distmat(y_file)
else:
if x_distmtx.shape != y_distmtx.shape:
raise ValueError('The distance matrices have different sizes. ' +
'You can cancel this error by passing --ignore_missing_samples')
figure()
if category is None:
x_val, y_val, x_fit, y_fit, func_text = fit_semivariogram(
(x_samples, x_distmtx), (y_samples, y_distmtx), opts.model, ranges)
plot(
x_val,
y_val,
color=opts.dot_color,
marker=opts.dot_marker,
linestyle="None",
alpha=opts.dot_alpha)
plot(
x_fit,
y_fit,
linewidth=2.0,
color=opts.line_color,
alpha=opts.line_alpha)
else:
# not all the categories that are going to be enumerated are found in
# the distance matrices i.e. the mapping file is a superset that can
# contain more samples than the distance matrices
used_categories = deepcopy(categories)
for index, single_category in enumerate(categories):
good_sample_ids = sample_ids_from_metadata_description(
open(mapping_fp), '%s:%s' % (category, single_category))
try:
_y_samples, _y_distmtx = parse_distmat(StringIO(
filter_samples_from_distance_matrix((y_samples, y_distmtx),
good_sample_ids, negate=True)))
_x_samples, _x_distmtx = parse_distmat(StringIO(
filter_samples_from_distance_matrix((x_samples, x_distmtx),
good_sample_ids, negate=True)))
except ValueError:
# no samples found for this category
used_categories.remove(single_category)
continue
x_val, y_val, x_fit, y_fit, func_text = fit_semivariogram(
(_x_samples, _x_distmtx), (_y_samples, _y_distmtx),
opts.model, ranges)
# retrieve one of the colors the "QIIME" colors and add it to the
# list of used colors for the creation of the legends in the plot
color_only = get_qiime_hex_string_color(index)
colors_used.append(color_only)
plot(x_val, y_val, color=color_only, marker=opts.dot_marker,
linestyle="None", alpha=opts.dot_alpha)
plot(x_fit, y_fit, linewidth=2.0, color=color_only,
alpha=opts.line_alpha, label=single_category)
# set plot limits if requested
x_lb, x_ub = xlim()
y_lb, y_ub = ylim()
if opts.x_min is not None:
开发者ID:Springbudder,项目名称:qiime,代码行数:67,代码来源:plot_semivariogram.py
示例17: main
def main():
    """Run a paired-difference (pre/post) analysis over metadata
    categories or BIOM observations, grouped by individual.

    Exactly one of --metadata_categories or --biom_table_fp must be
    provided, and exactly two comma-separated state values are required.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    state_values = opts.state_values.split(',')
    metadata_categories = opts.metadata_categories
    state_category = opts.state_category
    individual_id_category = opts.individual_id_category
    output_dir = opts.output_dir
    biom_table_fp = opts.biom_table_fp
    observation_ids = opts.observation_ids
    # idiomatic identity test (was `not observation_ids is None`)
    if observation_ids is not None:
        observation_ids = observation_ids.split(',')
    valid_states = opts.valid_states
    ymin = opts.ymin
    ymax = opts.ymax
    line_color = opts.line_color

    # validate the input - currently only supports either biom data
    # or mapping file data. if useful in the future it shouldn't be too
    # hard to allow the user to provide both.
    if metadata_categories and biom_table_fp:
        option_parser.error(
            "Can only pass --metadata_categories or --biom_table_fp, not both.")
    elif not (metadata_categories or biom_table_fp):
        option_parser.error(
            "Must pass either --metadata_categories or --biom_table_fp.")

    # parse the mapping file to a dict
    mapping_data = parse_mapping_file_to_dict(open(mapping_fp, 'U'))[0]

    # currently only support for pre/post (ie, two-state) tests
    if len(state_values) != 2:
        option_parser.error(
            "Exactly two state_values must be passed separated by a comma.")

    # filter mapping_data, if requested
    if valid_states:
        sample_ids_to_keep = sample_ids_from_metadata_description(
            open(mapping_fp, 'U'), valid_states)
        # iterate over a snapshot of the keys: deleting entries while
        # iterating the live view raises RuntimeError on Python 3
        for sid in list(mapping_data.keys()):
            if sid not in sample_ids_to_keep:
                del mapping_data[sid]

    if biom_table_fp:
        biom_table = parse_biom_table(open(biom_table_fp, 'U'))
        analysis_categories = observation_ids or biom_table.ObservationIds
        personal_ids_to_state_values = \
            extract_per_individual_state_metadata_from_sample_metadata_and_biom(
                mapping_data,
                biom_table,
                state_category,
                state_values,
                individual_id_category,
                observation_ids=analysis_categories)
    else:
        analysis_categories = metadata_categories.split(',')
        personal_ids_to_state_values = \
            extract_per_individual_state_metadata_from_sample_metadata(
                mapping_data,
                state_category,
                state_values,
                individual_id_category,
                analysis_categories)

    paired_difference_analyses(personal_ids_to_state_values,
                               analysis_categories,
                               state_values,
                               output_dir,
                               line_color=line_color,
                               ymin=ymin,
                               ymax=ymax)
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:75,代码来源:identify_paired_differences.py
示例18: main
def main():
    # Compute the "core" microbiome: the OTUs observed in at least X% of
    # samples, sweeping X over a range of fractions. Writes a per-step OTU
    # list and BIOM table, plus a summary figure of core size vs. fraction.
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_fp = opts.input_fp
    output_dir = opts.output_dir

    if opts.num_fraction_for_core_steps < 2:
        option_parser.error(
            "Must perform at least two steps. Increase --num_fraction_for_core_steps.")
    # evenly spaced fractions between min and max (inclusive)
    fractions_for_core = np.linspace(opts.min_fraction_for_core,
                                     opts.max_fraction_for_core,
                                     opts.num_fraction_for_core_steps)

    otu_md = opts.otu_md
    valid_states = opts.valid_states
    mapping_fp = opts.mapping_fp

    create_dir(output_dir)

    if valid_states and opts.mapping_fp:
        # restrict the computation to samples matching --valid_states
        sample_ids = sample_ids_from_metadata_description(
            open(mapping_fp, 'U'),
            valid_states)
        if len(sample_ids) < 1:
            option_parser.error(
                "--valid_states pattern didn't match any entries in mapping file: \"%s\"" %
                valid_states)
    else:
        # get core across all samples if user doesn't specify a subset of the
        # samples to work with
        sample_ids = None

    input_table = parse_biom_table(open(input_fp, 'U'))

    otu_counts = []
    summary_figure_fp = join(output_dir, 'core_otu_size.pdf')
    for fraction_for_core in fractions_for_core:
        # build a string representation of the fraction as that gets used
        # several times
        fraction_for_core_str = "%1.0f" % (fraction_for_core * 100.)

        # prep output files
        output_fp = join(
            output_dir,
            'core_otus_%s.txt' %
            fraction_for_core_str)
        output_table_fp = join(
            output_dir,
            'core_table_%s.biom' %
            fraction_for_core_str)
        output_f = open(output_fp, 'w')

        try:
            core_table = filter_table_to_core(input_table,
                                              sample_ids,
                                              fraction_for_core)
        except TableException:
            # nothing is core at this fraction; record a zero count and
            # move on to the next step
            output_f.write(
                "# No OTUs present in %s %% of samples." %
                fraction_for_core_str)
            output_f.close()
            otu_counts.append(0)
            continue

        # write some header information to file
        if sample_ids is None:
            output_f.write(
                "# Core OTUs across %s %% of samples.\n" %
                fraction_for_core_str)
        else:
            output_f.write(
                "# Core OTUs across %s %% of samples matching the sample metadata pattern \"%s\":\n# %s\n" %
                (fraction_for_core_str, valid_states, ' '.join(sample_ids)))

        # write the otu id and corresponding metadata for all core otus
        otu_count = 0
        for value, id_, md in core_table.iter(axis='observation'):
            output_f.write('%s\t%s\n' % (id_, md[otu_md]))
            otu_count += 1
        output_f.close()

        # write the core biom table
        write_biom_table(core_table, output_table_fp)

        # append the otu count to the list of counts
        otu_counts.append(otu_count)

    # summary figure: number of core OTUs as a function of the core fraction
    plot(fractions_for_core, otu_counts)
    xlim(min(fractions_for_core), max(fractions_for_core))
    ylim(0, max(otu_counts) + 1)
    xlabel(
        "Fraction of samples that OTU must be observed in to be considered 'core'")
    ylabel("Number of OTUs")
    savefig(summary_figure_fp)
开发者ID:Kleptobismol,项目名称:qiime,代码行数:95,代码来源:compute_core_microbiome.py
示例19: get_seqs_to_keep_lookup_from_mapping_file
def get_seqs_to_keep_lookup_from_mapping_file(mapping_f, valid_states):
    """Return the set of sample IDs matching valid_states in mapping_f."""
    return set(sample_ids_from_metadata_description(mapping_f, valid_states))
开发者ID:AhmedAbdelfattah,项目名称:qiime,代码行数:4,代码来源:filter_fasta.py
注:本文中的qiime.filter.sample_ids_from_metadata_description函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论