• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python util.MetadataMap类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中qiime.util.MetadataMap的典型用法代码示例。如果您正苦于以下问题:Python MetadataMap类的具体用法?Python MetadataMap怎么用?Python MetadataMap使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了MetadataMap类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: setUp

    def setUp(self):
        self.otu_map1 = [('0',['seq1','seq2','seq5']),
                         ('1',['seq3','seq4']),
                         ('2',['seq6','seq7','seq8'])]
        self.tmp_fp1 = get_tmp_filename(prefix='FormatTests_',suffix='.txt')
        self.tmp_fp2 = get_tmp_filename(prefix='FormatTests_',suffix='.txt')
        self.files_to_remove = []

        self.add_taxa_summary = {'s1':[1,2],'s2':[3,4]}
        self.add_taxa_header = ['sample_id','foo','bar']
        self.add_taxa_order = ['a;b;c','d;e;f']
        self.add_taxa_mapping = [['s1','something1','something2'],
                                 ['s2','something3','something4'],
                                 ['s3','something5','something6']]
        self.biom1 = parse_biom_table(biom1.split('\n'))
        
        self.expected_formatted_html_no_errors_warnings =\
         expected_formatted_html_no_errors_warnings
        self.expected_formatted_html_errors =\
         expected_formatted_html_errors
        self.expected_formatted_html_warnings =\
         expected_formatted_html_warnings
        self.expected_formatted_html_data_nonloc_error =\
         expected_formatted_html_data_nonloc_error

        # For testing formatting of correlation vectors.
        self.corr_vec1 = [('S1', 'T1', 0.7777777777, 0, 0, 0, 0, (0.5, 1.0))]
        self.corr_vec2 = [('S1', 'T1', 0.7777777777, 0, 0, 0, 0, (0.5, 1.0)),
                          ('S2', 'T2', 0.1, 0.05, 0.15, 0.04, 0.12,
                           (-0.1, 0.2)),
                          ('S3', 'T3', 100.68, 0.9, 1, 1, 1, (-0.4, -0.2))]
        self.corr_vec3 = [('S1', 'T1', 0.7777777777, 0, 0, 0, 0, (None, None))]

        # For testing statistical method formatters.
        self.overview_dm = DistanceMatrix.parseDistanceMatrix(
                overview_dm_lines)
        self.overview_map = MetadataMap.parseMetadataMap(overview_map_lines)

        self.soils_dm = DistanceMatrix.parseDistanceMatrix(soils_dm_lines)
        self.soils_map = MetadataMap.parseMetadataMap(soils_map_lines)

        self.anosim_overview = Anosim(self.overview_map, self.overview_dm,
                                      'Treatment')
        self.permanova_overview = Permanova(self.overview_map,
                                            self.overview_dm, 'Treatment')
        self.best_overview = Best(self.overview_dm, self.overview_map, ['DOB'])
        self.best_88_soils = Best(self.soils_dm, self.soils_map,
                ['TOT_ORG_CARB', 'SILT_CLAY', 'ELEVATION',
                 'SOIL_MOISTURE_DEFICIT', 'CARB_NITRO_RATIO',
                 'ANNUAL_SEASON_TEMP', 'ANNUAL_SEASON_PRECPT', 'PH',
                 'CMIN_RATE', 'LONGITUDE', 'LATITUDE'])
开发者ID:oscaredd,项目名称:qiime,代码行数:51,代码来源:test_format.py


示例2: choose_cluster_subsets

def choose_cluster_subsets(otu_table_f, map_f, category, num_total_samples):
    otu_table = parse_biom_table(otu_table_f)
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    # Dirty... :(
    try:
        map_f.seek(0)
    except AttributeError:
        pass

    if num_total_samples > len(otu_table.SampleIds):
        raise InvalidSubsetSize("Too many total samples (%d) were specified "
                                "as a subset size. There are only %d total "
                                "samples to choose a subset from." %
                                (num_total_samples, len(otu_table.SampleIds)))

    category_map = defaultdict(list)
    for samp_id in metadata_map.SampleIds:
        # Mapping files can have more samples than OTU tables.
        if samp_id in otu_table.SampleIds:
            category_val = metadata_map.getCategoryValue(samp_id, category)
            category_map[category_val].append(samp_id)

    samp_ids_to_keep, extra_samps = _choose_items_from_clusters(
            category_map, otu_table.SampleIds, num_total_samples)
    samp_ids_to_keep.extend(extra_samps)

    assert len(samp_ids_to_keep) == num_total_samples, \
           "%d != %d" % (len(samp_ids_to_keep), num_total_samples)
    assert len(samp_ids_to_keep) == len(set(samp_ids_to_keep)), \
           "Duplicate sample IDs in subset"

    return (filter_samples_from_otu_table(otu_table, samp_ids_to_keep, 0, inf),
            filter_mapping_file_from_mapping_f(map_f, samp_ids_to_keep))
开发者ID:gregcaporaso,项目名称:microbiogeo,代码行数:34,代码来源:simulate.py


示例3: main

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    output_fp = opts.output_fp
    mapping_files = [open(fp, 'U') for fp in opts.mapping_fps]
    no_data_value = opts.no_data_value

    mapping_data = MetadataMap.mergeMappingFiles(mapping_files,
                                                 no_data_value=no_data_value)

    with open(output_fp, 'w') as f:
        f.write(str(mapping_data))
开发者ID:Bonder-MJ,项目名称:qiime,代码行数:13,代码来源:merge_mapping_files.py


示例4: _generate_alpha_diversity_boxplots

def _generate_alpha_diversity_boxplots(collated_adiv_dir, map_fp,
                                       split_category, comparison_category,
                                       rarefaction_depth, output_dir):
    """Generates per-body-site self vs. other alpha diversity boxplots.

    Creates a plot for each input collated alpha diversity file (i.e. metric)
    in collated_adiv_dir. Returns a list of plot filenames that were created in
    output_dir.

    Arguments:
        collated_adiv_dir - path to directory containing one or more collated
            alpha diversity files
        map_fp - filepath to metadata mapping file
        split_category - category to split on, e.g. body site. A boxplot will
            be created for each category value (e.g. tongue, palm, etc.)
        comparison_category - category to split on within each of the split
            categories (e.g. self, other)
        rarefaction_depth - rarefaction depth to use when pulling data from
            rarefaction files
        output_dir - directory to write output plot images to
    """
    metadata_map = MetadataMap.parseMetadataMap(open(map_fp, 'U'))
    collated_adiv_fps = glob(join(collated_adiv_dir, '*.txt'))
    plot_title = 'Alpha diversity (%d seqs/sample)' % rarefaction_depth

    # Generate a plot for each collated alpha diversity metric file.
    created_files = []
    for collated_adiv_fp in collated_adiv_fps:
        adiv_metric = splitext(basename(collated_adiv_fp))[0]

        x_tick_labels, dists = _collect_alpha_diversity_boxplot_data(
                open(collated_adiv_fp, 'U'), metadata_map, rarefaction_depth,
                split_category, comparison_category)

        plot_figure = generate_box_plots(dists,
                                         x_tick_labels=x_tick_labels,
                                         title=plot_title,
                                         x_label='Grouping',
                                         y_label=format_title(adiv_metric))
        plot_fp = join(output_dir, '%s.png' % adiv_metric)
        plot_figure.savefig(plot_fp)
        created_files.append(basename(plot_fp))

    return created_files
开发者ID:biocore,项目名称:my-microbes,代码行数:44,代码来源:util.py


示例5: subset_groups

def subset_groups(dm_f, map_f, category, max_group_size):
    dm_labels, dm_data = parse_distmat(dm_f)
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    category_map = defaultdict(list)
    for samp_id in metadata_map.SampleIds:
        # Mapping files can have more samples than distance matrices, which can
        # happen in this case since we are dealing with rarefied OTU tables
        # (samples get dropped).
        if samp_id in dm_labels:
            category_val = metadata_map.getCategoryValue(samp_id, category)
            category_map[category_val].append(samp_id)

    samp_ids_to_keep = []
    for category_val, samp_ids in category_map.items():
        samp_ids_to_keep.extend(
                sample(samp_ids, min(max_group_size, len(samp_ids))))

    return filter_samples_from_distance_matrix((dm_labels, dm_data),
                                               samp_ids_to_keep, negate=True)
开发者ID:gregcaporaso,项目名称:microbiogeo,代码行数:20,代码来源:util.py


示例6: check_mapping_file_category

def check_mapping_file_category(loaded_biom, mapping_fp, mapping_category, subcategory_1, subcategory_2):
    #remove mapping file samples that are not in the input BIOM table
    with open(mapping_fp, 'U') as map_f:
        md_map = MetadataMap.parseMetadataMap(map_f)
    md_map.filterSamples(loaded_biom.ids(axis='sample'), strict=True)

    if mapping_category not in md_map.CategoryNames:
        raise ValueError("category '%s' not found in mapping file "
                         "columns." % mapping_category)

    all_subcategories = md_map.getCategoryValues(md_map.sample_ids, mapping_category)

    if subcategory_1 not in all_subcategories:
        raise ValueError("subcategory_1 (-x) '%s' not found in selected "
                         "mapping file column." % subcategory_1)

    if subcategory_2 not in all_subcategories:
        raise ValueError("subcategory_2 (-y) '%s' not found in selected "
                         "mapping file column." % subcategory_2)

    if subcategory_2 == subcategory_1:
        raise ValueError("subcategory_1 (-x) must be different from subcategory_2 (-y)")
开发者ID:ElDeveloper,项目名称:qiime,代码行数:22,代码来源:differential_abundance.py


示例7: sample_ids_from_category_state_coverage

def sample_ids_from_category_state_coverage(mapping_f,
                                            coverage_category,
                                            subject_category,
                                            min_num_states=None,
                                            required_states=None,
                                            considered_states=None,
                                            splitter_category=None):
    """Filter sample IDs based on subject's coverage of a category.

    Given a category that groups samples by subject (subject_category), samples
    are filtered by how well a subject covers (i.e. has at least one sample
    for) the category states in coverage_category.

    Two filtering criteria are provided (min_num_states and required_states). At
    least one must be provided. If both are provided, the subject must meet
    both criteria to pass the filter (i.e. providing both filters is an AND,
    not an OR, operation).

    A common use case is to provide a 'time' category for coverage_category and
    an 'individual' category for subject_category in order to filter out
    individuals from a study that do not have samples for some minimum number
    of timepoints (min_num_states) and that do not have samples for certain
    timepoints (required_states). For example, this could be the first and last
    timepoints in the study.

    Returns a set of sample IDs to keep, the number of subjects that were
    kept, and a set of the unique category states in coverage_category that
    were kept. The set of sample IDs is not guaranteed to be in any specific
    order relative to the order of sample IDs or subjects in the mapping file.

    Arguments:
        mapping_f - metadata mapping file (file-like object)
        coverage_category - category to test subjects' coverage (string)
        subject_category - category to group samples by subject (string)
        min_num_states - minimum number of category states in coverage_category
            that a subject must cover (i.e. have at least one sample for) to be
            included in results (integer)
        required_states - category states in coverage_category that must be
            covered by a subject's samples in order to be included in results
            (list of strings or items that can be converted to strings)
        considered_states - category states that are counted toward the 
            min_num_states (list of strings or items that can be converted to
            strings)
        splitter_category - category to split input mapping file on prior to
            processing. If not supplied, the mapping file will not be split. If
            supplied, a dictionary mapping splitter_category state to results
            will be returned instead of the three-element tuple. The supplied
            filtering criteria will apply to each split piece of the mapping
            file independently (e.g. if an individual passes the filters for
            the tongue samples, his/her tongue samples will be included for
            the tongue results, even if he/she doesn't pass the filters for the 
            palm samples)
    """
    metadata_map = MetadataMap.parseMetadataMap(mapping_f)

    # Make sure our input looks sane.
    categories_to_test = [coverage_category, subject_category]
    if splitter_category is not None:
        categories_to_test.append(splitter_category)

    if 'SampleID' in categories_to_test:
        raise ValueError("The 'SampleID' category is not suitable for use in "
                         "this function. Please choose a different category "
                         "from the metadata mapping file.")

    for category in categories_to_test:
        if category not in metadata_map.CategoryNames:
            raise ValueError("The category '%s' is not in the metadata "
                             "mapping file." % category)

    if len(set(categories_to_test)) < len(categories_to_test):
        raise ValueError("The coverage, subject, and (optional) splitter "
                         "categories must all be unique.")

    if required_states is not None:
        # required_states must be in coverage_category's states in the mapping
        # file.
        required_states = set(map(str,required_states))
        valid_coverage_states = set(metadata_map.getCategoryValues(
            metadata_map.SampleIds, coverage_category))
        invalid_coverage_states = required_states - valid_coverage_states

        if invalid_coverage_states:
            raise ValueError("The category state(s) '%s' are not in the '%s' "
                             "category in the metadata mapping file." %
                             (', '.join(invalid_coverage_states),
                              coverage_category))

    if considered_states is not None:
        # considered_states is not as restrictive as required_states - we don't 
        # require that these are present, so it's OK if some of the states
        # listed here don't actually show up in the mapping file (allowing
        # the user to pass something like range(100) to consider only states
        # that fall in some range)
        considered_states = set(map(str,considered_states))
        # define a function to determine if a state should be considered
        consider_state = lambda s: s in considered_states
    else:
        # define a dummy function to consider all states (the default
        # if the user does not provide a list of considered_states)
#.........这里部分代码省略.........
开发者ID:kartoffelpuffer,项目名称:qiime,代码行数:101,代码来源:filter.py


示例8: MetadataMap

            if '&&' in col:
                for _col in col.split('&&'):
                    if _col not in lookup_header:
                        offending_fields.append(col)
            elif col not in lookup_header:
                offending_fields.append(col)
    else:
        # if the user didn't specify the header names display everything
        color_by_column_names = header[:]

    # extract a list of the custom axes provided and each element is numeric
    if custom_axes:
        custom_axes = custom_axes.strip().strip("'").strip('"').split(',')

        # the MetadataMap object makes some checks easier
        map_object = MetadataMap(mapping_file_to_dict(mapping_data, header), [])
        for axis in custom_axes:
            # append the field to the error queue that it belongs to
            if axis not in lookup_header:
                offending_fields.append(axis)
                break
            # make sure this value is in the mapping file
            elif axis not in color_by_column_names:
                color_by_column_names.append(axis)
        # perform only if the for loop does not call break
        else:
            # make sure all these axes are numeric
            for axis in custom_axes:
                if map_object.isNumericCategory(axis) == False:
                    non_numeric_categories.append(axis)
开发者ID:jessicalmetcalf,项目名称:emperor,代码行数:30,代码来源:make_emperor.py


示例9: main

def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create the output dir if it doesn't already exist.
    try:
        if not path.exists(opts.output_dir):
            create_dir(opts.output_dir)
    except:
        option_parser.error("Could not create or access output directory "
                            "specified with the -o option.")

    # Parse the mapping file and distance matrix.
    md_map = MetadataMap.parseMetadataMap(open(opts.mapping_file,'U'))
    dm = DistanceMatrix.parseDistanceMatrix(open(opts.input_dm,'U'))

    # Separate all categories into a list, then grab the first category.
    categories = opts.categories.split(',')

    # Cursory check to make sure all categories passed in are in mapping file.
    maps = parse_mapping_file(open(opts.mapping_file,'U').readlines())
    for category in categories:
        if not category in maps[1][1:]:
            option_parser.error("Category '%s' not found in mapping file "
                                "columns:" % category)

    # Make sure the input distance matrix is symmetric and hollow. Must check
    # here before allowing R to use it, as R will silently ignore the diagonal
    # and upper triangle of the distance matrix.
    if not dm.is_symmetric_and_hollow():
        option_parser.error("The distance matrix must be symmetric and "
                            "hollow.")

    # Figure out which method we need to run.
    if opts.method == 'adonis':
        command_args = ["-d " + opts.input_dm + " -m " + opts.mapping_file + \
            " -c " + categories[0] + " -o " + opts.output_dir + " -n " + \
            str(opts.num_permutations)]
        rex = RExecutor()
        rex(command_args, "adonis.r", output_dir=opts.output_dir)
    elif opts.method == 'anosim':
        anosim = Anosim(md_map, dm, categories[0])
        anosim_results = anosim(opts.num_permutations)

        output_file = open(opts.output_dir + "/" + opts.method + \
            "_results.txt", "w+")
        output_file.write("Method Name\tR-value\tP-value")
        output_file.write("\n")
        output_file.write(anosim_results["method_name"]+"\t"+\
            str(anosim_results["r_value"])+"\t"+\
            str(anosim_results["p_value"])+"\t")
        output_file.write("\n")
        output_file.close()
    elif opts.method == 'best':
        bioenv = BioEnv(dm, md_map, categories)
        bioenv_results = bioenv()

        output_file = open(opts.output_dir+"/best_results.txt", 'w+')
        output_file.write("Method Name:\tNum_Vars:\t")
        output_file.write("\n")
        output_file.write(bioenv_results["method_name"]+"\t"+\
            str(bioenv_results["num_vars"]) + "\t")
        output_file.write("\n")
        output_file.write("Variables:\t")
        output_file.write("\n")
        for variable in bioenv_results["vars"]:
            output_file.write(str(variable) + "\t")
        output_file.write("\n")
        output_file.write("RHO_Values:\t")
        output_file.write("\n")
        for rho_val in bioenv_results["bioenv_rho_vals"]:
            output_file.write(str(rho_val) + "\t")
        output_file.write("\n")
        output_file.close()
    elif opts.method == 'morans_i':
        command_args = ["-i " + opts.input_dm + " -m " + opts.mapping_file + \
            " -c " + categories[0] + " -o " + opts.output_dir]
        rex = RExecutor()
        rex(command_args, "morans_i.r", output_dir=opts.output_dir)
    elif opts.method == 'mrpp':
        command_args = ["-d " + opts.input_dm + " -m " + opts.mapping_file + \
            " -c " + categories[0] + " -o " + opts.output_dir + \
            " -n " + str(opts.num_permutations)]
        rex = RExecutor()
        rex(command_args, "mrpp.r", output_dir=opts.output_dir)
    elif opts.method == 'permanova':
        permanova_plain = Permanova(md_map, dm, categories[0])
        permanova_results = permanova_plain(opts.num_permutations)

        output_file = open(opts.output_dir+"/permanova_results.txt", 'w+')
        output_file.write("Method Name\tF-value\tP-value")
        output_file.write("\n")
        output_file.write(permanova_results["method_name"]+"\t"+\
            str(permanova_results["f_value"]) + "\t" + \
            format_p_value_for_num_iters(permanova_results["p_value"], \
            opts.num_permutations)+"\t")
        output_file.write("\n")
        output_file.close()
    elif opts.method == 'permdisp':
        command_args = ["-d " + opts.input_dm + " -m " + opts.mapping_file + \
            " -c " + categories[0] + " -o " + opts.output_dir + " -n " + \
#.........这里部分代码省略.........
开发者ID:clozupone,项目名称:qiime,代码行数:101,代码来源:compare_categories.py


示例10: compare_categories

def compare_categories(dm_fp, map_fp, method, categories, num_perms, out_dir):
    """Runs the specified statistical method using the category of interest.

    This method does not return anything; all output is written to results
    files in out_dir.

    Arguments:
        dm_fp - filepath to the input distance matrix
        map_fp - filepath to the input metadata mapping file
        categories - list of categories in the metadata mapping file to
            consider in the statistical test. Multiple categories will only be
            considered if method is 'best', otherwise only the first category
            will be considered
        num_perms - the number of permutations to use when calculating the
            p-value. If method is 'best' or 'morans_i', this parameter will be
            ignored as they are not permutation-based methods
        out_dir - path to the output directory where results files will be
            written. It is assumed that this directory already exists and we
            have write permissions to it
    """

    # Make sure we were passed a list of categories, not a single string.
    if not isinstance(categories, ListType):
        raise TypeError("The supplied categories must be a list of "
                        "strings.")

    # Special case: we do not allow SampleID as it is not a category, neither
    # in data structure representation nor in terms of a statistical test (no
    # groups are formed since all entries are unique IDs).
    if 'SampleID' in categories:
        raise ValueError("Cannot use SampleID as a category because it is a "
                         "unique identifier for each sample, and thus does "
                         "not create groups of samples (nor can it be used as "
                         "a numeric category in Moran's I or BEST analyses). "
                         "Please use a different metadata column to perform "
                         "statistical tests on.")

    # Parse the mapping file and distance matrix.
    with open(map_fp, 'U') as map_f:
        md_map = MetadataMap.parseMetadataMap(map_f)

    with open(dm_fp, 'U') as dm_f:
        dm = SymmetricDistanceMatrix.from_file(dm_f)

    # Remove any samples from the mapping file that aren't in the distance
    # matrix (important for validation checks). Use strict=True so that an
    # error is raised if the distance matrix contains any samples that aren't
    # in the mapping file.
    md_map.filterSamples(dm.sample_ids, strict=True)

    # Run the specified statistical method.
    if method in ['adonis', 'morans_i', 'mrpp', 'permdisp', 'dbrda']:
        # These methods are run in R. Input validation must be done here before
        # running the R commands. The pure-Python implementations perform all
        # validation in the classes in the stats module.

        # Check to make sure all categories passed in are in mapping file and
        # are not all the same value.
        for category in categories:
            if not category in md_map.CategoryNames:
                raise ValueError("Category '%s' not found in mapping file "
                                 "columns." % category)

            if md_map.hasSingleCategoryValue(category):
                raise ValueError("All values in category '%s' are the "
                                 "same. The statistical method '%s' cannot "
                                 "operate on a category that creates only "
                                 "a single group of samples (e.g. there "
                                 "are no 'between' distances because "
                                 "there is only a single group)."
                                 % (category, method))

        # Build the command arguments string.
        command_args = ['-d %s -m %s -c %s -o %s'
                        % (dm_fp, map_fp, categories[0], out_dir)]

        if method == 'morans_i':
            # Moran's I requires only numeric categories.
            for category in categories:
                if not md_map.isNumericCategory(category):
                    raise TypeError("The category '%s' is not numeric. Not "
                                    "all values could be converted to numbers."
                                    % category)
        else:
            # The rest require groups of samples, so the category values cannot
            # all be unique.
            for category in categories:
                if md_map.hasUniqueCategoryValues(category):
                    raise ValueError("All values in category '%s' are unique. "
                                     "This statistical method cannot operate "
                                     "on a category with unique values (e.g. "
                                     "there are no 'within' distances because "
                                     "each group of samples contains only a "
                                     "single sample)." % category)

            # Only Moran's I doesn't accept a number of permutations.
            if num_perms < 0:
                raise ValueError("The number of permutations must be greater "
                                 "than or equal to zero.")

#.........这里部分代码省略.........
开发者ID:askerdb,项目名称:qiime,代码行数:101,代码来源:compare_categories.py


示例11: run_core_diversity_analyses

def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout,
):
    """
    """
    if categories != None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U"))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError, (
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError, (
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )

    else:
        categories = []

    # prep some variables
    if params == None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp != None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params["print_biom_table_summary"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    print_biom_table_summary_cmd = "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % (
        biom_fp,
        biom_table_stats_output_fp,
        params_str,
    )
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
    commands.append([("Generate BIOM table summary", print_biom_table_summary_cmd)])

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
        biom_fp,
        filtered_biom_fp,
        sampling_depth,
    )
    commands.append(
        [
            (
                "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                filter_samples_cmd,
            )
        ]
    )
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list
    command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
    commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
#.........这里部分代码省略.........
开发者ID:kartoffelpuffer,项目名称:qiime,代码行数:101,代码来源:core_diversity_analyses.py


示例12: run_core_diversity_analyses

def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """
    """
    if categories != None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
         parse_mapping_file_to_dict(open(mapping_fp,'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError, ("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError, ("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
            
    else:
        categories= []
    
    # prep some variables
    if params == None:
        params = parse_qiime_parameters([])
        
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []
    
    # begin logging
    old_log_fps = glob(join(output_dir,'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log',old_log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp,mapping_fp]
    if tree_fp != None:
        input_fps.append(tree_fp)
    log_input_md5s(logger,input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
         "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
         (biom_fp, biom_table_stats_output_fp,params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    
    # filter samples with fewer observations than the requested sampling_depth. 
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered 
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
         (biom_fp,filtered_biom_fp,sampling_depth)
        commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
                     % filtered_biom_fp)
    biom_fp = filtered_biom_fp
    
    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands, 
                        status_update_callback, 
                        logger,
#.........这里部分代码省略.........
开发者ID:jasonbogovich,项目名称:qiime,代码行数:101,代码来源:core_diversity_analyses.py


示例13: compare_categories

def compare_categories(dm_fp, map_fp, method, categories, num_perms, out_dir):
    """Runs the specified statistical method using the category of interest.

    This method does not return anything; all output is written to results
    files in out_dir.

    Arguments:
        dm_fp - filepath to the input distance matrix
        map_fp - filepath to the input metadata mapping file
        categories - list of categories in the metadata mapping file to
            consider in the statistical test. Multiple categories will only be
            considered if method is 'bioenv', otherwise only the first category
            will be considered
        num_perms - the number of permutations to use when calculating the
            p-value. If method is 'bioenv' or 'morans_i', this parameter will
            be ignored as they are not permutation-based methods
        out_dir - path to the output directory where results files will be
            written. It is assumed that this directory already exists and we
            have write permissions to it
    """
    # Make sure we were passed a list of categories, not a single string.
    if not isinstance(categories, ListType):
        raise TypeError("The supplied categories must be a list of "
                        "strings.")

    # Special case: we do not allow SampleID as it is not a category, neither
    # in data structure representation nor in terms of a statistical test (no
    # groups are formed since all entries are unique IDs).
    if 'SampleID' in categories:
        raise ValueError("Cannot use SampleID as a category because it is a "
                         "unique identifier for each sample, and thus does "
                         "not create groups of samples (nor can it be used as "
                         "a numeric category in Moran's I or BIO-ENV "
                         "analyses). Please choose a different metadata "
                         "column to perform statistical tests on.")

    dm = DistanceMatrix.read(dm_fp)

    if method in ('anosim', 'permanova', 'bioenv'):
        with open(map_fp, 'U') as map_f:
            md_dict = parse_mapping_file_to_dict(map_f)[0]
        df = pd.DataFrame.from_dict(md_dict, orient='index')

        out_fp = join(out_dir, '%s_results.txt' % method)

        if method in ('anosim', 'permanova'):
            if method == 'anosim':
                method_cls = ANOSIM
            elif method == 'permanova':
                method_cls = PERMANOVA

            method_inst = method_cls(dm, df, column=categories[0])
            results = method_inst(num_perms)

            with open(out_fp, 'w') as out_f:
                out_f.write(results.summary())
        elif method == 'bioenv':
            results = bioenv(dm, df, columns=categories)
            results.to_csv(out_fp, sep='\t')
    else:
        # Remove any samples from the mapping file that aren't in the distance
        # matrix (important for validation checks). Use strict=True so that an
        # error is raised if the distance matrix contains any samples that
        # aren't in the mapping file.
        with open(map_fp, 'U') as map_f:
            md_map = MetadataMap.parseMetadataMap(map_f)
        md_map.filterSamples(dm.ids, strict=True)

        # These methods are run in R. Input validation must be done here before
        # running the R commands.
        if method in ['adonis', 'morans_i', 'mrpp', 'permdisp', 'dbrda']:
            # Check to make sure all categories passed in are in mapping file
            # and are not all the same value.
            for category in categories:
                if not category in md_map.CategoryNames:
                    raise ValueError("Category '%s' not found in mapping file "
                                     "columns." % category)

                if md_map.hasSingleCategoryValue(category):
                    raise ValueError("All values in category '%s' are the "
                                     "same. The statistical method '%s' "
                                     "cannot operate on a category that "
                                     "creates only a single group of samples "
                                     "(e.g. there are no 'between' distances "
                                     "because there is only a single group)."
                                     % (category, method))

            # Build the command arguments string.
            command_args = ['-d %s -m %s -c %s -o %s'
                            % (dm_fp, map_fp, categories[0], out_dir)]

            if method == 'morans_i':
                # Moran's I requires only numeric categories.
                for category in categories:
                    if not md_map.isNumericCategory(category):
                        raise TypeError("The category '%s' is not numeric. "
                                        "Not all values could be converted to "
                                        "numbers." % category)
            else:
                # The rest require groups of samples, so the category values
#.........这里部分代码省略.........
开发者ID:YuJinhui,项目名称:qiime,代码行数:101,代码来源:compare_categories.py


示例14: _color_field_states

def _color_field_states(map_f, samp_ids, field, field_states, color_by_field):
    """Colors one field by another.

    Returns a list of matplotlib-compatible colors, one for each of the input
    field_states. Also returns a dictionary mapping color_by_field states to
    colors (useful for building a legend, for example).

    If there are not enough colors available (they are drawn from
    qiime.colors.data_colors), an error will be raised as the color mapping
    (and legend) will be ambiguous.

    A one-to-one mapping must exist between each field_state and its
    corresponding color_by field state (otherwise it is unclear which
    corresponding color_by field state should be used to color it by). An error
    will be raised if this one-to-one mapping does not exist.

    Arguments:
        map_f - the mapping file (file-like object)
        samp_ids - a list of sample IDs to consider in the mapping file. Only
            these sample IDs will be used when coloring field states
        field - the field in the mapping file to color
        field_states - the field states in field to color
        color_by_field - the field in the mapping file to color field_states by
    """
    colors = []
    color_pool = [matplotlib_rgb_color(data_colors[color].toRGB()) for color in data_color_order]
    metadata_map = MetadataMap.parseMetadataMap(map_f)

    for field_to_check in field, color_by_field:
        if field_to_check not in metadata_map.CategoryNames:
            raise ValueError("The field '%s' is not in the metadata mapping " "file's column headers." % field_to_check)

    all_field_states = metadata_map.getCategoryValues(samp_ids, field)
    all_color_by_states = metadata_map.getCategoryValues(samp_ids, color_by_field)

    if len(set(field_states) - set(all_field_states)) != 0:
        raise ValueError("Encountered unrecognizable field state(s) in %r " "for field '%s'." % (field_states, field))

    # Build mapping from one field to the other.
    field_mapping = defaultdict(list)
    for field_state, color_by_state in zip(all_field_states, all_color_by_states):
        if field_state in field_states:
            field_mapping[field_state].append(color_by_state)

    # For each of the specified input field states, find its corresponding
    # "color by" field state and give it a color if it hasn't been assigned one
    # yet. Make sure we have enough colors and there is a one-to-one mapping.
    color_mapping = {}
    for field_state in field_states:
        color_by_states = set(field_mapping[field_state])

        if len(color_by_states) != 1:
            raise ValueError(
                "The field '%s' to color by does not have a "
                "one-to-one mapping with field '%s'. Coloring "
                "would be ambiguous." % (color_by_field, field)
            )

        color_by_state = list(color_by_states)[0]
        if color_by_state not in color_mapping:
            if len(color_pool) > 0:
                color_mapping[color_by_state] = color_pool.pop(0)
            else:
                raise ValueError(
                    "There are not enough available QIIME colors "
                    "to color each of the field states in field "
                    "'%s'. Coloring would be ambiguous." % color_by_field
                )

        colors.append(color_mapping[color_by_state])

    return colors, color_mapping
开发者ID:Honglongwu,项目名称:qiime,代码行数:72,代码来源:make_distance_boxplots.py


示例15: run_core_diversity_analyses

该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python upstream.run_pick_de_novo_otus函数代码示例发布时间:2022-05-26
下一篇:
Python util.write_biom_table函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap