• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python utils.color_gene函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utils.color_gene函数的典型用法代码示例。如果您正苦于以下问题:Python color_gene函数的具体用法?Python color_gene怎么用?Python color_gene使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了color_gene函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: print_match

 def print_match(self, region, gene, query_seq, score, glbounds, qrbounds, codon_pos, warnings, skipping=False):
     if self.debug < 2:
         return
     out_str_list = []
     buff_str = (20 - len(gene)) * ' '
     tmp_val = score
     if self.args.apply_choice_probs_in_sw and self.get_choice_prob(region, gene) != 0.0:
         tmp_val = score / self.get_choice_prob(region, gene)
     if self.args.apply_choice_probs_in_sw:
         out_str_list.append('%8s%s%s%9.1e * %3.0f = %-6.1f' % (' ', utils.color_gene(gene), buff_str, self.get_choice_prob(region, gene), tmp_val, score))
     else:
         out_str_list.append('%8s%s%s%9s%3s %6.0f        ' % (' ', utils.color_gene(gene), '', '', buff_str, score))
     out_str_list.append('%4d%4d   %s\n' % (glbounds[0], glbounds[1], self.germline_seqs[region][gene][glbounds[0]:glbounds[1]]))
     out_str_list.append('%46s  %4d%4d' % ('', qrbounds[0], qrbounds[1]))
     out_str_list.append('   %s ' % (utils.color_mutants(self.germline_seqs[region][gene][glbounds[0]:glbounds[1]], query_seq[qrbounds[0]:qrbounds[1]])))
     if region != 'd':
         out_str_list.append('(%s %d)' % (utils.conserved_codon_names[region], codon_pos))
     if warnings[gene] != '':
         out_str_list.append('WARNING ' + warnings[gene])
     if skipping:
         out_str_list.append('skipping!')
     if self.args.outfname is None:
         print ''.join(out_str_list)
     else:
         out_str_list.append('\n')
         self.outfile.write(''.join(out_str_list))
开发者ID:matsengrp,项目名称:bioboxmixcr,代码行数:26,代码来源:waterer.py


示例2: add_new_allele

def add_new_allele(glfo, newfo, remove_template_genes=False, debug=False):
    """
    Add a new allele to <glfo>, specified by <newfo> which is of the
    form: {'gene' : 'IGHV3-71*01+C35T.T47G', 'seq' : 'ACTG yadda yadda CGGGT', 'template-gene' : 'IGHV3-71*01'}
    If <remove_template_genes>, we also remove 'template-gene' from <glfo>.
    """

    template_gene = newfo["template-gene"]
    region = utils.get_region(template_gene)
    if template_gene not in glfo["seqs"][region]:
        raise Exception("unknown template gene %s" % template_gene)

    new_gene = newfo["gene"]

    if region == "v":
        glfo["cyst-positions"][new_gene] = glfo["cyst-positions"][template_gene]
    elif region == "j":
        glfo["tryp-positions"][new_gene] = glfo["tryp-positions"][template_gene]

    glfo["seqs"][region][new_gene] = newfo["seq"]

    if debug:
        print "    adding new allele to glfo:"
        print "      template %s   %s" % (glfo["seqs"][region][template_gene], utils.color_gene(template_gene))
        print "           new %s   %s" % (
            utils.color_mutants(glfo["seqs"][region][template_gene], newfo["seq"]),
            utils.color_gene(new_gene),
        )

    if remove_template_genes:
        remove_gene(glfo, template_gene, debug=True)
开发者ID:psathyrella,项目名称:partis,代码行数:31,代码来源:glutils.py


示例3: add_new_allele

    def add_new_allele(self, gene, fitfo, n_candidate_snps, debug=False):
        # figure out what the new nukes are
        old_seq = self.glfo['seqs'][utils.get_region(gene)][gene]
        new_seq = old_seq
        mutfo = {}
        for pos in sorted(fitfo['candidates'][n_candidate_snps]):
            obs_counts = {nuke : self.counts[gene][pos][n_candidate_snps][nuke] for nuke in utils.nukes}  # NOTE it's super important to only use the counts from sequences with <n_candidate_snps> total mutations
            sorted_obs_counts = sorted(obs_counts.items(), key=operator.itemgetter(1), reverse=True)
            original_nuke = self.mfreqer.counts[gene][pos]['gl_nuke']
            new_nuke = None
            for nuke, _ in sorted_obs_counts:  # take the most common one that isn't the existing gl nuke
                if nuke != original_nuke:
                    new_nuke = nuke
                    break
            print '   %3d  (%s --> %s)' % (pos, original_nuke, new_nuke),
            assert old_seq[pos] == original_nuke
            mutfo[pos] = {'original' : original_nuke, 'new' : new_nuke}
            new_seq = new_seq[:pos] + new_nuke + new_seq[pos+1:]

        new_name, mutfo = glutils.get_new_allele_name_and_change_mutfo(gene, mutfo)
        print ''
        print '          %s   %s' % (old_seq, utils.color_gene(gene))
        print '          %s   %s' % (utils.color_mutants(old_seq, new_seq), utils.color_gene(new_name))

        # and add it to the set of new alleles for this gene
        self.new_allele_info.append({
            'template-gene' : gene,
            'gene' : new_name,
            'seq' : new_seq,
            'aligned-seq' : None
        })
开发者ID:Annak17,项目名称:partis,代码行数:31,代码来源:allelefinder.py


示例4: make_transition_plot

    def make_transition_plot(self, gene_name, model):
        """ NOTE shares a lot with make_mutefreq_plot() in python/paramutils.py """
        fig, ax = plotting.mpl_init()
        fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

        ibin = 0
        print utils.color_gene(utils.unsanitize_name(gene_name))
        legend_colors = set()  # add a color to this the first time you plot it
        for state in model.states:

            # bin label
            ax.text(-0.5 + ibin, -0.075, paramutils.simplify_state_name(state.name), rotation='vertical', size=8)

            sorted_to_states = {}
            for name in state.transitions.keys():
                if name.find('IG') == 0:
                    sorted_to_states[name] = int(paramutils.simplify_state_name(name))
                else:
                    sorted_to_states[name] = name
            sorted_to_states = sorted(sorted_to_states.items(), key=operator.itemgetter(1))

            total = 0.0
            for to_state, simple_to_state in sorted_to_states:

                prob = state.transitions[to_state]

                alpha = 0.6
                width = 3

                if 'insert' in str(simple_to_state):
                    label = 'insert'
                    color = '#3498db'  # blue
                elif str(simple_to_state) == 'end':
                    label = 'end'
                    color = 'red'
                else:  # regional/internal states
                    assert to_state.find('IG') == 0
                    label = 'internal'
                    color = 'green'

                label_to_use = None
                if color not in legend_colors:
                    label_to_use = label
                    legend_colors.add(color)

                # horizontal line at height total+prob
                ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob], color=color, linewidth=width, alpha=alpha, label=label_to_use)

                # vertical line from total to total + prob
                ax.plot([ibin, ibin], [total + 0.01, total + prob], color=color, alpha=alpha, linewidth=width)

                midpoint = 0.5*(prob + 2*total)
                # ax.text(ibin, midpoint, paramutils.simplify_state_name(to_state))  # nicely labels the midpoint of the chunk between lines, but there isn't really room for it

                total += prob
    
            ibin += 1

        ax.get_xaxis().set_visible(False)
        plotting.mpl_finish(ax, self.base_plotdir + '/transitions', gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(model.states) + 3), leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
开发者ID:psathyrella,项目名称:partis,代码行数:60,代码来源:plot-hmms.py


示例5: remove_gene

def remove_gene(glfo, gene, debug=False):
    """ remove <gene> from <glfo> """
    region = utils.get_region(gene)
    if gene in glfo["seqs"][region]:
        if debug:
            print "  removing %s from glfo" % utils.color_gene(gene)
        del glfo["seqs"][region][gene]
        if region in utils.conserved_codons[glfo["chain"]]:
            del glfo[utils.conserved_codons[glfo["chain"]][region] + "-positions"][gene]
    else:
        if debug:
            print "  can't remove %s from glfo, it's not there" % utils.color_gene(gene)
开发者ID:psathyrella,项目名称:partis,代码行数:12,代码来源:glutils.py


示例6: find_partial_failures

    def find_partial_failures(self, fostream_name):
        unique_ids = []
        for line in open(fostream_name.replace(".fostream", "")).readlines():
            if len(self.sim_need) == 0:
                return
            if len(line.strip()) == 0:  # skip blank lines
                continue

            line = line.replace('"', "")
            line = line.split(";")

            unique_id = line[0]

            if "NA" not in line:  # skip lines that were ok
                unique_ids.append(unique_id)
                continue
            if unique_id not in self.sim_need:
                continue
            if unique_id not in self.siminfo:
                continue  # not looking for this <unique_id> a.t.m.

            info = {}
            info["unique_id"] = unique_id
            for stuff in line:
                for region in utils.regions:  # add the first instance of IGH[VDJ] (if it's there at all)
                    if "IGH" + region.upper() in stuff and region + "_gene" not in info:
                        genes = re.findall("IGH" + region.upper() + "[^ ][^ ]*", stuff)
                        if len(genes) == 0:
                            print "ERROR no %s genes in %s" % (region, stuff)
                        gene = genes[0]
                        if gene not in self.germline_seqs[region]:
                            print "ERROR bad gene %s for %s" % (gene, unique_id)
                            sys.exit()
                        info[region + "_gene"] = gene
            self.perfplotter.add_partial_fail(self.siminfo[unique_id], info)
            if self.args.debug:
                print "%-20s  partial fail %s %s %s" % (
                    unique_id,
                    utils.color_gene(info["v_gene"]) if "v_gene" in info else "",
                    utils.color_gene(info["d_gene"]) if "d_gene" in info else "",
                    utils.color_gene(info["j_gene"]) if "j_gene" in info else "",
                ),
                print "  (true %s %s %s)" % tuple(
                    [self.siminfo[unique_id][region + "_gene"] for region in utils.regions]
                )
            self.failtails[unique_id] = info
            self.n_partially_failed += 1
            self.sim_need.remove(unique_id)

        return unique_ids
开发者ID:stevenweaver,项目名称:partis,代码行数:50,代码来源:ihhhmmmparser.py


示例7: skip_gene

 def skip_gene(gene):
     if self.args.debug:
         print '    %s in list of genes to skip' % utils.color_gene(gene)
     if gene not in genes_actually_skipped:
         genes_actually_skipped[gene] = 0
     genes_actually_skipped[gene] += 1
     qr_info['skip_gene'] = True
开发者ID:matsengrp,项目名称:bioboxmixcr,代码行数:7,代码来源:igblastparser.py


示例8: write_hmm_input

    def write_hmm_input(self, csv_fname, sw_info, parameter_dir, preclusters=None, hmm_type='', pair_hmm=False, stripped=False):
        print '    writing input'
        csvfile = opener('w')(csv_fname)
        start = time.time()

        # write header
        header = ['names', 'k_v_min', 'k_v_max', 'k_d_min', 'k_d_max', 'only_genes', 'seqs']  # I wish I had a good c++ csv reader 
        csvfile.write(' '.join(header) + '\n')

        skipped_gene_matches = set()
        assert hmm_type != ''
        if hmm_type == 'k=1':  # single vanilla hmm
            nsets = [[qn] for qn in self.input_info.keys()]
        elif hmm_type == 'k=2':  # pair hmm
            nsets = self.get_pairs(preclusters)
        elif hmm_type == 'k=preclusters':  # run the k-hmm on each cluster in <preclusters>
            assert preclusters != None
            nsets = [ val for key, val in preclusters.id_clusters.items() if len(val) > 1 ]  # <nsets> is a list of sets (well, lists) of query names
            # nsets = []
            # for cluster in preclusters.id_clusters.values():
            #     nsets += itertools.combinations(cluster, 5)
        elif hmm_type == 'k=nsets':  # run on *every* combination of queries which has length <self.args.n_sets>
            if self.args.all_combinations:
                nsets = itertools.combinations(self.input_info.keys(), self.args.n_sets)
            else:  # put the first n together, and the second group of n (not the self.input_info is and OrderedDict)
                nsets = []
                keylist = self.input_info.keys()
                this_set = []
                for iquery in range(len(keylist)):
                    if iquery % self.args.n_sets == 0:  # every nth query, start a new group
                        if len(this_set) > 0:
                            nsets.append(this_set)
                        this_set = []
                    this_set.append(keylist[iquery])
                if len(this_set) > 0:
                    nsets.append(this_set)
        else:
            assert False

        for query_names in nsets:
            non_failed_names = self.remove_sw_failures(query_names, sw_info)
            if len(non_failed_names) == 0:
                continue
            combined_query = self.combine_queries(sw_info, non_failed_names, parameter_dir, stripped=stripped, skipped_gene_matches=skipped_gene_matches)
            if len(combined_query) == 0:  # didn't find all regions
                continue
            csvfile.write('%s %d %d %d %d %s %s\n' %  # NOTE csv.DictWriter can handle tsvs, so this should really be switched to use that
                          (':'.join([str(qn) for qn in non_failed_names]),
                           combined_query['k_v']['min'], combined_query['k_v']['max'],
                           combined_query['k_d']['min'], combined_query['k_d']['max'],
                           ':'.join(combined_query['only_genes']),
                           ':'.join(combined_query['seqs'])))

        if len(skipped_gene_matches) > 0:
            print '    not found in %s, i.e. were never the best sw match for any query, so removing from consideration for hmm:' % (parameter_dir)
            for region in utils.regions:
                print '      %s: %s' % (region, ' '.join([utils.color_gene(gene) for gene in skipped_gene_matches if utils.get_region(gene) == region]))

        csvfile.close()
        print '        input write time: %.3f' % (time.time()-start)
开发者ID:matsengrp,项目名称:bioboxpartis,代码行数:60,代码来源:partitiondriver.py


示例9: add_new_allele

def add_new_allele(glfo, newfo, remove_template_genes, debug=False):
    """
    Add a new allele to <glfo>, specified by <newfo> which is of the
    form: {'template-gene' : 'IGHV3-71*01', 'gene' : 'IGHV3-71*01+C35T.T47G', 'seq' : 'ACTG yadda yadda CGGGT'}
    If <remove_template_genes>, we also remove 'template-gene' from <glfo>.
    """

    template_gene = newfo['template-gene']
    region = utils.get_region(template_gene)
    if template_gene not in glfo['seqs'][region]:
        raise Exception('unknown template gene %s' % template_gene)

    new_gene = newfo['gene']

    if region == 'v':
        glfo['cyst-positions'][new_gene] = glfo['cyst-positions'][template_gene]
    elif region == 'j':
        glfo['tryp-positions'][new_gene] = glfo['tryp-positions'][template_gene]

    glfo['seqs'][region][new_gene] = newfo['seq']

    if debug:
        print '    adding new allele to glfo:'
        print '      template %s   %s' % (glfo['seqs'][region][template_gene], utils.color_gene(template_gene))
        print '           new %s   %s' % (utils.color_mutants(glfo['seqs'][region][template_gene], newfo['seq']), utils.color_gene(new_gene))

    if remove_template_genes:
        remove_gene(glfo, template_gene, debug=True)
开发者ID:Annak17,项目名称:partis,代码行数:28,代码来源:glutils.py


示例10: finalize

    def finalize(self, debug=False):
        assert not self.finalized

        self.mfreqer.finalize()

        start = time.time()
        gene_results = {'not_enough_obs_to_fit' : set(), 'didnt_find_anything_with_fit' : set(), 'new_allele' : set()}
        if debug:
            print '\nlooking for new alleles:'
        for gene in sorted(self.mfreqer.counts):
            if utils.get_region(gene) != 'v':
                continue
            if debug:
                print '\n%s (observed %d %s)' % (utils.color_gene(gene), self.gene_obs_counts[gene], utils.plural_str('time', self.gene_obs_counts[gene]))

            positions_to_try_to_fit, xyvals = self.get_positions_to_fit(gene, gene_results, debug=debug)
            if positions_to_try_to_fit is None:
                continue

            fitfo = {n : {} for n in ('min_snp_ratios', 'candidates')}
            for istart in range(1, self.n_max_snps):
                if debug:
                    if istart == 1:
                        print '                                 resid. / ndof'
                        print '             position   ratio   (m=0 / m>%5.2f)       muted / obs ' % self.big_y_icpt_bounds[0]
                    print '  %d %s' % (istart, utils.plural_str('snp', istart))

                subxyvals = {pos : {k : v[istart : istart + self.max_fit_length] for k, v in xyvals[pos].items()} for pos in positions_to_try_to_fit}
                self.fit_istart(gene, istart, positions_to_try_to_fit, subxyvals, fitfo, debug=debug)
                if istart not in fitfo['candidates']:  # if it didn't get filled, we didn't have enough observations to do the fit
                    break

            istart_candidates = []
            if debug:
                print '  evaluating each snp hypothesis'
                print '    snps       min ratio'
            for istart in fitfo['candidates']:
                if debug:
                    print '    %2d     %9s' % (istart, fstr(fitfo['min_snp_ratios'][istart])),
                if self.is_a_candidate(gene, fitfo, istart, debug=debug):
                    istart_candidates.append(istart)

            if len(istart_candidates) > 0:
                n_candidate_snps = min(istart_candidates)  # add the candidate with the smallest number of snps to the germline set, and run again (if the firs
                gene_results['new_allele'].add(gene)
                print '\n    found a new allele candidate separated from %s by %d %s at %s:' % (utils.color_gene(gene), n_candidate_snps,
                                                                                                utils.plural_str('snp', n_candidate_snps), utils.plural_str('position', n_candidate_snps)),
                self.add_new_allele(gene, fitfo, n_candidate_snps, debug=debug)
            else:
                gene_results['didnt_find_anything_with_fit'].add(gene)
                if debug:
                    print '  no new alleles'

        if debug:
            print 'found new alleles for %d %s (there were also %d without new alleles, and %d without enough observations to fit)' % (len(gene_results['new_allele']), utils.plural_str('gene', len(gene_results['new_allele'])),
                                                                                                                                       len(gene_results['didnt_find_anything_with_fit']), len(gene_results['not_enough_obs_to_fit']))
            print '      allele finding time: %.1f' % (time.time()-start)

        self.finalized = True
开发者ID:Annak17,项目名称:partis,代码行数:59,代码来源:allelefinder.py


示例11: set_per_gene_support

 def set_per_gene_support(self, true_line, inf_line, region):
     if inf_line[region + '_per_gene_support'].keys()[0] != inf_line[region + '_gene']:
         print '   WARNING best-supported gene %s not same as viterbi gene %s' % (utils.color_gene(inf_line[region + '_per_gene_support'].keys()[0]), utils.color_gene(inf_line[region + '_gene']))
     support = inf_line[region + '_per_gene_support'].values()[0]  # sorted, ordered dict with gene : logprob key-val pairs
     if true_line[region + '_gene'] == inf_line[region + '_gene']:  # NOTE this requires allele to be correct, but set_bool_column() does not
         self.hists[region + '_allele_right_vs_per_gene_support'].fill(support)
     else:
         self.hists[region + '_allele_wrong_vs_per_gene_support'].fill(support)
开发者ID:Irrationone,项目名称:partis,代码行数:8,代码来源:performanceplotter.py


示例12: find_partial_failures

    def find_partial_failures(self, fostream_name):
        unique_ids = []
        for line in open(fostream_name.replace('.fostream', '')).readlines():
            if len(self.sim_need) == 0:
                return
            if len(line.strip()) == 0:  # skip blank lines
                continue

            line = line.replace('"', '')
            line = line.split(';')

            unique_id = line[0]
            
            if 'NA' not in line:  # skip lines that were ok
                unique_ids.append(unique_id)
                continue
            if unique_id not in self.sim_need:
                continue
            if unique_id not in self.siminfo:
                continue  # not looking for this <unique_id> a.t.m.

            info = {}
            info['unique_id'] = unique_id
            for stuff in line:
                for region in utils.regions:  # add the first instance of IGH[VDJ] (if it's there at all)
                    if 'IGH'+region.upper() in stuff and region+'_gene' not in info:
                        genes = re.findall('IGH' + region.upper() + '[^ ][^ ]*', stuff)
                        if len(genes) == 0:
                            print 'ERROR no %s genes in %s' % (region, stuff)
                        gene = genes[0]
                        if gene not in self.germline_seqs[region]:
                            print 'ERROR bad gene %s for %s' % (gene, unique_id)
                            sys.exit()
                        info[region + '_gene'] = gene
            self.perfplotter.add_partial_fail(self.siminfo[unique_id], info)
            if self.args.debug:
                print '%-20s  partial fail %s %s %s' % (unique_id,
                                                     utils.color_gene(info['v_gene']) if 'v_gene' in info else '',
                                                     utils.color_gene(info['d_gene']) if 'd_gene' in info else '',
                                                     utils.color_gene(info['j_gene']) if 'j_gene' in info else ''),
                print '  (true %s %s %s)' % tuple([self.siminfo[unique_id][region + '_gene'] for region in utils.regions])
            self.failtails[unique_id] = info
            self.n_partially_failed += 1
            self.sim_need.remove(unique_id)

        return unique_ids
开发者ID:matsengrp,项目名称:bioboxpartis,代码行数:46,代码来源:ihhhmmmparser.py


示例13: remove_gene

def remove_gene(glfo, gene, debug=False):
    """ remove <gene> from <glfo> """
    if debug:
        print '  removing %s from glfo' % utils.color_gene(gene)
    region = utils.get_region(gene)
    if region in utils.conserved_codons:
        del glfo[utils.conserved_codons[region] + '-positions'][gene]
    del glfo['seqs'][region][gene]
开发者ID:Annak17,项目名称:partis,代码行数:8,代码来源:glutils.py


示例14: build_v_gene_set

def build_v_gene_set(glfo, introns):
    total_d_counts = {}
    refseqs = {}
    for d_gene, counts in introns.items():
        total_d_counts[d_gene] = sum(counts.values())
    for d_gene, _ in sorted(total_d_counts.items(), key=operator.itemgetter(1), reverse=True):
        counts = introns[d_gene]

        # first decide on the reference sequences
        refseq, column_counts = None, None
        for seq in sorted(counts, key=len, reverse=True):
            if refseq is None:  # first one, i.e. the longest
                refseq = seq
                column_counts = [{n : 0 for n in utils.nukes} for i in range(len(refseq))]
            ioffset = len(refseq) - len(seq)
            partial_refseq = refseq[ioffset:]
            assert len(partial_refseq) == len(seq)
            for ibase in range(ioffset, len(refseq)):
                column_counts[ibase][seq[ibase - ioffset]] += counts[seq]

        refseqs[d_gene] = []
        for basecounts in column_counts:
            most_common_base = sorted(basecounts.items(), key=operator.itemgetter(1), reverse=True)[0][0]
            refseqs[d_gene].append(most_common_base)
        refseqs[d_gene] = ''.join(refseqs[d_gene])

        n_ok = 0
        mutecounts = {}
        for seq in sorted(counts, key=len, reverse=True):
            # print '    %3d   %150s' % (count, seq)
            partial_refseq = refseqs[d_gene][len(refseqs[d_gene]) - len(seq):]
            if seq == partial_refseq:
                n_ok += counts[seq]
            else:
                # utils.color_mutants(partial_refseq, seq, print_result=True, extra_str='                ')
                n_mutes = utils.hamming_distance(partial_refseq, seq)
                if n_mutes not in mutecounts:
                    mutecounts[n_mutes] = 0
                mutecounts[n_mutes] += counts[seq]
        print '  %s   %4d / %-4d ok' % (utils.color_gene(d_gene, width=10), n_ok, n_ok + sum(mutecounts.values())),
        if len(mutecounts) > 0:
            print '(mean of %.1f mutations among the other %d' % (numpy.average(mutecounts.keys(), weights=mutecounts.values()), sum(mutecounts.values())),
        print ''

    # add the intronic v genes to glfo
    for d_gene, refseq in refseqs.items():
        glfo['seqs']['v'][utils.generate_dummy_v(d_gene)] = refseq
        glfo['cyst-positions'][utils.generate_dummy_v(d_gene)] = len(refseq) - 3

    # write a glfo dir with everything
    glutils.write_glfo(outdir + '/germlines/imgt-and-intronic', glfo, debug=True)

    # remove the original v genes, and write a glfo dir with just the intronic ones
    glutils.remove_genes(glfo, [g for g in glfo['seqs']['v'] if 'xDx' not in g], debug=True)
    glutils.write_glfo(outdir + '/germlines/intronic', glfo, debug=True)
开发者ID:psathyrella,项目名称:partis,代码行数:55,代码来源:process-katie-data.py


示例15: make_mutefreq_plot

def make_mutefreq_plot(plotdir, gene_name, positions):
    """ NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py. """
    nuke_colors = {'A' : 'red', 'C' : 'blue', 'G' : 'orange', 'T' : 'green'}
    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

    ibin = 0
    print utils.color_gene(utils.unsanitize_name(gene_name))
    legend_colors = set()
    for info in positions:
        posname = info['name']

        # make label below bin
        ax.text(-0.5 + ibin, -0.075, simplify_state_name(posname), rotation='vertical', size=8)

        total = 0.0
        alpha = 0.6
        for nuke, prob in sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True):
            color = nuke_colors[nuke]

            label_to_use = None
            if color not in legend_colors:
                label_to_use = nuke
                legend_colors.add(color)

            # horizontal line at height total+prob
            ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob], color=color, alpha=alpha, linewidth=3, label=label_to_use)

            # vertical line from total to total + prob
            ax.plot([ibin, ibin], [total + 0.01, total + prob], color=color, alpha=alpha, linewidth=3)

            # # write [ACGT] at midpoint between total and total+prob
            # midpoint = 0.5*(prob + 2*total)
            # ... *redacted*

            total += prob

        ibin += 1

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, plotdir, gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(positions) + 3), leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
开发者ID:psathyrella,项目名称:partis,代码行数:41,代码来源:paramutils.py


示例16: __init__

    def __init__(self, base_indir, outdir, gene_name, glfo, args, debug=False):
        self.region = utils.get_region(gene_name)
        self.raw_name = gene_name  # i.e. unsanitized
        self.germline_seqs = glfo['seqs']  # all germline alleles
        self.germline_seq = self.germline_seqs[self.region][gene_name]  # germline sequence for this hmm
        self.indir = base_indir
        self.args = args
        self.debug = debug
        self.codon_positions = {r : glfo[c + '-positions'] for r, c in utils.conserved_codons[args.chain].items()}

        # parameters with values that I more or less made up
        self.precision = '16'  # number of digits after the decimal for probabilities
        self.eps = 1e-6  # NOTE I also have an eps defined in utils, and they should in principle be combined
        self.n_max_to_interpolate = args.min_observations_to_write
        self.min_mean_unphysical_insertion_length = {'fv' : 1.5, 'jf' : 25}  # jf has to be quite a bit bigger, since besides account for the variation in J length from the tryp position to the end, it has to account for the difference in cdr3 lengths

        self.erosion_pseudocount_length = 10  # if we're closer to the end of the gene than this, make sure erosion probability isn't zero

        self.outdir = outdir
        self.smallest_entry_index = -1  # keeps track of the first state that has a chance of being entered from init -- we want to start writing (with add_internal_state) from there

        self.insertions = []
        if self.region == 'v':
            self.insertions.append('fv')
        elif self.region == 'd':
            self.insertions.append('vd')
        elif self.region == 'j':
            self.insertions.append('dj')
            self.insertions.append('jf')

        assert len(utils.ambiguous_bases) == 1 and utils.ambiguous_bases[0] == 'N'  # maybe need to update some stuff below if this changes

        if self.debug:
            print '%s' % utils.color_gene(gene_name)

        self.n_occurences = utils.read_single_gene_count(self.indir, gene_name, debug=self.debug)  # how many times did we observe this gene in data?
        replacement_genes = None
        if self.n_occurences < self.args.min_observations_to_write:  # if we didn't see it enough, average over all the genes that find_replacement_genes() gives us
            if self.debug:
                print '      only saw it %d times (wanted %d), so use info from all other genes' % (self.n_occurences, self.args.min_observations_to_write)
            replacement_genes = utils.find_replacement_genes(self.indir, self.args.min_observations_to_write, gene_name, debug=self.debug)

        self.erosion_probs = self.read_erosion_info(gene_name, replacement_genes)
        self.insertion_probs, self.insertion_content_probs = self.read_insertion_info(gene_name, replacement_genes)
        self.mute_freqs, self.mute_obs = paramutils.read_mute_info(self.indir, this_gene=gene_name, chain=self.args.chain, approved_genes=replacement_genes)  # actual info in <self.mute_obs> isn't actually used a.t.m.

        self.track = Track('nukes', utils.nukes)
        self.saniname = utils.sanitize_name(gene_name)
        self.hmm = HMM(self.saniname, self.track.getdict())  # pass the track as a dict rather than a Track object to keep the yaml file a bit more readable
        self.hmm.extras['gene_prob'] = max(self.eps, utils.read_overall_gene_probs(self.indir, only_gene=gene_name))  # if we really didn't see this gene at all, take pity on it and kick it an eps
        tmp_mean_freq_hist = Hist(fname=self.indir + '/all-mean-mute-freqs.csv')
        self.hmm.extras['overall_mute_freq'] = tmp_mean_freq_hist.get_mean()
开发者ID:psathyrella,项目名称:partis,代码行数:52,代码来源:hmmwriter.py


示例17: read_allele_prevalence_freqs

def read_allele_prevalence_freqs(fname, debug=False):
    # NOTE kinda weird to mash all the regions into one file here (as compared to parametercounter), but it seems to make more sense
    allele_prevalence_freqs = {r: {} for r in utils.regions}
    with open(fname) as pfile:
        reader = csv.DictReader(pfile)
        for line in reader:
            allele_prevalence_freqs[utils.get_region(line["gene"])][line["gene"]] = float(line["freq"])
    for region in utils.regions:
        if len(allele_prevalence_freqs[region]) == 0:
            continue
        if debug:
            for gene, freq in allele_prevalence_freqs[region].items():
                print "%14.8f   %s" % (freq, utils.color_gene(gene))
        assert utils.is_normed(allele_prevalence_freqs[region])
    return allele_prevalence_freqs
开发者ID:psathyrella,项目名称:partis,代码行数:15,代码来源:glutils.py


示例18: print_match

    def print_match(self, region, gene, query_seq, score, glbounds, qrbounds, codon_pos, warnings, skipping=False):
        out_str_list = []
        buff_str = (20 - len(gene)) * ' '
        out_str_list.append('%8s%s%s%9s%3s %6.0f        ' % (' ', utils.color_gene(gene), '', '', buff_str, score))
        out_str_list.append('%4d%4d   %s\n' % (glbounds[0], glbounds[1], self.glfo['seqs'][region][gene][glbounds[0]:glbounds[1]]))
        out_str_list.append('%46s  %4d%4d' % ('', qrbounds[0], qrbounds[1]))
        out_str_list.append('   %s ' % (utils.color_mutants(self.glfo['seqs'][region][gene][glbounds[0]:glbounds[1]], query_seq[qrbounds[0]:qrbounds[1]])))
        if region != 'd':
            out_str_list.append('(%s %d)' % (utils.conserved_codons[region], codon_pos))
        if warnings[gene] != '':
            out_str_list.append('WARNING ' + warnings[gene])
        if skipping:
            out_str_list.append('skipping!')

        print ''.join(out_str_list)
开发者ID:apurvaraman,项目名称:partis,代码行数:15,代码来源:waterer.py


示例19: choose_some_alleles

def choose_some_alleles(region, genes_to_use, allelic_groups, n_alleles_per_gene, debug=False):
    """ choose a gene (i.e. a primary and sub-version) from <allelic_groups>, and its attendant alleles """
    # NOTE also modifies <allelic_groups>

    if len(allelic_groups[region]) == 0:
        raise Exception(
            "ran out of %s alleles (either --n-genes-per-region or --n-alleles-per-gene are probably too big)" % region
        )

    available_versions = None
    while available_versions is None or len(available_versions) == 0:
        if available_versions is not None:
            print "  %s couldn't find any versions that have %d alleles, so trying again" % (
                utils.color("red", "warning"),
                n_alleles,
            )
        n_alleles = numpy.random.choice(n_alleles_per_gene[region])
        available_versions = [
            (pv, subv)
            for pv in allelic_groups[region]
            for subv in allelic_groups[region][pv]
            if len(allelic_groups[region][pv][subv]) >= n_alleles
        ]
    ichoice = (
        numpy.random.randint(0, len(available_versions) - 1) if len(available_versions) > 1 else 0
    )  # numpy.random.choice() can't handle list of tuples (and barfs if you give it only one thing to choose from)
    primary_version, sub_version = available_versions[ichoice]
    new_alleles = set(
        numpy.random.choice(list(allelic_groups[region][primary_version][sub_version]), size=n_alleles, replace=False)
    )
    if debug:
        print "      %8s %5s   %s" % (
            primary_version,
            sub_version,
            " ".join([utils.color_gene(g, width=15) for g in new_alleles]),
        )

    assert len(new_alleles & genes_to_use) == 0  # make sure none of the new alleles are already in <genes_to_use>
    genes_to_use |= new_alleles  # actually add them to the final set

    # remove stuff we've used from <allelic_groups>
    del allelic_groups[region][primary_version][
        sub_version
    ]  # remove this sub-version (we don't want any more alleles from it)
    if len(allelic_groups[region][primary_version]) == 0:
        del allelic_groups[region][primary_version]
开发者ID:psathyrella,项目名称:partis,代码行数:46,代码来源:glutils.py


示例20: check_allele_prevalence_freqs

def check_allele_prevalence_freqs(outfname, glfo, allele_prevalence_fname, only_region=None):
    allele_prevalence_freqs = read_allele_prevalence_freqs(allele_prevalence_fname)
    counts = {r: {g: 0 for g in glfo["seqs"][r]} for r in utils.regions}
    with open(outfname) as outfile:
        reader = csv.DictReader(outfile)
        for line in reader:
            for region in utils.regions:
                counts[region][line[region + "_gene"]] += 1
    print "   checking allele prevalence freqs"
    for region in utils.regions:
        if only_region is not None and region != only_region:
            continue
        total = sum(counts[region].values())
        print "       %s   obs / tot  =  freq    expected" % region
        for gene in glfo["seqs"][region]:
            print "          %4d / %-4d = %.3f    %.3f   %s" % (
                counts[region][gene],
                total,
                float(counts[region][gene]) / total,
                allele_prevalence_freqs[region][gene],
                utils.color_gene(gene, width=15),
            )
开发者ID:psathyrella,项目名称:partis,代码行数:22,代码来源:glutils.py



注:本文中的utils.color_gene函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python utils.color_hex_to_cairo函数代码示例发布时间:2022-05-26
下一篇:
Python utils.color函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap