Python utils.is_normed function code examples

This article collects typical usage examples of the utils.is_normed function in Python. If you are wondering what is_normed does, how to call it, or what real-world uses look like, the hand-picked examples below should help.



A total of 20 code examples of the is_normed function are shown below, sorted by popularity by default.
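
Note on what is_normed checks: most of the examples pass it an already-summed scalar total; a few (Examples 8, 9, 14, 15, 18) pass a dict of probabilities, and some pass an explicit tolerance via this_eps (Examples 6, 10, 19). The real implementation lives in partis's utils.py; the sketch below is only inferred from that usage, assuming the check amounts to "does this sum to roughly 1.0", and the default epsilon is a guess.

    # Minimal sketch of is_normed inferred from the call sites below; the real
    # partis utils.is_normed may differ (default epsilon, accepted types, error handling).
    def is_normed(probs, this_eps=1e-5):  # default epsilon is an assumption
        if hasattr(probs, 'values'):      # dict of {key : prob}, e.g. Examples 8, 14, 15
            total = sum(probs.values())
        elif hasattr(probs, '__iter__'):  # list/tuple of probs
            total = sum(probs)
        else:                             # an already-summed scalar total
            total = probs
        return abs(total - 1.0) < this_eps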

Example 1: read_erosion_info

    def read_erosion_info(self, this_gene, approved_genes=None):
        # NOTE that d erosion lengths depend on each other... but I don't think that's modellable with an hmm. At least for the moment we integrate over the other erosion
        if approved_genes == None:
            approved_genes = [this_gene]
        genes_used = set()
        for erosion in utils.real_erosions + utils.effective_erosions:
            if erosion[0] != self.region:
                continue
            self.erosion_probs[erosion] = {}
            deps = utils.column_dependencies[erosion + "_del"]
            with opener("r")(
                self.indir + "/" + utils.get_parameter_fname(column=erosion + "_del", deps=deps)
            ) as infile:
                reader = csv.DictReader(infile)
                for line in reader:
                    # first see if we want to use this line (if <region>_gene isn't in the line, this erosion doesn't depend on gene version)
                    if (
                        self.region + "_gene" in line and line[self.region + "_gene"] not in approved_genes
                    ):  # NOTE you'll need to change this if you want it to depend on another region's genes
                        continue
                    # then skip nonsense erosions that're too long for this gene, but were ok for another
                    if int(line[erosion + "_del"]) >= len(self.germline_seq):
                        continue

                    # then add in this erosion's counts
                    n_eroded = int(line[erosion + "_del"])
                    if n_eroded not in self.erosion_probs[erosion]:
                        self.erosion_probs[erosion][n_eroded] = 0.0
                    self.erosion_probs[erosion][n_eroded] += float(line["count"])

                    if self.region + "_gene" in line:
                        genes_used.add(line[self.region + "_gene"])

            assert len(self.erosion_probs[erosion]) > 0

            # do some smoothingy things NOTE that we normalize *after* interpolating
            if (
                erosion in utils.real_erosions
            ):  # for real erosions, don't interpolate if we have lots of information about neighboring bins (i.e. we're pretty confident this bin should actually be zero)
                n_max = self.n_max_to_interpolate
            else:  # for fake erosions, always interpolate
                n_max = -1
            # print '   interpolate erosions'
            interpolate_bins(self.erosion_probs[erosion], n_max, bin_eps=self.eps, max_bin=len(self.germline_seq))
            self.add_pseudocounts(self.erosion_probs[erosion])

            # and finally, normalize
            total = 0.0
            for _, val in self.erosion_probs[erosion].iteritems():
                total += val

            test_total = 0.0
            for n_eroded in self.erosion_probs[erosion]:
                self.erosion_probs[erosion][n_eroded] /= total
                test_total += self.erosion_probs[erosion][n_eroded]
            assert utils.is_normed(test_total)

        if len(genes_used) > 1:  # if length is 1, we will have just used the actual gene
            if self.args.debug:
                print "    erosions used:", " ".join(genes_used)
Developer: stevenweaver, Project: partis, Lines: 60, Source: hmmwriter.py
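
The accumulate-normalize-verify pattern in Example 1 (build a dict of counts, divide each entry by the grand total, then re-sum and assert utils.is_normed on the result) recurs in most of the examples that follow. Distilled into a standalone sketch with invented counts:

    counts = {0: 12.0, 1: 30.0, 2: 8.0}  # hypothetical {n_eroded : count}
    total = sum(counts.values())

    probs = {}
    test_total = 0.0
    for n_eroded, count in counts.items():
        probs[n_eroded] = count / total  # normalize counts to probabilities
        test_total += probs[n_eroded]

    assert abs(test_total - 1.0) < 1e-5  # the role played by utils.is_normed(test_total)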


Example 2: read_mute_freqs

    def read_mute_freqs(self, mute_freq_dir):
        # NOTE these are mute freqs, not branch lengths, but it's ok for now
        for mtype in ['all',] + utils.regions:
            infname = mute_freq_dir + '/' + mtype + '-mean-mute-freqs.csv'
            self.branch_lengths[mtype] = {}
            self.branch_lengths[mtype]['lengths'], self.branch_lengths[mtype]['probs'] = [], []
            mutehist = plotting.make_hist_from_bin_entry_file(infname, mtype+'-mute-freqs')
            self.branch_lengths[mtype]['mean'] = mutehist.GetMean()

            if mutehist.GetBinContent(0) > 0.0 or mutehist.GetBinContent(mutehist.GetNbinsX()+1) > 0.0:
                print 'WARNING nonzero under/overflow bins read from %s' % infname

            check_sum = 0.0
            for ibin in range(1, mutehist.GetNbinsX()+1):  # ignore under/overflow bins
                freq = mutehist.GetBinCenter(ibin)
                branch_length = float(freq)
                prob = mutehist.GetBinContent(ibin)
                self.branch_lengths[mtype]['lengths'].append(branch_length)
                self.branch_lengths[mtype]['probs'].append(prob)
                check_sum += self.branch_lengths[mtype]['probs'][-1]
            assert utils.is_normed(check_sum)

        if self.args.debug:
            print '  mean branch lengths'
            for mtype in ['all',] + utils.regions:
                print '     %4s %7.3f (ratio %7.3f)' % (mtype, self.branch_lengths[mtype]['mean'], self.branch_lengths[mtype]['mean'] / self.branch_lengths['all']['mean'])
Developer: matsengrp, Project: bioboxpartis, Lines: 26, Source: treegenerator.py


Example 3: read_mute_freqs

    def read_mute_freqs(self, mute_freq_dir):
        # NOTE these are mute freqs, not branch lengths, but it's ok for now
        for mtype in ['all',] + utils.regions:
            infname = mute_freq_dir + '/' + mtype + '-mean-mute-freqs.csv'
            self.branch_lengths[mtype] = {}
            self.branch_lengths[mtype]['lengths'], self.branch_lengths[mtype]['probs'] = [], []
            mutehist = Hist(fname=infname)
            self.branch_lengths[mtype]['mean'] = mutehist.get_mean()

            # if mutehist.GetBinContent(0) > 0.0 or mutehist.GetBinContent(mutehist.GetNbinsX()+1) > 0.0:
            #     print 'WARNING nonzero under/overflow bins read from %s' % infname
            mutehist.normalize(include_overflows=False, overflow_eps_to_ignore=1e-2)  # if it was written with overflows included, it'll need to be renormalized
            check_sum = 0.0
            for ibin in range(1, mutehist.n_bins + 1):  # ignore under/overflow bins
                freq = mutehist.get_bin_centers()[ibin]
                branch_length = self.convert_observed_changes_to_branch_length(float(freq))
                prob = mutehist.bin_contents[ibin]
                self.branch_lengths[mtype]['lengths'].append(branch_length)
                self.branch_lengths[mtype]['probs'].append(prob)
                check_sum += self.branch_lengths[mtype]['probs'][-1]
            if not utils.is_normed(check_sum):
                raise Exception('not normalized %f' % check_sum)

        if self.args.debug:
            print '  mean branch lengths'
            for mtype in ['all',] + utils.regions:
                print '     %4s %7.3f (ratio %7.3f)' % (mtype, self.branch_lengths[mtype]['mean'], self.branch_lengths[mtype]['mean'] / self.branch_lengths['all']['mean'])
Developer: antibodyome, Project: partis, Lines: 27, Source: treegenerator.py


Example 4: write_mute_freqs

    def write_mute_freqs(self, region, gene_or_insert_name, seq, reco_event, reco_seq_fname, is_insertion=False):
        """ Read position-by-position mute freqs from disk for <gene_or_insert_name>, renormalize, then write to a file for bppseqgen. """
        mute_freqs = self.get_mute_freqs(gene_or_insert_name)

        rates = []  # list with a relative mutation rate for each position in <seq>
        total = 0.0
        # assert len(mute_freqs) == len(seq)  # only equal length if no erosions NO oh right but mute_freqs only covers areas we could align to...
        left_erosion_length = dict(reco_event.erosions.items() + reco_event.effective_erosions.items())[region + '_5p']
        for inuke in range(len(seq)):  # append a freq for each nuke
            position = inuke + left_erosion_length
            freq = 0.0
            if position in mute_freqs:
                freq = mute_freqs[position]
            else:
                freq = mute_freqs['overall_mean']
            rates.append(freq)
            total += freq

        # normalize to the number of sites (i.e. so an average site is given value 1.0)
        assert total != 0.0  # I am not hip enough to divide by zero
        for inuke in range(len(seq)):
            rates[inuke] *= float(len(seq)) / total
        total = 0.0

        # and... double check it, just for shits and giggles
        for inuke in range(len(seq)):
            total += rates[inuke]
        assert utils.is_normed(total / float(len(seq)))
        assert len(rates) == len(seq)  # you just can't be too careful. what if gremlins ate a few while python wasn't looking?

        # write the input file for bppseqgen, one base per line
        with opener('w')(reco_seq_fname) as reco_seq_file:
            reco_seq_file.write('state\trate\n')
            for inuke in range(len(seq)):
                reco_seq_file.write('%s\t%.15f\n' % (seq[inuke], rates[inuke]))
Developer: Annak17, Project: partis, Lines: 35, Source: recombinator.py
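
Example 4 normalizes to the number of sites rather than to 1.0: each rate is scaled by len(seq)/total so that the average per-site rate is exactly 1.0, which is why the final check passes total / float(len(seq)) to is_normed rather than total itself. The rescaling arithmetic in isolation, with invented rates:

    rates = [0.02, 0.05, 0.01, 0.04]  # hypothetical per-position mutation freqs
    total = sum(rates)
    rates = [r * len(rates) / total for r in rates]  # rescale so the mean rate is 1.0
    assert abs(sum(rates) / len(rates) - 1.0) < 1e-10  # i.e. is_normed(total / len(seq))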


Example 5: read_mute_freqs

    def read_mute_freqs(self, parameter_dir):
        # NOTE these are mute freqs, not branch lengths, but it's ok for now
        branch_lengths = {}
        for mtype in ['all',] + utils.regions:
            branch_lengths[mtype] = {n : [] for n in ('lengths', 'probs')}
            mutehist = self.get_mute_hist(mtype, parameter_dir)
            branch_lengths[mtype]['mean'] = mutehist.get_mean()

            mutehist.normalize(include_overflows=False, expect_overflows=True)  # if it was written with overflows included, it'll need to be renormalized
            check_sum = 0.0
            for ibin in range(1, mutehist.n_bins + 1):  # ignore under/overflow bins
                freq = mutehist.get_bin_centers()[ibin]
                branch_length = self.convert_observed_changes_to_branch_length(float(freq))
                prob = mutehist.bin_contents[ibin]
                branch_lengths[mtype]['lengths'].append(branch_length)
                branch_lengths[mtype]['probs'].append(prob)
                check_sum += branch_lengths[mtype]['probs'][-1]
            if not utils.is_normed(check_sum):
                raise Exception('not normalized %f' % check_sum)

        if self.args.debug:
            print '  mean branch lengths'
            for mtype in ['all',] + utils.regions:
                print '     %4s %7.3f (ratio %7.3f)' % (mtype, branch_lengths[mtype]['mean'], branch_lengths[mtype]['mean'] / branch_lengths['all']['mean'])

        return branch_lengths
Developer: Annak17, Project: partis, Lines: 26, Source: treegenerator.py


Example 6: read_vdj_version_freqs

    def read_vdj_version_freqs(self):
        """ Read the frequencies at which various VDJ combinations appeared in data """
        if self.args.rearrange_from_scratch:
            return None

        version_freq_table = {}
        with opener('r')(self.parameter_dir + '/' + utils.get_parameter_fname('all')) as infile:
            in_data = csv.DictReader(infile)
            total = 0.0
            for line in in_data:  # NOTE do *not* assume the file is sorted
                skip = False
                for region in utils.regions:
                    if line[region + '_gene'] not in self.glfo['seqs'][region]:
                        skip = True
                        break
                if skip:
                    continue
                total += float(line['count'])
                index = self.freqtable_index(line)
                assert index not in version_freq_table
                version_freq_table[index] = float(line['count'])

        if len(version_freq_table) == 0:
            raise Exception('didn\'t find any gene combinations in %s' % (self.parameter_dir + '/' + utils.get_parameter_fname('all')))

        # then normalize
        test_total = 0.0
        for index in version_freq_table:
            version_freq_table[index] /= total
            test_total += version_freq_table[index]
        assert utils.is_normed(test_total, this_eps=1e-8)
        assert len(version_freq_table) < 1e8  # if it gets *too* large, choose_vdj_combo() below isn't going to work because of numerical underflow. Note there's nothing special about 1e8, it's just that I'm pretty sure we're fine *up* to that point, and once we get beyond it we should think about doing things differently
        return version_freq_table
Developer: psathyrella, Project: partis, Lines: 33, Source: recombinator.py


Example 7: read_erosion_info

    def read_erosion_info(self, this_gene, approved_genes=None):
        # NOTE that d erosion lengths depend on each other... but I don't think that's modellable with an hmm. At least for the moment we integrate over the other erosion
        if approved_genes is None:
            approved_genes = [this_gene, ]
        eprobs = {}
        genes_used = set()
        for erosion in utils.real_erosions + utils.effective_erosions:
            if erosion[0] != self.region:
                continue
            eprobs[erosion] = {}
            if this_gene == glutils.dummy_d_genes[self.args.chain]:
                eprobs[erosion][0] = 1.  # always erode zero bases
                continue
            deps = utils.column_dependencies[erosion + '_del']
            with opener('r')(self.indir + '/' + utils.get_parameter_fname(column=erosion + '_del', deps=deps)) as infile:
                reader = csv.DictReader(infile)
                for line in reader:
                    # first see if we want to use this line (if <region>_gene isn't in the line, this erosion doesn't depend on gene version)
                    if self.region + '_gene' in line and line[self.region + '_gene'] not in approved_genes:  # NOTE you'll need to change this if you want it to depend on another region's genes
                        continue
                    # then skip nonsense erosions that're too long for this gene, but were ok for another
                    if int(line[erosion + '_del']) >= len(self.germline_seq):
                        continue

                    # then add in this erosion's counts
                    n_eroded = int(line[erosion + '_del'])
                    if n_eroded not in eprobs[erosion]:
                        eprobs[erosion][n_eroded] = 0.0
                    eprobs[erosion][n_eroded] += float(line['count'])

                    if self.region + '_gene' in line:
                        genes_used.add(line[self.region + '_gene'])

            if len(eprobs[erosion]) == 0:
                raise Exception('didn\'t read any %s erosion probs from %s' % (erosion, self.indir + '/' + utils.get_parameter_fname(column=erosion + '_del', deps=deps)))

            # do some smoothingy things NOTE that we normalize *after* interpolating
            if erosion in utils.real_erosions:  # for real erosions, don't interpolate if we have lots of information about neighboring bins (i.e. we're pretty confident this bin should actually be zero)
                n_max = self.n_max_to_interpolate
            else:  # for fake erosions, always interpolate
                n_max = -1
            # print '   interpolate erosions'
            interpolate_bins(eprobs[erosion], n_max, bin_eps=self.eps, max_bin=len(self.germline_seq))
            self.add_pseudocounts(eprobs[erosion])

            # and finally, normalize
            total = 0.0
            for _, val in eprobs[erosion].iteritems():
                total += val

            test_total = 0.0
            for n_eroded in eprobs[erosion]:
                eprobs[erosion][n_eroded] /= total
                test_total += eprobs[erosion][n_eroded]
            assert utils.is_normed(test_total)

        if len(genes_used) > 1 and self.debug:  # if length is 1, we will have just used the actual gene
            print '    used erosion info from:', ' '.join(genes_used)

        return eprobs
Developer: psathyrella, Project: partis, Lines: 60, Source: hmmwriter.py


Example 8: read_insertion_content

    def read_insertion_content(self, insertion):
        icontentprobs = {}  # NOTE this is only the probs for <insertion>, even though name is the same as in the previous function
        if insertion in utils.boundaries:  # i.e. if it's a real insertion
            with opener('r')(self.indir + '/' + insertion + '_insertion_content.csv') as icfile:
                reader = csv.DictReader(icfile)
                total = 0
                for line in reader:
                    icontentprobs[line[insertion + '_insertion_content']] = int(line['count'])
                    total += int(line['count'])

                if total == 0. and self.debug:
                    print '\n    WARNING zero insertion content probs read from %s, so setting to uniform distribution' % self.indir + '/' + insertion + '_insertion_content.csv'
                for nuke in utils.nukes:
                    if total == 0.:
                        icontentprobs[nuke] = 1. / len(utils.nukes)
                    else:
                        if nuke not in icontentprobs:
                            print '    %s not in insertion content probs, adding with zero' % nuke
                            icontentprobs[nuke] = 0
                        icontentprobs[nuke] /= float(total)
        else:  # just return uniform probs for effective (fv and jf) insertions
            icontentprobs = {n : 0.25 for n in utils.nukes}

        assert utils.is_normed(icontentprobs)

        return icontentprobs
Developer: psathyrella, Project: partis, Lines: 26, Source: hmmwriter.py


Example 9: read_insertion_content

    def read_insertion_content(self, insertion):
        self.insertion_content_probs[insertion] = {}
        if insertion in utils.boundaries:  # i.e. if it's a real insertion (uniform probs are used below for fv and jf insertions)
            with opener('r')(self.indir + '/' + insertion + '_insertion_content.csv') as icfile:
                reader = csv.DictReader(icfile)
                total = 0
                for line in reader:
                    self.insertion_content_probs[insertion][line[insertion + '_insertion_content']] = int(line['count'])
                    total += int(line['count'])
                if total == 0.:
                    print '\n    WARNING zero insertion content probs read from %s, so setting to uniform distribution' % self.indir + '/' + insertion + '_insertion_content.csv'
                for nuke in utils.nukes:
                    if total == 0.:
                        self.insertion_content_probs[insertion][nuke] = 1. / len(utils.nukes)
                    else:
                        if nuke not in self.insertion_content_probs[insertion]:
                            print '    %s not in insertion content probs, adding with zero' % nuke
                            self.insertion_content_probs[insertion][nuke] = 0
                        self.insertion_content_probs[insertion][nuke] /= float(total)
        else:
            self.insertion_content_probs[insertion] = {n : 0.25 for n in utils.nukes}

        assert utils.is_normed(self.insertion_content_probs[insertion])
        if self.args.debug:
            print '  insertion content for', insertion, self.insertion_content_probs[insertion]
Developer: Annak17, Project: partis, Lines: 25, Source: hmmwriter.py


Example 10: read_vdj_version_freqs

    def read_vdj_version_freqs(self, fname):
        """ Read the frequencies at which various VDJ combinations appeared in data """
        with opener('r')(fname) as infile:
            in_data = csv.DictReader(infile)
            total = 0.0
            for line in in_data:
                # NOTE do *not* assume the file is sorted
                #
                # if int(line['cdr3_length']) == -1:
                #     continue  # couldn't find conserved codons when we were inferring things
                if self.args.only_genes is not None:  # are we restricting ourselves to a subset of genes?
                    if line['v_gene'] not in self.args.only_genes:
                        continue
                    if line['d_gene'] not in self.args.only_genes:
                        continue
                    if line['j_gene'] not in self.args.only_genes:
                        continue
                total += float(line['count'])
                index = tuple(line[column] for column in utils.index_columns)
                assert index not in self.version_freq_table
                self.version_freq_table[index] = float(line['count'])

        if len(self.version_freq_table) == 0:
            print 'ERROR didn\'t find any matching gene combinations'
            assert False

        # then normalize
        test_total = 0.0
        for index in self.version_freq_table:
            self.version_freq_table[index] /= total
            test_total += self.version_freq_table[index]
        assert utils.is_normed(test_total, this_eps=1e-8)
        assert len(self.version_freq_table) < 1e8  # if it gets *too* large, choose_vdj_combo() below isn't going to work because of numerical underflow. Note there's nothing special about 1e8, it's just that I'm pretty sure we're fine *up* to that point, and once we get beyond it we should think about doing things differently
Developer: antibodyome, Project: partis, Lines: 33, Source: recombinator.py


Example 11: read_insertion_info

    def read_insertion_info(self, this_gene, approved_genes=None):
        if approved_genes == None:  # if we aren't explicitly passed a list of genes to use, we just use the gene for which we're actually writing the hmm
            approved_genes = [this_gene,]

        genes_used = set()
        for insertion in self.insertions:
            self.insertion_probs[insertion] = {}
            deps = utils.column_dependencies[insertion + '_insertion']
            with opener('r')(self.indir + '/' + utils.get_parameter_fname(column=insertion + '_insertion', deps=deps)) as infile:
                reader = csv.DictReader(infile)
                for line in reader:
                    # first see if we want to use this line (if <region>_gene isn't in the line, this erosion doesn't depend on gene version)
                    if self.region + '_gene' in line and line[self.region + '_gene'] not in approved_genes:  # NOTE you'll need to change this if you want it to depend on another region's genes
                        continue

                    # then add in this insertion's counts
                    n_inserted = 0
                    n_inserted = int(line[insertion + '_insertion'])
                    if n_inserted not in self.insertion_probs[insertion]:
                        self.insertion_probs[insertion][n_inserted] = 0.0
                    self.insertion_probs[insertion][n_inserted] += float(line['count'])

                    if self.region + '_gene' in line:
                        genes_used.add(line[self.region + '_gene'])

            assert len(self.insertion_probs[insertion]) > 0

            # print '   interpolate insertions'
            interpolate_bins(self.insertion_probs[insertion], self.n_max_to_interpolate, bin_eps=self.eps)  #, max_bin=len(self.germline_seq))  # NOTE that we normalize *after* this

            if 0 not in self.insertion_probs[insertion] or len(self.insertion_probs[insertion]) < 2:  # all hell breaks loose lower down if we haven't got shit in the way of information
                if self.args.debug:
                    print '    WARNING adding pseudocount to 1-bin in insertion probs'
                self.insertion_probs[insertion][0] = 1
                self.insertion_probs[insertion][1] = 1
                if self.args.debug:
                    print '      ', self.insertion_probs[insertion]

            assert 0 in self.insertion_probs[insertion] and len(self.insertion_probs[insertion]) >= 2  # all hell breaks loose lower down if we haven't got shit in the way of information

            # and finally, normalize
            total = 0.0
            for _, val in self.insertion_probs[insertion].iteritems():
                total += val
            test_total = 0.0
            for n_inserted in self.insertion_probs[insertion]:
                self.insertion_probs[insertion][n_inserted] /= total
                test_total += self.insertion_probs[insertion][n_inserted]
            assert utils.is_normed(test_total)

            if 0 not in self.insertion_probs[insertion] or self.insertion_probs[insertion][0] == 1.0:
                print 'ERROR cannot have all or none of the probability mass in the zero bin:', self.insertion_probs[insertion]
                assert False

            # self.insertion_content_probs = {}
            self.read_insertion_content(insertion)  # also read the base content of the insertions

        if len(genes_used) > 1:  # if length is 1, we will have just used the actual gene
            if self.args.debug:
                print '    insertions used:', ' '.join(genes_used)
Developer: Annak17, Project: partis, Lines: 60, Source: hmmwriter.py


Example 12: check

    def check(self):
        total = 0.0
        for _, prob in self.transitions.iteritems():
            assert prob >= 0.0
            total += prob
        if not utils.is_normed(total):
            raise Exception('transition probs not normed in %s: %s' % (self.name, self.transitions))

        if self.name == 'init':  # no emissions for 'init' state
            return

        if self.emissions is not None:
            total = 0.0
            for _, prob in self.emissions['probs'].iteritems():
                assert prob >= 0.0
                total += prob
            assert utils.is_normed(total)
Developer: Annak17, Project: partis, Lines: 17, Source: hmmwriter.py


Example 13: add_region_entry_transitions

    def add_region_entry_transitions(self, state, insertion):
        """
        Add transitions *into* the v, d, or j regions. Called from either the 'init' state or the 'insert_left' state.
        For v, this is (mostly) the prob that the read doesn't extend all the way to the left side of the v gene.
        For d and j, this is (mostly) the prob to actually erode on the left side.
        The two <mostly>s are there because in both cases, we're starting from *approximate* smith-waterman alignments, so we need to add some fuzz in case the s-w is off.
        """
        assert 'jf' not in insertion  # need these to only be *left*-hand insertions
        assert state.name == 'init' or 'insert' in state.name

        # first add transitions to the insert state
        region_entry_prob = 0.0  # Prob to go to an internal germline state (i.e. not to an insert state)
        if state.name == 'init':
            if insertion == '':
                region_entry_prob = 1.0  # if no insert state on this side (i.e. we're on left side of v), we have no choice but to enter the region (the internal states)
            else:
                region_entry_prob = self.get_zero_length_insertion_prob(insertion)  # prob of entering the region from 'init' is the prob of a zero-length insertion
        elif 'insert' in state.name:
            region_entry_prob = 1.0 - self.get_insert_self_transition_prob(insertion)  # the 'insert_left' state has to either go to itself, or else enter the region
        else:
            assert False

        # If this is an 'init' state, we add a transition to 'insert' with probability the observed probability of a non-zero insertion
        # Whereas if this is an 'insert' state, we add a *self*-transition with probability 1/<mean observed insert length>
        # update: now, we also multiply by the insertion content prob, since we now have four insert states (and can thus no longer use this prob in the emissions)
        if insertion != '':
            if not insertion in utils.boundaries:
                nukelist = ['N', ]
            else:
                nukelist = utils.nukes
            for nuke in nukelist:
                content_prob = 1. if nuke == 'N' else self.insertion_content_probs[insertion][nuke]
                state.add_transition('insert_left_' + nuke, (1.0 - region_entry_prob) * content_prob)

        # then add transitions to the region's internal states
        total = 0.0
        if self.region == 'v':  # only add a transition to the zeroth internal state
            state.add_transition('%s_%d' % (self.saniname, 0), region_entry_prob)
            total += region_entry_prob
            self.smallest_entry_index = 0
        else:
            erosion = self.region + '_5p'
            for inuke in range(len(self.germline_seq)):
                erosion_length = inuke
                if erosion_length in self.erosion_probs[erosion]:
                    prob = self.erosion_probs[erosion][erosion_length]
                    total += prob * region_entry_prob
                    if region_entry_prob != 0.0:  # only add the line if there's a chance of entering the region from this state
                        state.add_transition('%s_%d' % (self.saniname, inuke), prob * region_entry_prob)
                        if self.smallest_entry_index == -1 or inuke < self.smallest_entry_index:  # tells us where we need to start adding internal states (the smallest internal state index we add is the first one that has nonzero transition probability here)
                            self.smallest_entry_index = inuke
                    else:
                        assert state.name == 'init'  # if there's *no* chance of entering the region, this better *not* be the 'insert_left' state

        if region_entry_prob != 0.0 and not utils.is_normed(total / region_entry_prob):
            raise Exception('normalization problem in add_region_entry_transitions():\n  region_entry_prob: %f   total / region_entry_prob: %f' % (region_entry_prob, total / region_entry_prob))
Developer: Annak17, Project: partis, Lines: 56, Source: hmmwriter.py
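
The division by region_entry_prob in Example 13's final check reflects how the outgoing probability mass is split: the insert-state transitions carry (1 - region_entry_prob) in total (the insertion content probs sum to 1), while the region-entry transitions carry region_entry_prob times the 5p erosion probs (which also sum to 1). A toy bookkeeping check with invented numbers:

    region_entry_prob = 0.7
    content_probs = {'A': 0.3, 'C': 0.2, 'G': 0.25, 'T': 0.25}  # hypothetical insertion content probs
    erosion_probs = {0: 0.5, 1: 0.3, 2: 0.2}                    # hypothetical 5p erosion length probs

    insert_mass = sum((1.0 - region_entry_prob) * p for p in content_probs.values())
    entry_mass = sum(region_entry_prob * p for p in erosion_probs.values())

    assert abs(insert_mass + entry_mass - 1.0) < 1e-10          # all outgoing transitions sum to 1
    assert abs(entry_mass / region_entry_prob - 1.0) < 1e-10    # the quantity Example 13 hands to is_normed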


Example 14: choose_allele_prevalence_freqs

def choose_allele_prevalence_freqs(glfo, allele_prevalence_freqs, region, min_allele_prevalence_freq, debug=False):
    n_alleles = len(glfo["seqs"][region])
    prevalence_counts = numpy.random.randint(
        1, int(1.0 / min_allele_prevalence_freq), size=n_alleles
    )  # ensures that each pair of alleles has a prevalence ratio between <min_allele_prevalence_freq> and 1. NOTE it's inclusive
    prevalence_freqs = [float(c) / sum(prevalence_counts) for c in prevalence_counts]
    allele_prevalence_freqs[region] = {g: f for g, f in zip(glfo["seqs"][region].keys(), prevalence_freqs)}
    assert utils.is_normed(allele_prevalence_freqs[region])
    if debug:
        print "      counts %s" % " ".join([("%5d" % c) for c in prevalence_counts])
        print "       freqs %s" % " ".join([("%5.3f" % c) for c in prevalence_freqs])
        print "   min ratio %.3f" % (min(prevalence_freqs) / max(prevalence_freqs))
Developer: psathyrella, Project: partis, Lines: 12, Source: glutils.py


Example 15: read_allele_prevalence_freqs

def read_allele_prevalence_freqs(fname, debug=False):
    # NOTE kinda weird to mash all the regions into one file here (as compared to parametercounter), but it seems to make more sense
    allele_prevalence_freqs = {r: {} for r in utils.regions}
    with open(fname) as pfile:
        reader = csv.DictReader(pfile)
        for line in reader:
            allele_prevalence_freqs[utils.get_region(line["gene"])][line["gene"]] = float(line["freq"])
    for region in utils.regions:
        if len(allele_prevalence_freqs[region]) == 0:
            continue
        if debug:
            for gene, freq in allele_prevalence_freqs[region].items():
                print "%14.8f   %s" % (freq, utils.color_gene(gene))
        assert utils.is_normed(allele_prevalence_freqs[region])
    return allele_prevalence_freqs
Developer: psathyrella, Project: partis, Lines: 15, Source: glutils.py


Example 16: check

    def check(self):
        total = 0.0
        for _, prob in self.transitions.iteritems():
            assert prob >= 0.0
            total += prob
        assert utils.is_normed(total)

        if self.name == 'init':  # no emissions for 'init' state
            return

        if self.emissions is not None:
            total = 0.0
            for _, prob in self.emissions['probs'].iteritems():
                assert prob >= 0.0
                total += prob
            assert utils.is_normed(total)

        if self.pair_emissions is not None:
            total = 0.0
            for letter1 in self.pair_emissions['probs']:
                for _, prob in self.pair_emissions['probs'][letter1].iteritems():
                    assert prob >= 0.0
                    total += prob
            assert utils.is_normed(total)
Developer: matsengrp, Project: bioboxpartis, Lines: 24, Source: hmmwriter.py


Example 17: add_region_entry_transitions

    def add_region_entry_transitions(self, state, insertion):
        """
        Add transitions *into* the v, d, or j regions. Called from either the 'init' state or the 'insert_left' state.
        For v, this is (mostly) the prob that the read doesn't extend all the way to the left side of the v gene.
        For d and j, this is (mostly) the prob to actually erode on the left side.
        The two <mostly>s are there because in both cases, we're starting from *approximate* smith-waterman alignments, so we need to add some fuzz in case the s-w is off.
        """
        assert 'jf' not in insertion  # need these to only be *left*-hand insertions
        assert state.name == 'init' or 'insert' in state.name

        region_entry_prob = 0.0  # Prob to go directly into the region (i.e. with no insertion)
                                 # The sum of the region entry probs must be (1 - non_zero_insertion_prob) for d and j
                                 # (i.e. such that [prob of transitions to insert] + [prob of transitions *not* to insert] is 1.0)

        # first add transitions to the insert state
        if state.name == 'init':
            if insertion == '':
                region_entry_prob = 1.0  # if no insert state on this side (i.e. we're on left side of v), we have no choice but to enter the region (the internal states)
            else:
                region_entry_prob = self.insertion_probs[insertion][0]  # prob of entering the region from 'init' is the prob of a zero-length insertion
        elif 'insert' in state.name:
            region_entry_prob = 1.0 - self.get_insert_self_transition_prob(insertion)  # the 'insert_left' state has to either go to itself, or else enter the region
        else:
            assert False

        # If this is an 'init' state, we add a transition to 'insert' with probability the observed probability of a non-zero insertion
        # Whereas if this is an 'insert' state, we add a *self*-transition with probability 1/<mean observed insert length>
        # update: now, we also multiply by the insertion content prob, since we now have four insert states (and can thus no longer use this prob in the emissions)
        if insertion != '':
            for nuke in utils.nukes:
                state.add_transition('insert_left_' + nuke, (1.0 - region_entry_prob) * self.insertion_content_probs[insertion][nuke])

        # then add transitions to the region's internal states
        erosion = self.region + '_5p'
        total = 0.0
        for inuke in range(len(self.germline_seq)):
            erosion_length = inuke
            if erosion_length in self.erosion_probs[erosion]:
                prob = self.erosion_probs[erosion][erosion_length]
                total += prob * region_entry_prob
                if region_entry_prob != 0.0:  # only add the line if there's a chance of entering the region from this state
                    state.add_transition('%s_%d' % (self.saniname, inuke), prob * region_entry_prob)
                    if self.smallest_entry_index == -1 or inuke < self.smallest_entry_index:
                        self.smallest_entry_index = inuke
                else:
                    assert state.name == 'init'  # if there's *no* chance of entering the region, this better *not* be the 'insert_left' state

        assert region_entry_prob == 0.0 or utils.is_normed(total / region_entry_prob)
Developer: matsengrp, Project: bioboxpartis, Lines: 48, Source: hmmwriter.py


Example 18: read_insertion_content

    def read_insertion_content(self):
        self.insertion_content_probs = {}
        for bound in utils.boundaries:
            self.insertion_content_probs[bound] = {}
            with opener('r')(self.args.parameter_dir + '/' + bound + '_insertion_content.csv') as icfile:
                reader = csv.DictReader(icfile)
                total = 0
                for line in reader:
                    self.insertion_content_probs[bound][line[bound + '_insertion_content']] = int(line['count'])
                    total += int(line['count'])
                for nuke in utils.nukes:
                    if nuke not in self.insertion_content_probs[bound]:
                        print '    %s not in insertion content probs, adding with zero' % nuke
                        self.insertion_content_probs[bound][nuke] = 0
                    self.insertion_content_probs[bound][nuke] /= float(total)

            assert utils.is_normed(self.insertion_content_probs[bound])
Developer: antibodyome, Project: partis, Lines: 17, Source: recombinator.py


Example 19: normalize

 def normalize(self):
     sum_value = 0.0
     for ib in range(1, self.n_bins + 1):  # don't include under/overflows in sum_value
         sum_value += self.bin_contents[ib]
     if sum_value == 0.0:
         print 'WARNING sum zero in Hist::normalize, returning without doing anything'
         return
     # make sure there's not too much stuff in the under/overflows
     if self.bin_contents[0]/sum_value > 1e-10 or self.bin_contents[self.n_bins+1]/sum_value > 1e-10:
         print 'WARNING under/overflows'
     for ib in range(1, self.n_bins + 1):
         self.bin_contents[ib] /= sum_value
         if self.sum_weights_squared is not None:
             self.sum_weights_squared[ib] /= sum_value*sum_value
     check_sum = 0.0
     for ib in range(1, self.n_bins + 1):  # check it
         check_sum += self.bin_contents[ib]
     assert is_normed(check_sum, this_eps=1e-10)
Developer: matsengrp, Project: bioboxpartis, Lines: 18, Source: hist.py


Example 20: get_rescaled_trees

 def get_rescal 
