• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python multicomp.multipletests函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中statsmodels.sandbox.stats.multicomp.multipletests函数的典型用法代码示例。如果您正苦于以下问题:Python multipletests函数的具体用法?Python multipletests怎么用?Python multipletests使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了multipletests函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: calc_overlap_stats

def calc_overlap_stats(test_set, geneset_dict, total_genes):
    """Compute hypergeometric enrichment stats for a test set of genes.

    :param test_set: collection of differentially expressed genes
    :param geneset_dict: mapping of gene-set name -> genes in that set
    :param total_genes: total number of genes in the background population
    :return: DataFrame of sets with at least one overlap, with hypergeometric
        p-values plus Bonferroni and Benjamini-Hochberg adjusted p-values,
        sorted ascending by p-value
    """
    overlaps = get_overlaps(test_set, geneset_dict)

    # P(X >= match_count) from the hypergeometric survival function;
    # sf(k - 1) gives the upper tail including k itself.
    p = overlaps.apply(lambda x: (
        scipy.stats.hypergeom.sf(
            x["match_count"] - 1,  # observed overlaps (minus one for sf)
            total_genes,           # population size
            x["size of set"],      # successes in population
            len(test_set))),       # sample size (test-set size)
        axis=1)
    p = pd.DataFrame(p, columns=["hypergeom p-val"])

    # .ix, .select and .sort were removed from pandas; use boolean
    # indexing and sort_values for the same behavior.
    overlaps = overlaps.loc[overlaps["match_count"] > 0]
    overlaps = overlaps.merge(p,
                              left_index=True,
                              right_index=True).sort_values(
                                  "hypergeom p-val", ascending=True)

    if len(overlaps.index) > 0:
        overlaps["bonferroni"] = multicomp.multipletests(
            overlaps["hypergeom p-val"],
            method="bonferroni")[1]
        overlaps["b-h fdr adj pval"] = multicomp.multipletests(
            overlaps["hypergeom p-val"].fillna(1.0),
            method="fdr_bh")[1]

    return overlaps.sort_values("hypergeom p-val", ascending=True)
开发者ID:aheilbut,项目名称:alib,代码行数:27,代码来源:alg.py


示例2: adjustPvalue

    def adjustPvalue(s, preAdjusted, layoutAware):
        """Write rows to s.file with multiple-testing-adjusted p-values appended.

        :param s: context object whose `file` attribute is the output path
        :param preAdjusted: iterable of rows; the raw p-value sits at column
            `idx` (2 when layoutAware, else 1)
        :param layoutAware: selects which column holds the raw p-value
        """
        if layoutAware:
            idx = 2
        else:
            idx = 1

        try:
            # Some hosts do not have this library. If not we don't adjust
            import statsmodels.sandbox.stats.multicomp as multicomp
            adjust = True

        except Exception:
            adjust = False

        with open(s.file, 'w') as f:
            f = csv.writer(f, delimiter='\t')

            preAdjVals = []
            for i, row in enumerate(preAdjusted):

                if not adjust:

                    # No adjustment will happen so just write a NaN value to
                    # the file so the UI won't try to display this value
                    f.writerow(row + [float('NaN')])
                    continue

                # Extract the p-values from the data.
                # Translate NaNs to one so the stats routine will take it.
                if math.isnan(row[idx]):
                    preAdjVals.append(1)
                else:
                    preAdjVals.append(row[idx])

            # When statsmodels is unavailable every row was already written
            # above with a trailing NaN, so we are done.
            if not adjust:
                return

            try:
                # Benjamini-Hochberg FDR correction for p-values returns:
                #   [reject, p_vals_corrected, alphacSidak, alphacBonf]
                # http://statsmodels.sourceforge.net/devel/generated/statsmodels.sandbox.stats.multicomp.multipletests.html
                reject, adjPvals, alphacSidak, alphacBonf = multicomp.multipletests(preAdjVals, alpha=0.05, method='fdr_bh')

            except Exception:
                # Best-effort: fall back to 1s so the output still has a column.
                adjPvals = [1 for x in preAdjusted]

            try:
                # Bonferroni correction for p-values returns:
                #   [reject, p_vals_corrected, alphacSidak, alphacBonf]
                # http://statsmodels.sourceforge.net/devel/generated/statsmodels.sandbox.stats.multicomp.multipletests.html
                reject, adjPvalsB, alphacSidak, alphacBonf = multicomp.multipletests(preAdjVals, alpha=0.05, method='bonferroni')

            except Exception:
                adjPvalsB = [1 for x in preAdjusted]

            # Append the BH and Bonferroni adjusted values to each input row.
            for i, row in enumerate(preAdjusted):
                f.writerow(row + [sigDigs(adjPvals[i]), sigDigs(adjPvalsB[i])])
开发者ID:ucscHexmap,项目名称:hexagram,代码行数:58,代码来源:statsLayer.py


示例3: __enrich_counts

def __enrich_counts(db, to_r, query, pval_cutoff):
    """Attach enrichment p-values and adjusted q-values, filter by cutoff.

    :param db: database handle exposing bicluster_info.count()
    :param to_r: DataFrame of counts to annotate (rows keyed by GRE id)
    :param query: DataFrame whose row count is the query size
    :param pval_cutoff: keep only rows with pval <= this value
    :return: filtered, sorted DataFrame with integer GRE-id index
    """
    n_biclusters = db.bicluster_info.count()
    to_r["pval"] = to_r.apply(compute_p, axis=1, M=n_biclusters, N=query.shape[0])
    # Adjusted values are computed over ALL rows, before the cutoff filter.
    to_r["qval_BH"] = multipletests(to_r.pval, method='fdr_bh')[1]
    to_r["qval_bonferroni"] = multipletests(to_r.pval, method='bonferroni')[1]
    to_r = to_r.sort_values(["pval", "counts"], ascending=True)

    # Keep rows at or below the requested p-value cutoff.
    to_r = to_r.loc[to_r.pval <= pval_cutoff, :]
    # GRE identifiers must be integers.
    to_r.index = [int(gre_id) for gre_id in to_r.index]
    return to_r
开发者ID:baliga-lab,项目名称:egrin2-tools,代码行数:10,代码来源:egrin2_query.py


示例4: celltype_overrepresntation_list

    def celltype_overrepresntation_list(self, enrichmentdf):
        '''
        This method will save the result of significance in one DF.

        Builds self.cellgenedf with columns (celltype, gene, enrichment,
        binom_pval, FDR) from self.binom_pval_df, then BH-corrects the
        binomial p-values into the FDR column.

        :param enrichmentdf: DataFrame of enrichment values indexed by gene,
            one column per celltype
        '''
        # Threshold of 1 keeps every p-value; filtering happens downstream.
        significance = 1
        column = ['celltype', 'gene', 'enrichment', 'binom_pval', 'FDR']
        cellgenedf = pd.DataFrame()
        #print(self.binom_pval_df.head())
        for gene, celltype in self.binom_pval_df.iterrows():
            for cell, pval in celltype.iteritems():
                if pval < significance:
                    # FDR is initialised to 0 here and overwritten below.
                    cellgenedf = cellgenedf.append(
                        pd.Series([cell, gene, enrichmentdf.loc[gene, cell], pval, 0]), ignore_index=True)
        #print cellgenedf.head(10)
        cellgenedf.columns = column
        cellgenedf = cellgenedf.sort_values(['celltype', 'binom_pval'], ascending=[True, True])
        cellgenedf.index = range(len(cellgenedf))

        # Benjamini-Hochberg correction across all collected p-values;
        # element 1 of the result is the corrected p-value array.
        # NOTE(review): 'statsmodels' here presumably aliases the
        # statsmodels.sandbox.stats.multicomp module -- confirm imports.
        pvals = cellgenedf['binom_pval'].values
        corr_pvals = statsmodels.multipletests(pvals=pvals, alpha=0.05, method='fdr_bh')
        #print(pvals)
        #print(corr_pvals)
        cellgenedf['FDR'] = 0
        cellgenedf['FDR'] = corr_pvals[1]
        '''
        for ind, row in cellgenedf.iterrows():
            fdr = (row['Binom p-val'] * len(cellgenedf)) / (ind + 1)
            cellgenedf.iloc[ind, 4] = fdr
        '''
        print('cellgenedf shape:', cellgenedf.shape)
        #cellgenedf = self.filter_df(cellgenedf)
        self.cellgenedf = cellgenedf
        print('cellgenedf shape after:', cellgenedf.shape)
        #self.filter_cellgenedf()  # Filter single cell multigene enrihment
        self.overall_significant_celltypes()
开发者ID:peeyushsahu,项目名称:analysisTools,代码行数:35,代码来源:SignificanceTesting.py


示例5: binom_significant_celltypes

    def binom_significant_celltypes(self):
        '''
        Binomial test for significance of celltype enrichment.

        For each celltype in self.sigCelltypedf, tests the observed gene
        count against its background probability from self.binom_prob_occu,
        then BH-corrects the p-values into the 'binom_FDR' column.
        '''
        print('Testing celltype enrichment....')
        sigcelltype = self.sigCelltypedf
        cellgroup = self.cellgenedf.groupby(self.cellgenedf['celltype'])

        binom_prob_occu = self.binom_prob_occu

        # Initialise the p-value column, then fill it row by row.
        sigcelltype.loc[:, 'binom_pval'] = 1
        col = sigcelltype.columns.get_loc('binom_pval')
        for index, row in sigcelltype.iterrows():
            #print(row['celltype'])
            #print(row['genecluster'], totalgenes, len(cellgroup.get_group(row['celltype'])), allsiggenes)

            # Look up the background success probability for this celltype.
            bprob_ind = binom_prob_occu[binom_prob_occu['celltype'] == row['celltype']].index[0]
            #print(bprob_ind)
            background_prob = binom_prob_occu.loc[bprob_ind, 'background_prob']
            #print(background_prob)
            # Two-sided binomial test: genecluster-1 successes out of the
            # number of genes recorded for this celltype.
            binom_pval = stats.binom_test(row['genecluster']-1, len(cellgroup.get_group(row['celltype'])), background_prob, alternative='two-sided')
            sigcelltype.iloc[index, col] = binom_pval

        sigcelltype.loc[:, 'binom_FDR'] = 1
        sigcelltype = sigcelltype.sort_values('binom_pval', ascending=True)
        sigcelltype.index = range(len(sigcelltype))

        # BH correction; element 1 of the result is the corrected p-values.
        # NOTE(review): 'statsmodels' presumably aliases the multicomp
        # module -- confirm against the file's imports.
        pvals = sigcelltype['binom_pval'].values
        corr_pvals = statsmodels.multipletests(pvals=pvals, alpha=0.05, method='fdr_bh')
        #print(pvals)
        #print(corr_pvals)
        sigcelltype['binom_FDR'] = corr_pvals[1]
        self.sigCelltypedf = sigcelltype
开发者ID:peeyushsahu,项目名称:analysisTools,代码行数:33,代码来源:SignificanceTesting.py


示例6: test_multi_pvalcorrection

def test_multi_pvalcorrection():
    # Validate multipletests against the R package multtest (mt.rawp2adjp)
    # reference values stored in res_multtest. Results are compared in the
    # original (unsorted) order via the known R sort permutation.
    rmethods = {
        "rawp": (0, "pval"),
        "Bonferroni": (1, "b"),
        "Holm": (2, "h"),
        "Hochberg": (3, "sh"),
        "SidakSS": (4, "s"),
        "SidakSD": (5, "hs"),
        "BH": (6, "fdr_i"),
        "BY": (7, "fdr_n"),
    }
    supported = ("b", "s", "sh", "hs", "h", "fdr_i", "fdr_n")
    r_sortindex = [6, 8, 9, 7, 5, 1, 2, 4, 0, 3]

    for label, (col, method) in rmethods.items():
        if method not in supported:
            continue  # "rawp" has no multipletests equivalent
        corrected = multipletests(pval0, alpha=0.1, method=method)[1]
        assert_almost_equal(corrected[r_sortindex], res_multtest[:, col], 15)

    # fdrcorrection0 covered separately: BY ("n") and BH ("i") variants.
    assert_almost_equal(np.sort(fdrcorrection0(pval0, method="n")[1]),
                        res_multtest[:, 7], 15)
    assert_almost_equal(np.sort(fdrcorrection0(pval0, method="i")[1]),
                        res_multtest[:, 6], 15)
开发者ID:CRP,项目名称:statsmodels,代码行数:25,代码来源:test_multi.py


示例7: run

 def run(study, pop, gene_set, adjust='fdr_bh'):
     '''
     Run an over-representation analysis (ORA) against a gene set.

     :param study: the significant gene set
     :param pop: the background gene set
     :param gene_set: the function set (dict, or path to a GMT file)
     :param adjust: multiple-testing adjustment method, see
         http://www.statsmodels.org/dev/generated/statsmodels.sandbox.stats.multicomp.multipletests.html
     :return: the ORA analysis result
     '''
     gene_sets = gene_set if type(gene_set) == dict else GMTUtils.parse_gmt_file(gene_set)
     # Restrict every set to the background / study universes (as strings).
     pop_strs = set(str(x) for x in pop)
     study_strs = set(str(x) for x in study)
     mapped = {name: list(set(genes) & pop_strs) for name, genes in gene_sets.items()}
     s_mapped = {name: list(set(genes) & study_strs) for name, genes in gene_sets.items()}
     # Hypergeometric upper-tail p-value per set: sf(k-1) includes k itself.
     result = {}
     for name in mapped:
         result[name] = stats.hypergeom.sf(len(s_mapped[name]) - 1, len(pop), len(mapped[name]), len(study))
     names = list(result.keys())
     _, adjusted, _, _ = multicomp.multipletests(list(result.values()), method=adjust)
     rfdr = dict(zip(names, adjusted))
     # Assemble the tabular result.
     df_result = {'name': [], 'mapped': [], 'number in study': [], 'p-value': [], 'fdr': []}
     for name in mapped:
         df_result['name'].append(name)
         df_result['mapped'].append(len(mapped[name]))
         df_result['number in study'].append(len(s_mapped[name]))
         df_result['p-value'].append(result[name])
         df_result['fdr'].append(rfdr[name])
     df = pd.DataFrame(df_result)
     df = df[['name', 'mapped', 'number in study', 'p-value', 'fdr']]
     return ORA(df, study, pop, adjust)
开发者ID:mofiarska,项目名称:PyPathway,代码行数:30,代码来源:__init__.py


示例8: combine

	def combine(self, results):
		"""
		Stouffer combination of z-scores across columns.

		:param results: masked 2-D array of per-test z-scores
		:return: array stacking combined z-scores, p-values and
			BH-adjusted q-values
		"""

		# Stouffer: sum of z divided by sqrt of the non-masked count.
		combined_z = results.sum(axis=1) / np.sqrt(results.count(axis=1))

		n = combined_z.size
		nan_mask = combined_z.mask
		ok = np.where(~nan_mask)
		bad = np.where(nan_mask)

		# Upper-tail p-value of the combined z-score.
		upper_tail = stats.norm.sf(combined_z[ok])

		pvalues = np.empty(n)
		pvalues[ok] = upper_tail
		pvalues[bad] = np.nan

		# FDR-correct only the valid entries; invalid ones stay NaN.
		if upper_tail.size != 0:
			adjusted = multipletests(upper_tail, method='fdr_bh')[1]
		else:
			adjusted = np.array([])
		qvalues = np.empty(n)
		qvalues[ok] = adjusted
		qvalues[bad] = np.nan

		return np.array([combined_z, pvalues, qvalues])
开发者ID:chris-zen,项目名称:phd-thesis,代码行数:29,代码来源:zscore.py


示例9: stat_test

def stat_test(f, test_name, test, fdr):
    """Run a per-row statistical test on a signals table and save results.

    :param f: path to a tab-separated table with 'chr', 'start', 'end' plus
        per-sample columns (OD/YD groups detected by is_od/is_yd)
    :param test_name: label embedded in the output file names
    :param test: callable(od_values, yd_values) returning the test p-value
    :param fdr: FDR level passed to the Benjamini-Hochberg correction
    """
    print('Testing', test_name, f, 'fdr', fdr)
    df = pd.read_csv(f, sep='\t')
    # Drop contigs
    df = df.loc[[bool(re.match('chr[0-9XYM]+$', c)) for c in df['chr']]]
    ods = [c for c in df.columns.values if is_od(c)]
    yds = [c for c in df.columns.values if is_yd(c)]
    # One p-value per row, comparing the OD and YD sample groups.
    pvals = np.array([test(row[ods], row[yds]) for _, row in df.iterrows()])
    # multipletests returns (reject_mask, adjusted_pvals, ...).
    res = multipletests(pvals, fdr, "fdr_bh")
    h0_rejects = res[0]
    pvals_adj = res[1]
    df['pval'] = pvals
    df['pval_adj'] = pvals_adj
    df['od_mean'] = df[ods].mean(axis=1).to_frame('od_mean')['od_mean']
    df['yd_mean'] = df[yds].mean(axis=1).to_frame('yd_mean')['yd_mean']
    # Log fold change of the group means.
    df['logfc'] = np.log(df['od_mean'] / df['yd_mean'])
    # Sort by pvalue
    pvals_order = pvals.argsort()
    df = df.loc[pvals_order]
    h0_rejects = h0_rejects[pvals_order]

    # Save results
    results = re.sub(r'\.tsv', '_{}.tsv'.format(test_name), f)
    df[['chr', 'start', 'end', 'yd_mean', 'od_mean', 'logfc', 'pval', 'pval_adj']] \
        .to_csv(results, sep='\t', index=None, header=True)
    print('Saved test results to', results)

    # Save significant results
    if sum(h0_rejects) > 0:
        results_fdr = re.sub(r'\.tsv', '_{}_diff_fdr_{}.bed'.format(test_name, fdr), f)
        df.loc[h0_rejects][['chr', 'start', 'end']] \
            .to_csv(results_fdr, sep='\t', index=None, header=True)
        print('Saved {} significant results at FDR={} to {}'.format(
            sum(h0_rejects), fdr, results_fdr))


示例10: combine

	def combine(self, results):
		"""
		Fisher's combination of pvalues.

		:param results: masked 2-D array of per-test p-values
		:return: array stacking combined p-values and BH-adjusted q-values
		"""

		# Clamp tiny p-values so the logarithm stays finite.
		clamped = np.copy(results)
		clamped[clamped < PVALUE_EPSILON] = PVALUE_EPSILON

		logs = np.ma.log(clamped)
		log_sum = logs.sum(axis=1)
		n_tests = logs.count(axis=1)

		n = log_sum.size
		nan_mask = log_sum.mask
		ok = np.where(~nan_mask)
		bad = np.where(nan_mask)

		# Fisher's statistic -2*sum(log p) follows chi2 with 2k dof.
		combined = 1.0 - stats.chi2.cdf(-2.0 * log_sum[ok], 2 * n_tests[ok])
		pvalues = np.empty(n)
		pvalues[ok] = combined
		pvalues[bad] = np.nan

		# FDR-correct only the valid entries; invalid ones stay NaN.
		if combined.size != 0:
			adjusted = multipletests(combined, method='fdr_bh')[1]
		else:
			adjusted = np.array([])
		qvalues = np.empty(n)
		qvalues[ok] = adjusted
		qvalues[bad] = np.nan

		return np.array([pvalues, qvalues])
开发者ID:chris-zen,项目名称:phd-thesis,代码行数:33,代码来源:empirical.py


示例11: perform_multiple_comparison_stat

def perform_multiple_comparison_stat(data1, data2, alpha=0.05):
    """
    Column-wise t-tests between two samples, with multiple-testing correction.

    :param data1: list of rows (each row a sequence of measurements)
    :param data2: list of rows with the same number of columns as data1
    :param alpha: significance level for the correction and the final vote
    :return: True if the fraction of significant columns reaches alpha
        (useful when there are A LOT of columns), else False
    """
    mat1 = np.array(data1)
    mat2 = np.array(data2)
    comparisons = len(data1[0])
    # Independent two-sample t-test per column; keep only the p-value.
    pvals = [ttest_ind(mat1[:, i].tolist(), mat2[:, i])[1] for i in range(comparisons)]

    # Default method is Holm-Sidak; element 0 is the reject mask.
    mult_comparison = multipletests(pvals, alpha=alpha)
    print(mult_comparison[0])

    # Declare the samples different when the proportion of rejected null
    # hypotheses is at least alpha.
    rejects = mult_comparison[0]
    n_significant = int(np.count_nonzero(rejects))
    return n_significant / len(rejects) >= alpha
开发者ID:CompBioUIC,项目名称:Mingle,代码行数:28,代码来源:classifiers.py


示例12: hypergeometric_significant_celltypes

    def hypergeometric_significant_celltypes(self):
        '''
        hypergeometric test for significance of celltype enrichment.

        Tests each celltype's gene-cluster count against the pool of all
        FDR-significant genes, then BH-corrects into 'hyper_FDR'.
        '''
        print('Testing celltype enrichment....')
        sigcelltype = self.sigCelltypedf
        cellgroup = self.cellgenedf.groupby(self.cellgenedf['celltype'])
        totalgenes = self.occurrencedf.shape[0]

        # Number of unique genes significant at FDR <= 0.05.
        allsiggenes = self.cellgenedf
        allsiggenes = allsiggenes[allsiggenes['FDR'] <= 0.05]
        allsiggenes = len(set(allsiggenes['gene']))

        # Initialise the p-value column, then fill it row by row.
        sigcelltype.loc[:, 'hyper_pval'] = 1
        col = sigcelltype.columns.get_loc('hyper_pval')
        for index, row in sigcelltype.iterrows():
            #print(row['celltype'])
            #print(row['genecluster'], totalgenes, len(cellgroup.get_group(row['celltype'])), allsiggenes)
            ## stats.hypergeom.sf(x, M, n, N)
            hyper_pval = stats.hypergeom.sf(row['genecluster']-1, totalgenes, allsiggenes, len(cellgroup.get_group(row['celltype'])))
            #print(hyper_pval)
            sigcelltype.iloc[index, col] = hyper_pval

        sigcelltype.loc[:, 'hyper_FDR'] = 1
        #ind_fdr = sigcelltype.columns.get_loc('hyper_FDR')
        sigcelltype = sigcelltype.sort_values('hyper_pval', ascending=True)
        sigcelltype.index = range(len(sigcelltype))

        # BH correction; element 1 of the result is the corrected p-values.
        # NOTE(review): 'statsmodels' presumably aliases the multicomp
        # module -- confirm against the file's imports.
        pvals = sigcelltype['hyper_pval'].values
        corr_pvals = statsmodels.multipletests(pvals=pvals, alpha=0.05, method='fdr_bh')
        sigcelltype['hyper_FDR'] = corr_pvals[1]
        self.sigCelltypedf = sigcelltype
开发者ID:peeyushsahu,项目名称:analysisTools,代码行数:32,代码来源:SignificanceTesting.py


示例13: calculate_gene_expression_similarity

def calculate_gene_expression_similarity(reduced_stat_map_data, mask="full"):
    """Regress AHBA gene expression onto a statistical map, per donor.

    For every donor in the HDF5 store, z-scores expression and statmap
    values, fits per-probe linear regressions, then combines slopes across
    donors with a one-sample t-test and BH-corrects the p-values.

    :param reduced_stat_map_data: statmap values indexed by AHBA sample
    :param mask: 'full', 'cortex' or 'subcortex' sample selection
    :return: DataFrame of per-probe t, p, FDR-corrected p and variance
        explained statistics, joined with probe annotations

    NOTE(review): Python 2 code (print statements) -- runs under 2.x only.
    """
    store_file = "/ahba_data/store_max1_reduced.h5"
    subcortex_mask = "/ahba_data/subcortex_mask.npy"

    results_dfs = []
    with pd.HDFStore(store_file, 'r') as store:
        for donor_id in store.keys():
            print "Loading expression data (%s)" % donor_id
            expression_data = store.get(donor_id.replace(".", "_"))

            print "Getting statmap values (%s)" % donor_id
            nifti_values = reduced_stat_map_data[expression_data.columns]

            print "Removing missing values (%s)" % donor_id
            na_mask = np.isnan(nifti_values)
            # Extend the NaN mask with the anatomical mask selection.
            if mask == "subcortex":
                na_mask = np.logical_or(na_mask,
                    np.isnan(np.load(subcortex_mask)[expression_data.columns]))
            elif mask == "cortex":
                na_mask = np.logical_or(na_mask, np.logical_not(np.isnan(
                    np.load(subcortex_mask)[expression_data.columns])))
            else:
                assert mask == "full"

            nifti_values = np.array(nifti_values)[np.logical_not(na_mask)]
            expression_data.drop(expression_data.columns[na_mask], axis=1, inplace=True)

            print "z scoring (%s)" % donor_id
            expression_data = pd.DataFrame(zscore(expression_data, axis=1), columns=expression_data.columns,
                                           index=expression_data.index)
            nifti_values = zscore(nifti_values)

            print "Calculating linear regressions (%s)" % donor_id
            # Least squares with an intercept column; keep only the slope.
            regression_results = np.linalg.lstsq(np.c_[nifti_values, np.ones_like(nifti_values)], expression_data.T)
            results_df = pd.DataFrame({"slope": regression_results[0][0]}, index=expression_data.index)

            results_df.columns = pd.MultiIndex.from_tuples([(donor_id[1:], c,) for c in results_df.columns],
                                                           names=['donor_id', 'parameter'])

            results_dfs.append(results_df)

        print "Concatenating results"
        results_df = pd.concat(results_dfs, axis=1)
        del results_dfs

    # One-sample t-test of per-donor slopes against zero, per probe.
    t, p = ttest_1samp(results_df, 0.0, axis=1)
    group_results_df = pd.DataFrame({"t": t, "p": p}, columns=['t', 'p'], index=expression_data.index)
    _, group_results_df["p (FDR corrected)"], _, _ = multipletests(group_results_df.p, method='fdr_bh')
    # Squared slope on z-scored data approximates variance explained (%).
    group_results_df["variance explained (mean)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).mean(axis=1)
    group_results_df["variance explained (std)"] = (results_df.xs('slope', axis=1, level=1) ** 2 * 100).std(axis=1)
    del results_df
    probe_info = pd.read_csv("/ahba_data/probe_info_max1.csv", index_col=0).drop(['chromosome', "gene_id"], axis=1)
    group_results_df = group_results_df.join(probe_info)
    group_results_df = group_results_df[["gene_symbol", "entrez_id.1", "gene_name","t", "p", "p (FDR corrected)",
                                         "variance explained (mean)", "variance explained (std)"]]

    return group_results_df
开发者ID:NeuroVault,项目名称:NeuroVault,代码行数:57,代码来源:ahba.py


示例14: calculate_quantile_pvalue

 def calculate_quantile_pvalue(
         self, quantile, minvalues=10
     ):
     """Compare distance distributions between two conditions at quantiles.

     :param quantile: float or list of floats identifying the quantiles
     :param minvalues: minimum observations per condition required to
         compute a Mann-Whitney U p-value
     :return: DataFrame indexed by quantile with per-sample and
         per-condition counts/means plus 'pvalue' and BH-corrected 'fdr'
     :raises TypeError: if quantile is not a float or a list of floats
     """
     # Check arguments
     if isinstance(quantile, float):
         quantile = [quantile]
     elif isinstance(quantile, list):
         for q in quantile:
             if not isinstance(q, float):
                 raise TypeError('quantile list must contain floats')
     else:
         raise TypeError('quantile must be float or list of floats')
     # Create colnames for output dataframe
     colNames = []
     for condition in self.matrices:
         for sample in self.matrices[condition]:
             colNames.append('{}_{}_no'.format(condition, sample))
             colNames.append('{}_{}_mean'.format(condition, sample))
     for condition in self.matrices:
         colNames.append('{}_no'.format(condition))
         colNames.append('{}_mean'.format(condition))
     colNames.extend(['pvalue', 'fdr'])
     # Create output dataframe
     outDF = pd.DataFrame(index=quantile, columns=colNames)
     outDF = outDF.sort_index()
     # Extract quantile distance data
     quantData = self.extract_dist_quantile(quantile)
     splitQuant = quantData.groupby('quan')
     for q, data in splitQuant:
         # Extract data for conditions and samples
         condValues = []
         for cond in self.matrices:
             # Extract data for condition
             condData = data[data['cond'] == cond]
             condDist = condData['dist']
             condValues.append(condDist)
             # Add condition data to output
             colPrefix = '{}_'.format(cond)
             outDF.loc[q, colPrefix + 'no'] = condDist.size
             outDF.loc[q, colPrefix + 'mean'] = condDist.mean()
             for smpl in self.matrices[cond]:
                 # Extract data for sample
                 smplData = condData[condData['smpl'] == smpl]
                 smplDist = smplData['dist']
                 # Add sample data to output
                 colPrefix = '{}_{}_'.format(cond, smpl)
                 outDF.loc[q, colPrefix + 'no'] = smplDist.size
                 outDF.loc[q, colPrefix + 'mean'] = smplDist.mean()
         # Calculate pvalues
         # NOTE(review): assumes exactly two conditions in self.matrices.
         dist1, dist2 = condValues
         if dist1.size >= minvalues and dist2.size >= minvalues:
             outDF.loc[q, 'pvalue'] = mannwhitneyu(dist1, dist2)[1]
     # Add fdr and return
     pvalueIndex = outDF.index[~outDF['pvalue'].isnull()]
     outDF.loc[pvalueIndex, 'fdr'] = multipletests(
         outDF.loc[pvalueIndex, 'pvalue'], method='fdr_bh')[1]
     return(outDF)
开发者ID:adam-rabinowitz,项目名称:ngs_python,代码行数:57,代码来源:analyseInteraction.py


示例15: bhCorrection

def bhCorrection(s, n=None):
    """
    Benjamini-Hochberg correction for a Series of p-values.

    :param s: pandas Series of p-values (NaNs are treated as 1.0)
    :param n: optional total number of tests; when larger than len(s) the
        p-value list is padded with 1s before correction
    :return: Series of adjusted p-values named 'p_adj', indexed like s
    """
    s = s.fillna(1.)
    # Guard the default: `None > len(s)` raises TypeError on Python 3.
    if n is not None and n > len(s):
        p_vals = list(s) + [1] * (n - len(s))
    else:
        p_vals = list(s)
    q = multicomp.multipletests(p_vals, method='fdr_bh')[1][:len(s)]
    q = pd.Series(q[:len(s)], s.index, name='p_adj')
    return q
开发者ID:cdeboever3,项目名称:TCGA-1,代码行数:9,代码来源:Helpers.py


示例16: adjust_score

    def adjust_score(self, score):
        """
        Adjust a list of p-values with the configured correction method.

        :param score: the list of p-values to adjust
        :return: the adjusted p-values, or the input unchanged when no
            adjustment method is configured
        """
        method = self.args_dict['pvaladjust']
        if method is None:
            return score
        alpha = self.args_dict['threshold']
        return multipletests(score, alpha=alpha, method=method)[1]
开发者ID:Talleyman,项目名称:Validate-Master2,代码行数:11,代码来源:winnow.py


示例17: qvalues

 def qvalues(self, below=0.1):
     """
     BH-adjusted q-values for the cached p-values, kept when below threshold.

     :param below: q-value threshold for keeping a peak
     :return: list of (qvalue, pvalue, peak) tuples with q < below
     """
     if not self._pvalues:
         self.pvalues()
     raw = [p for _, p in self._pvalues]
     adjusted = list(multipletests(raw, method='fdr_bh')[1])
     kept = [(q, p, peak)
             for q, (peak, p) in zip(adjusted, self._pvalues)
             if q < below]
     self._qvalues = kept
     log.notice('got %d peaks with qvalue below %.2f. From %d possible.' % (
         len(kept), below, len(raw)))
     return kept


示例18: bhCorrection

def bhCorrection(s, n=None):
    """
    Benjamini-Hochberg correction for a Series of p-values.

    :param s: pandas Series of p-values (NaNs are treated as 1.0)
    :param n: optional total number of tests; when larger than len(s) the
        p-value list is padded with 1s before correction
    :return: Series of adjusted p-values named 'p_adj', indexed like s
    """
    s = s.fillna(1.)
    # Guard the default: `None > len(s)` raises TypeError on Python 3.
    if n is not None and n > len(s):
        p_vals = list(s) + [1] * (n - len(s))
    else:
        p_vals = list(s)
    q = multicomp.multipletests(p_vals, method='fdr_bh')[1][:len(s)]
    q = pd.Series(q[:len(s)], s.index, name='p_adj')
    return q
开发者ID:theandygross,项目名称:Stats,代码行数:12,代码来源:Pandas.py


示例19: pvalues

def pvalues(vect, vals, side="two-sided", significance_threshold=0.05, multi_tests_cor_method="fdr_bh"):
    """
    Computes the pvalue of given values in a vector.

    :param vect: the vector used to compute the pvalue
    :param vals: the values used to compute the pvalue (iterable)
    :param side: the sides to find the pvalue (can be 'two-sided', 'left' of 'right'). Default is 'two-sided'
    :param significance_threshold: the significance threshold. Default is 0.05
    :param multi_tests_cor_method: multiple testing correction method. Default is 'fdr_bh'

    :returns: the list of per-value result dicts (pvalue, description, significance, uncorrected_pvalue); additionally the Bonferroni-corrected alpha for method 'bonferroni', or the Sidak-corrected alpha for 'sidak'/'holm-sidak'

    :raises: IOError: if vect or val are not of an appropriate type or if side is not one of: 'two-sided', 'left' or 'right'

    .. note::
        multi_tests_cor_method parameter accepts the following values:

        - **bonferroni** : one-step correction
        - **sidak** : one-step correction
        - **holm-sidak** : step down method using Sidak adjustments
        - **holm** : step-down method using Bonferroni adjustments
        - **simes-hochberg** : step-up method  (independent)
        - **hommel** : closed method based on Simes tests (non-negative)
        - **fdr_bh** : Benjamini/Hochberg  (non-negative)
        - **fdr_by** : Benjamini/Yekutieli (negative)
        - **fdr_tsbh** : two stage fdr correction (non-negative)
        - **fdr_tsbky** : two stage fdr correction (non-negative)

    """
    pvals = [None] * len(vals)
    for i in range(len(vals)):
        val = vals[i]
        p, d, s = pvalue(vect, val, side=side, significance_threshold=significance_threshold)
        pvals[i] = dict(pvalue=p, description=d, significance=s)
    # Multiple testing correction
    if multi_tests_cor_method is not None:
        s, cp, alphacSidak, alphacBonf = multipletests(
            [pvals[i]["pvalue"] for i in range(len(vals))], alpha=significance_threshold, method=multi_tests_cor_method
        )

        for i in range(len(vals)):
            pvals[i]["uncorrected_pvalue"] = pvals[i]["pvalue"]
            pvals[i]["pvalue"] = cp[i]
            pvals[i]["significance"] = s[i]
            d = pvals[i]["description"]
            # Rewrite the value after the last '=' in the description with
            # the corrected p-value.
            pvals[i]["description"] = "%s %.3e" % (d[: d.rindex("=") + 1], cp[i])

        # BUG FIX: the Bonferroni branch previously returned alphacSidak,
        # contradicting the documented contract.
        if multi_tests_cor_method == "bonferroni":
            return pvals, alphacBonf
        if multi_tests_cor_method == "sidak" or multi_tests_cor_method == "holm-sidak":
            return pvals, alphacSidak
    return pvals
开发者ID:pydupont,项目名称:PersonalPythonLibs,代码行数:52,代码来源:pvalue.py


示例20: calculate_dist_pvalue

 def calculate_dist_pvalue(self, rmzero=True, minvalues=10):
     """Compare interaction probability distributions between conditions.

     For every genomic distance, collects per-condition probabilities from
     self.extract_dist_prob(), summarises counts/means per sample and
     condition, computes a Mann-Whitney U p-value when both conditions
     have at least `minvalues` observations, and BH-corrects into 'fdr'.

     :param rmzero: drop zero probabilities before testing
     :param minvalues: minimum observations per condition for a p-value
     :return: DataFrame indexed by distance
     """
     # Extract distances for input matrices
     distProb = self.extract_dist_prob()
     splitDist = distProb.groupby('dist')
     # Create output columns
     colNames = []
     for condition in self.matrices:
         for sample in self.matrices[condition]:
             colNames.append('{}_{}_no'.format(condition, sample))
             colNames.append('{}_{}_mean'.format(condition, sample))
     for condition in self.matrices:
         colNames.append('{}_no'.format(condition))
         colNames.append('{}_mean'.format(condition))
     colNames.extend(['pvalue', 'fdr'])
     # Create output dataframe
     outDF = pd.DataFrame(
         columns = colNames, index = splitDist.groups.keys())
     outDF = outDF.sort_index()
     # Loop through data and calculate results
     for dist, data in splitDist:
         # Remove zero values
         if rmzero:
             data = data[data['prob'] > 0]
         # Extract data for conditions and samples
         condValues = []
         for cond in self.matrices:
             # Extract data for condition
             condData = data[data['cond'] == cond]
             condProb = condData['prob']
             condValues.append(condProb)
             # Add condition data to output
             colPrefix = '{}_'.format(cond)
             outDF.loc[dist, colPrefix + 'no'] = condProb.size
             outDF.loc[dist, colPrefix + 'mean'] = condProb.mean()
             for smpl in self.matrices[cond]:
                 # Extract data for sample
                 smplData = condData[condData['smpl'] == smpl]
                 smplProb = smplData['prob']
                 # Add sample data to output
                 colPrefix = '{}_{}_'.format(cond, smpl)
                 outDF.loc[dist, colPrefix + 'no'] = smplProb.size
                 outDF.loc[dist, colPrefix + 'mean'] = smplProb.mean()
         # Calculate pvalues
         # NOTE(review): assumes exactly two conditions in self.matrices.
         prob1, prob2 = condValues
         if prob1.size >= minvalues and prob2.size >= minvalues:
             outDF.loc[dist, 'pvalue'] = mannwhitneyu(prob1, prob2)[1]
     # Sort data, add fdr and return
     pvalueIndex = outDF.index[~outDF['pvalue'].isnull()]
     outDF.loc[pvalueIndex, 'fdr'] = multipletests(
         outDF.loc[pvalueIndex, 'pvalue'], method='fdr_bh')[1]
     return(outDF)
开发者ID:adam-rabinowitz,项目名称:ngs_python,代码行数:51,代码来源:analyseInteraction.py



注:本文中的statsmodels.sandbox.stats.multicomp.multipletests函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python anova.anova_lm函数代码示例发布时间:2022-05-27
下一篇:
Python predstd.wls_prediction_std函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap