• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python scale.mad函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中statsmodels.robust.scale.mad函数的典型用法代码示例。如果您正苦于以下问题:Python mad函数的具体用法?Python mad怎么用?Python mad使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了mad函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: _estimate_scale

 def _estimate_scale(self, resid):
     """
     Compute the scale estimate selected by ``self.scale_est``.

     A string option is matched case-insensitively: ``"mad"`` calls
     ``scale.mad`` with ``center=0`` and ``"stand_mad"`` calls it with its
     default center; any other string raises ``ValueError``.  A
     ``scale.HuberScale`` instance is called with
     ``(self.df_resid, self.nobs, resid)``.  Anything else is delegated to
     ``scale.scale_est`` and the result is squared.
     """
     estimator = self.scale_est
     if not isinstance(estimator, str):
         if isinstance(estimator, scale.HuberScale):
             return estimator(self.df_resid, self.nobs, resid)
         return scale.scale_est(self, resid) ** 2

     option = estimator.lower()
     if option == "mad":
         return scale.mad(resid, center=0)
     if option == "stand_mad":
         return scale.mad(resid)
     raise ValueError("Option %s for scale_est not understood" % estimator)
开发者ID:eph,项目名称:statsmodels,代码行数:15,代码来源:robust_linear_model.py


示例2: _prefilter_windows_deseq

 def _prefilter_windows_deseq(self, df):
     """Drop windows lacking expression and windows below the MAD cutoff.

     Rows are kept only if every column in ``self._exp_lib_list`` is
     greater than zero.  A copy of the surviving rows has its
     ``self._lib_names_list`` columns divided by ``self._size_factors``;
     rows whose mean over the experiment libraries in that copy is below
     ``self._mad_multiplier`` times the MAD (centered at zero) of those
     means are removed.  Progress and timings are printed.  The returned
     DataFrame holds the original (un-normalized) values.
     """
     done_msg = "Removal took {} seconds. DataFrame contains now {} rows."

     print("Removing windows where not all experiment libs show "
           "expression from DataFrame with {} rows...".format(len(df)),
           flush=True)
     started = time()
     zero_count = 0.0
     for lib_name in self._exp_lib_list:
         expressed = df.loc[:, lib_name] > zero_count
         df = df.loc[expressed, :]
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     if df.empty:
         return df

     # Normalize a copy so that the frame handed back keeps its raw counts.
     normalized = df.copy()
     lib_columns = self._lib_names_list
     normalized[lib_columns] = normalized[lib_columns].div(
         self._size_factors, axis='columns')

     # Minimum expression cutoff from the mean over experiment libraries.
     print("Removing windows based on mad cutoff from DataFrame "
           "with {} rows...".format(len(df)), flush=True)
     started = time()
     exp_means = normalized.loc[:, self._exp_lib_list].mean(axis=1)
     median_abs_dev_from_zero = mad(exp_means, center=0.0)
     min_expr = self._mad_multiplier * median_abs_dev_from_zero
     print("Minimal window expression based on mean over RIP/CLIP "
           "libraries: {} (MAD from zero: {})".format(
               min_expr, median_abs_dev_from_zero), flush=True)
     df = df.loc[exp_means >= min_expr, :]
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     return df
开发者ID:tbischler,项目名称:PEAKachu,代码行数:33,代码来源:window.py


示例3: mad_outliers

def mad_outliers(data, genes, threshold, percentile=95, as_json=True):
    """Stream the genes whose MAD-scaled values exceed ``threshold``.

    For each gene ``g`` in ``genes`` the row ``data.loc[g, :]`` is centered
    on its median and divided by its MAD.  The gene is reported when the
    ``percentile``-th percentile of those scores is above ``threshold``.
    Rows with a MAD of exactly zero are skipped.

    :param data: object supporting ``data.loc[g, :]`` row lookup
    :param genes: iterable of gene ids (strings) to test
    :param threshold: cutoff applied to the scaled scores
    :param percentile: percentile of the scores compared with ``threshold``
    :param as_json: if True, yield string chunks that concatenate to the
        JSON document ``{"outliers":[...]}``; otherwise yield one dict per
        reported gene
    :raises IndexError: if a reported gene is missing from the records
        returned by ``dr.get_dataset_ensembl_info()``

    After the loop, the ids of all reported genes are passed to
    ``pr.save_outliers(1, ...)`` if there is at least one.
    """
    res = dr.get_dataset_ensembl_info()
    outliers_id = []
    if as_json:
        yield "{\"outliers\":["
    for g in genes:
        row_values = data.loc[g, :]
        row_mad = mad(row_values)
        if row_mad == 0.0:
            continue

        scores = (row_values - row_values.median()) / row_mad
        # Honour the ``percentile`` argument (it used to be ignored in
        # favour of a hard-coded 95).
        if not scoreatpercentile(scores, percentile) > threshold:
            continue

        support = len(scores[scores > threshold])
        info = [gene for gene in res if gene.ensemblgeneid == g][0]
        formatted_info = {"id": g, "name": info.genename, "type": info.genetype, "samples": str(support),
                          "distance": "NA"}
        if as_json:
            # Put the separator *before* every element but the first so the
            # concatenated stream never ends in ",]" (invalid JSON).
            separator = "," if outliers_id else ""
            chunk = separator + json.dumps(formatted_info)
        else:
            chunk = formatted_info
        outliers_id.append(g)
        print("outlier found :" + g)
        yield chunk

    if outliers_id:
        pr.save_outliers(1, outliers_id)

    if as_json:
        yield "]}"
开发者ID:armell,项目名称:RNASEqTool,代码行数:33,代码来源:outliers.py


示例4: reorient_and_discard_non_steady

def reorient_and_discard_non_steady(in_file, float32=False):
    """Reorient an image to canonical axes and drop leading outlier volumes.

    The image is loaded, reoriented with ``nb.as_closest_canonical`` and the
    per-volume maximum of (at most) the first 50 volumes is turned into a
    robust score ``(max - median) / MAD``.  Starting from the first volume,
    consecutive volumes with a score above 10 are counted (at most 10 are
    inspected) and removed from the beginning of the series.

    :param in_file: path of the 4D image to process
    :param float32: if True, data wider than 4 bytes per item is cast to
        ``float32`` before processing
    :return: tuple ``(exclude_index, out_path)`` with the number of dropped
        volumes and the absolute path of the written file, which is placed
        in the current working directory under the input's base name
    """
    import nibabel as nb
    import os
    import numpy as np
    from statsmodels.robust.scale import mad

    _, outfile = os.path.split(in_file)

    nii = nb.as_closest_canonical(nb.load(in_file))
    in_data = nii.get_data()

    # downcast to reduce space consumption and improve performance
    if float32 and np.dtype(in_data.dtype).itemsize > 4:
        in_data = in_data.astype(np.float32)

    data = in_data[:, :, :, :50]
    timeseries = data.max(axis=0).max(axis=0).max(axis=0)
    outlier_timecourse = (timeseries - np.median(timeseries)) / mad(
        timeseries)
    exclude_index = 0
    # Slicing instead of indexing range(10) keeps runs with fewer than ten
    # volumes from raising IndexError.
    for score in outlier_timecourse[:10]:
        if score > 10:
            exclude_index += 1
        else:
            break

    nb.Nifti1Image(in_data[:, :, :, exclude_index:], nii.affine, nii.header).to_filename(outfile)
    nii.uncache()
    return exclude_index, os.path.abspath(outfile)
开发者ID:yingqiuz,项目名称:mriqc,代码行数:30,代码来源:misc.py


示例5: artifact_mask

def artifact_mask(imdata, airdata, distance, zscore=10.):
    """Compute a mask of artifact voxels located in the air region.

    Positive values of ``imdata * airdata`` are shifted by their median and
    scaled by their MAD; voxels scoring above ``zscore`` are flagged, except
    where ``distance < .10``.  The flagged set is cleaned with a binary
    opening and restricted to ``airdata > 0``.  If fewer than 100 positive
    background voxels exist, an all-zero array like ``airdata`` is returned.

    Note: a non-integer ``airdata`` is binarized in place (values below .95
    become 0, the remaining positive values become 1).
    """
    from statsmodels.robust.scale import mad

    is_integer_mask = np.issubdtype(airdata.dtype, np.integer)
    if not is_integer_mask:
        airdata[airdata < .95] = 0
        airdata[airdata > 0.] = 1

    background = imdata * airdata
    if np.count_nonzero(background > 0) < 100:
        return np.zeros_like(airdata)

    # Location and spread of the positive background values.
    positive_values = background[background > 0]
    bg_location = np.median(positive_values)
    bg_spread = mad(positive_values)
    background[background > 0] -= bg_location
    # The mask is evaluated again here, so only values that are still
    # positive after the shift get rescaled.
    background[background > 0] /= bg_spread

    # Flag voxels above the z-score cutoff, ignoring those with a small
    # ``distance`` value.
    artifacts = np.zeros_like(background)
    artifacts[background > zscore] = 1
    artifacts[distance < .10] = 0

    # Binary opening with a 3D, connectivity-1 structuring element.
    structure = nd.generate_binary_structure(3, 1)
    artifacts = nd.binary_opening(artifacts, structure).astype(np.uint8)
    artifacts[airdata <= 0] = 0

    return artifacts
开发者ID:oesteban,项目名称:mriqc,代码行数:31,代码来源:anatomical.py


示例6: _filter_peaks_without_replicates

 def _filter_peaks_without_replicates(self, df):
     """Filter peaks by a MAD-based expression cutoff and by fold change.

     The cutoff is ``self._mad_multiplier`` times the MAD (centered at
     zero) of the row means over ``self._exp_lib_list``; rows whose mean is
     below it are dropped.  Remaining rows must have ``fold_change`` of at
     least ``self._fc_cutoff``.  Progress and timings are printed.  An
     empty frame is returned as soon as one appears.
     """
     done_msg = "Removal took {} seconds. DataFrame contains now {} rows."

     # MAD of the per-row means, computed on the unfiltered frame.
     exp_means = df.loc[:, self._exp_lib_list].mean(axis=1)
     median_abs_dev_from_zero = mad(exp_means, center=0.0)

     print("Removing peaks based on mad cutoff from DataFrame "
           "with {} rows...".format(len(df)), flush=True)
     started = time()
     min_expr = self._mad_multiplier * median_abs_dev_from_zero
     print("Minimal peak expression based on mean over RIP/CLIP "
           "libraries:{} (MAD from zero: {})".format(
               min_expr, median_abs_dev_from_zero), flush=True)
     df = df.loc[exp_means >= min_expr, :]
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     if df.empty:
         return df

     print("Removing windows based on minimum fold change from DataFrame "
           "with {} rows...".format(len(df)), flush=True)
     started = time()
     # ``@self`` in the query string resolves to the local name ``self``.
     df = df.query('fold_change >= @self._fc_cutoff')
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     return df
开发者ID:tbischler,项目名称:PEAKachu,代码行数:27,代码来源:adaptive.py


示例7: clipOutlier2D

def clipOutlier2D(arr2D, nSig=10):
    """Return a copy of ``arr2D`` with outliers replaced by the inlier median.

    The scale is the square root of the summed squares of ``scale.mad``
    taken over ``arr2D``.  Entries further than ``nSig`` times that scale
    from the axis-0 median are overwritten with the median of all entries
    that lie within the limit.  The input array is not modified.
    """
    clipped = arr2D.copy()
    axis0_median = median(clipped, axis=0)
    global_scale = np.sqrt((scale.mad(clipped) ** 2.).sum())

    deviation = abs(clipped - axis0_median)
    limit = nSig * global_scale
    is_outlier = deviation > limit
    is_inlier = deviation <= limit

    clipped[is_outlier] = median(clipped[is_inlier], axis=0)
    return clipped
开发者ID:exowanderer,项目名称:ExoplanetTSO,代码行数:8,代码来源:Exoplanet_TSO_-_Photometric_Extraction_Pipeline.py


示例8: cut_modified

def cut_modified(x, q, use_mad_for_std=True):
    """Bin ``x`` at edges placed ``normal.ppf(q)`` spreads around its mean.

    The edges are ``mean + normal.ppf(level) * spread`` for every level in
    ``q``, plus two outer edges just below the minimum and just above the
    maximum of the non-missing values.  ``spread`` is ``mad(x)`` when
    ``use_mad_for_std`` is true and ``np.std(x)`` otherwise.

    :param x: values to bin; must provide ``dropna()`` (e.g. a pandas Series)
    :param q: iterable of probability levels handed to ``normal.ppf``
    :param use_mad_for_std: use the MAD instead of the standard deviation
    :return: result of ``pd.cut`` labelled ``0 .. n_bins - 1``, or a list of
        NaN with the length of ``x`` when binning raises ``ValueError``
    """
    try:
        # A list comprehension is required: on Python 3 ``map`` is a lazy
        # iterator and ``np.asarray`` would wrap it in a 0-d object array.
        quantiles_in_sigmas = np.asarray([normal.ppf(level) for level in q])
        x_clean = x.dropna()
        mean = np.mean(x_clean)
        std = mad(x_clean) if use_mad_for_std else np.std(x_clean)
        bins = mean + quantiles_in_sigmas * std
        bins = np.sort(np.append(bins, (x_clean.min() - 1E-6, x_clean.max() + 1E-6)))
        return pd.cut(x, bins, labels=range(len(bins) - 1))
    except ValueError:
        # ``pd.np`` no longer exists in current pandas; use NumPy directly.
        return [np.nan] * len(x)
开发者ID:shirinnj,项目名称:ML-Project,代码行数:11,代码来源:new_partitioning.py


示例9: independent_variable_model_collapse

def independent_variable_model_collapse(model,independent_column_name="Frequency", **options):
    """Returns a model with a single set of independent variables.

    All rows sharing a value of ``independent_column_name`` are collapsed
    into one row.  The ``method`` option (default ``"mean"``) selects the
    statistic and is matched case-insensitively by substring:
    geometric mean, mean/average, median, standard deviation (``st``),
    variance (``var``), ``rms``, ``rss`` or ``mad``.
    Geometric means of odd number of negative values fails.

    :param model: table exposing ``options``, ``elements``, ``column_names``,
        ``data`` and column lookup by name; a ``pandas.DataFrame`` is first
        converted with ``DataFrame_to_AsciiDataTable``
    :param independent_column_name: name of the column to collapse on
    :param options: overrides for the options copied from ``model``
    :return: a new ``AsciiDataTable`` built from the collapsed rows
    :raises ValueError: if ``method`` matches none of the known statistics
    """
    if isinstance(model, pandas.DataFrame):
        # The converted table has to replace ``model``: everything below
        # uses the table interface (``options``, ``elements``, ``data``).
        model = DataFrame_to_AsciiDataTable(model)
    defaults = {"method": "mean"}
    # load other options from model
    for option, value in model.options.items():
        if not re.search('begin_line|end_line', option):
            defaults[option] = value
    for element in model.elements:
        if model.__dict__[element]:
            if re.search("meta", element, re.IGNORECASE):
                defaults["metadata"] = model.metadata.copy()
            else:
                defaults[element] = model.__dict__[element][:]
    # Caller-supplied options take precedence over those from the model.
    collapse_options = dict(defaults)
    collapse_options.update(options)
    method = collapse_options["method"]

    unique_independent_variable_list = sorted(set(model[independent_column_name]))
    independent_variable_selector = model.column_names.index(independent_column_name)
    out_data = []
    for independent_variable in unique_independent_variable_list:
        rows = np.array([x for x in model.data[:]
                         if x[independent_variable_selector] == independent_variable])
        # 'geometric' is tested first so that "geometric mean" is not
        # swallowed by the plain 'mean' pattern.
        if re.search('geometric', method, re.IGNORECASE):
            new_row = gmean(rows, axis=0).tolist()
        elif re.search('mean|av', method, re.IGNORECASE):
            new_row = np.mean(rows, axis=0).tolist()
        elif re.search('median', method, re.IGNORECASE):
            new_row = np.median(rows, axis=0).tolist()
        elif re.search('st', method, re.IGNORECASE):
            new_row = np.std(rows, axis=0).tolist()
        elif re.search('var', method, re.IGNORECASE):
            new_row = np.var(rows, axis=0, dtype=np.float64).tolist()
        elif re.search('rms', method, re.IGNORECASE):
            new_row = np.sqrt(np.mean(np.square(rows), axis=0, dtype=np.float64)).tolist()
        elif re.search('rss', method, re.IGNORECASE):
            new_row = np.sqrt(np.sum(np.square(rows), axis=0, dtype=np.float64)).tolist()
        elif re.search('mad', method, re.IGNORECASE):
            new_row = mad(rows, axis=0).tolist()
        else:
            # Without this branch ``new_row`` would be unbound (or stale
            # from the previous group).
            raise ValueError("Collapse method %r not understood" % (method,))
        # The statistic is meaningless for the independent column itself,
        # so restore its value.
        new_row[independent_variable_selector] = independent_variable
        out_data.append(new_row)

    collapse_options["data"] = out_data

    if collapse_options["specific_descriptor"]:
        collapse_options["specific_descriptor"] = collapse_options["method"] + "_" + \
                                                  collapse_options["specific_descriptor"]
    resulting_model = AsciiDataTable(None, **collapse_options)
    return resulting_model
开发者ID:aricsanders,项目名称:pyMeasure,代码行数:54,代码来源:GeneralAnalysis.py


示例10: _estimate_scale

 def _estimate_scale(self, resid):
     """
     Estimates the scale based on the option provided to the fit method.

     A string ``self.scale_est`` is matched case-insensitively: ``'mad'``
     uses ``scale.mad`` and ``'stand_mad'`` uses ``scale.stand_mad``.  A
     ``scale.HuberScale`` instance selects ``scale.hubers_scale``; any
     other object is delegated to ``scale.scale_est`` and the result is
     squared.

     Raises ``ValueError`` for an unrecognized string option.
     """
     if isinstance(self.scale_est, str):
         option = self.scale_est.lower()
         if option == 'mad':
             return scale.mad(resid)
         if option == 'stand_mad':
             return scale.stand_mad(resid)
         # An unknown option used to fall through and return None, which
         # only surfaced later as an unrelated failure.
         raise ValueError("Option %s for scale_est not understood" %
                          self.scale_est)
     elif isinstance(self.scale_est, scale.HuberScale):
         return scale.hubers_scale(self.df_resid, self.nobs, resid)
     else:
         return scale.scale_est(self, resid)**2
开发者ID:CRP,项目名称:statsmodels,代码行数:13,代码来源:robust_linear_model.py


示例11: remove_outliers

def remove_outliers(t, delta, mad_factor=3):
    """
    Replace outliers of ``t`` relative to its ``l1tf`` trend.

    A sample is an outlier when its residual (value minus trend) lies more
    than ``mad_factor`` MADs away from the median residual.  Outliers are
    set to NaN and then filled forward, with a backward fill for any
    leading gap.

    :param t: an instance of pd.Series
    :param delta: parameter for l1tf function
    :param mad_factor: number of MADs beyond which a residual is an outlier
    :return: a filled copy of ``t``; the input series is not modified
    """
    filtered_t = l1tf(t, delta)

    diff = t.values - np.asarray(filtered_t).squeeze()
    t = t.copy()
    t[np.abs(diff - np.median(diff)) > mad_factor * mad(diff)] = np.nan

    # ffill()/bfill() replace the deprecated ``fillna(method=...)`` form.
    t = t.ffill().bfill()
    return t
开发者ID:kingease,项目名称:py-l1tf,代码行数:13,代码来源:pandas_wrapper.py


示例12: strip_outliers

def strip_outliers(original_signal, delta, mad_coef=3):
    """
    Mask the outliers of a signal relative to its l1 trend.

    The residual between ``original_signal`` and ``l1(original_signal,
    delta)`` is compared with its own median; samples deviating by more
    than ``mad_coef`` MADs are set to NaN in a copy of the signal, which is
    returned.  Masked samples are left as NaN (no filling is applied).
    """
    trend = l1(original_signal, delta)
    residual = original_signal - trend.squeeze()

    distance_from_median = np.abs(residual - np.median(residual))
    cutoff = mad_coef * mad(residual)

    cleaned = original_signal.copy()
    cleaned[distance_from_median > cutoff] = np.nan
    return cleaned
开发者ID:bugra,项目名称:l1,代码行数:15,代码来源:tf.py


示例13: normalize_data

def normalize_data(data, out_file, mad=False,
                   mad_file=os.path.join('tables', 'full_mad_genes.tsv'),
                   output=True,
                   method='minmax'):
    """
    Filters unidentified genes and normalizes each input gene expression matrix

    Arguments:
    :param data: pandas DataFrame genes as rows and sample IDs as columns
    :param out_file: the file name to write normalized matrix
    :param mad: boolean indicating if MAD genes should be output to file
    :param mad_file: the file name to write mad genes
    :param output: if True write the normalized matrix to ``out_file``,
                   otherwise return it without writing anything
    :param method: the type of scaling to perform, 'minmax' or 'zscore'
                   (defaults to minmax)

    Output:
    Writes normalized matrix (if output=True) and mad genes to file
    (if mad=True); returns the normalized matrix if output=False

    :raises ValueError: if ``method`` is neither 'minmax' nor 'zscore'
    """

    # Drop all row names with unidentified gene.  ``~`` is the boolean
    # negation; unary ``-`` on a boolean array is rejected by NumPy.
    data = data[~data.index.str.contains('?', regex=False)]

    # Sort data by gene name
    data = data.sort_index()

    # The scalers work per column, so scale the transposed matrix
    # (one column per gene).
    if method == 'minmax':
        min_max_scaler = preprocessing.MinMaxScaler()
        data_normalize = min_max_scaler.fit_transform(data.T)
    elif method == 'zscore':
        data_normalize = preprocessing.scale(data.T, axis=0)
    else:
        # Previously an unknown method left ``data_normalize`` unbound.
        raise ValueError("Unknown normalization method: %r" % (method,))

    data_normalize = pd.DataFrame(data_normalize, index=data.columns,
                                  columns=data.index).T
    # Write to file
    if output:
        data_normalize.to_csv(out_file, sep='\t', header=True, index=True)
    else:
        return data_normalize

    # Write out MAD genes, largest MAD first
    if mad:
        all_mad_genes = scale.mad(data_normalize, c=1, axis=1)
        all_mad_genes = pd.Series(all_mad_genes,
                                  index=data_normalize.index.values)
        all_mad_genes = all_mad_genes.sort_values(ascending=False)
        all_mad_genes.to_csv(mad_file, sep='\t', header=False)
开发者ID:greenelab,项目名称:nf1_inactivation,代码行数:48,代码来源:process_rnaseq.py


示例14: measure_one_background

def measure_one_background(image, center, aperRad, metric, apMethod='exact', bgMethod='circle'):
    """Apply ``metric`` to the background pixels of ``image``.

    Args:
        image: array holding the frame to measure.
        center: aperture position handed to ``CircularAperture``.
        aperRad: a scalar radius selects every pixel *outside* the circle;
            a pair ``(innerRad, outerRad)`` selects the annulus between the
            two circles.
        metric: callable applied to the selected pixels.
        apMethod: forwarded as ``method`` to ``to_mask``.
        bgMethod: ``'median'`` additionally keeps only pixels closer than
            ``nSig`` MADs to the median of the selection; ``'kde'`` returns
            the location of the kernel-density maximum instead of calling
            ``metric``; any other value uses the selection unchanged.

    Returns:
        ``metric(image[selection])``, or the KDE mode for ``bgMethod='kde'``.
    """

    if np.ndim(aperRad) == 0:
        aperture  = CircularAperture(center, aperRad)
        aperture  = aperture.to_mask(method=apMethod)[0]    # list of ApertureMask objects (one for each position)
        # to_image() takes the output *shape*, as in the annulus branch.
        aperture  = ~aperture.to_image(image.shape).astype(bool) # inverse to keep 'outside' aperture
    else:
        innerRad, outerRad = aperRad

        innerAperture   = CircularAperture(center, innerRad)
        outerAperture   = CircularAperture(center, outerRad)

        # ``apMethod`` is the parameter that carries the mask method; the
        # bare name ``method`` is not defined in this function.
        inner_aper_mask = innerAperture.to_mask(method=apMethod)[0]
        inner_aper_mask = inner_aper_mask.to_image(image.shape).astype(bool)

        outer_aper_mask = outerAperture.to_mask(method=apMethod)[0]
        outer_aper_mask = outer_aper_mask.to_image(image.shape).astype(bool)

        # inside the outer circle but not inside the inner one
        aperture        = (~inner_aper_mask)*outer_aper_mask

    if bgMethod == 'median':
        medFrame  = median(image[aperture])
        madFrame  = scale.mad(image[aperture])

        # NOTE(review): ``nSig`` is not a parameter or local of this
        # function; it must be provided at module level -- confirm.
        medianMask= abs(image - medFrame) < nSig*madFrame

        aperture  = medianMask*aperture

    if bgMethod == 'kde':
        kdeFrame = kde.KDEUnivariate(image[aperture].ravel())
        kdeFrame.fit()

        return kdeFrame.support[kdeFrame.density.argmax()]

    return metric(image[aperture])
开发者ID:exowanderer,项目名称:ExoplanetTSO,代码行数:48,代码来源:bak_auxiliary.py


示例15: calc_robust_median_diff

def calc_robust_median_diff(in4d):
    """Plot the standardized MAD of frame-to-frame differences.

    The image at ``in4d`` is loaded and rearranged so that each row holds
    one index of the last axis.  Consecutive rows are subtracted, the MAD of
    every difference row is computed, and the MADs are standardized
    (mean removed, divided by their standard deviation) and plotted.  The
    figure is saved as a PNG whose name is built by ``fname_presuffix``
    from ``in4d`` with the prefix ``Robust_framediff_median``.
    """
    img = ni.load(in4d)
    dat = img.get_data()
    shape = dat.shape
    # reshape() copies when it has to, whereas assigning to ``.shape``
    # fails on transposed views that cannot be flattened in place.
    tdat = dat.T.reshape(shape[-1], int(np.prod(shape[:-1])))
    dat_diff = tdat[1:, :] - tdat[:-1, :]
    frame_mad = scale.mad(dat_diff, axis=1)
    mad_std = (frame_mad - frame_mad.mean()) / frame_mad.std()
    plt.plot(mad_std, 'ro-')
    plt.title('Robust Frame difference median')
    plt.grid()
    outfile = fname_presuffix(in4d, prefix='Robust_framediff_median',
                              suffix = '.png', use_ext=False)
    plt.savefig(outfile)
    # %-formatting gives the same text as the old print statement and is
    # valid on both Python 2 and Python 3.
    print('Saved  %s' % outfile)
    plt.close()
开发者ID:cindeem,项目名称:PetProcessing,代码行数:18,代码来源:qa.py


示例16: _run_interface

    def _run_interface(self, runtime):
        """Enhance the fieldmap in ``self.inputs.in_file`` and write the result.

        Steps, each controlled by an input flag:

        * ``despike``: run ``_despike2d`` on the squeezed float32 data.
        * ``in_mask`` / ``mask_erode``: load a mask and optionally erode it.
        * ``unwrap``: run ``_unwrap`` and save the result as ``out_unwrapped``.
        * ``bspline_smooth``: fit a B-spline field and, when large residuals
          span more than one slice, fit a second field on those slices and
          add it; otherwise the data is written as is.

        The output path is stored in ``self._results['out_file']``.
        Returns ``runtime`` unchanged.
        """
        from scipy import ndimage as sim

        fmap_nii = nb.load(self.inputs.in_file)
        data = np.squeeze(fmap_nii.get_data().astype(np.float32))

        # Despike / denoise (no-mask)
        if self.inputs.despike:
            data = _despike2d(data, self.inputs.despike_threshold)

        # NOTE(review): mask stays None when no in_mask is given, yet the
        # B-spline branch below evaluates ``mask > 0`` and ``errormask *= mask``
        # -- confirm a mask is always supplied when bspline_smooth is set.
        mask = None
        if isdefined(self.inputs.in_mask):
            masknii = nb.load(self.inputs.in_mask)
            mask = masknii.get_data().astype(np.uint8)

            # Erode mask
            if self.inputs.mask_erode > 0:
                struc = sim.iterate_structure(sim.generate_binary_structure(3, 2), 1)
                mask = sim.binary_erosion(
                    mask, struc,
                    iterations=self.inputs.mask_erode
                    ).astype(np.uint8)  # pylint: disable=no-member

        self._results['out_file'] = genfname(self.inputs.in_file, suffix='enh')
        # datanii wraps the data as it is *before* the optional unwrapping.
        datanii = nb.Nifti1Image(data, fmap_nii.affine, fmap_nii.header)

        if self.inputs.unwrap:
            data = _unwrap(data, self.inputs.in_magnitude, mask)
            self._results['out_unwrapped'] = genfname(self.inputs.in_file, suffix='unwrap')
            nb.Nifti1Image(data, fmap_nii.affine, fmap_nii.header).to_filename(
                self._results['out_unwrapped'])

        if not self.inputs.bspline_smooth:
            # No smoothing requested: write datanii and stop here.
            datanii.to_filename(self._results['out_file'])
            return runtime
        else:
            from fmriprep.utils import bspline as fbsp
            from statsmodels.robust.scale import mad

            # Fit BSplines (coarse)
            bspobj = fbsp.BSplineFieldmap(datanii, weights=mask,
                                          njobs=self.inputs.njobs)
            bspobj.fit()
            smoothed1 = bspobj.get_smoothed()

            # Manipulate the difference map
            diffmap = data - smoothed1.get_data()
            # MAD of the residual inside the mask, used as a robust spread.
            sderror = mad(diffmap[mask > 0])
            LOGGER.info('SD of error after B-Spline fitting is %f', sderror)
            # Flag masked voxels whose residual exceeds 10 times that spread.
            errormask = np.zeros_like(diffmap)
            errormask[np.abs(diffmap) > (10 * sderror)] = 1
            errormask *= mask

            # Extent, along the last axis, of the slices holding flagged voxels.
            nslices = 0
            try:
                errorslice = np.squeeze(np.argwhere(errormask.sum(0).sum(0) > 0))
                nslices = errorslice[-1] - errorslice[0]
            except IndexError:  # mask is empty, do not refine
                pass

            if nslices > 1:
                # Second fit restricted to the flagged slice range; its
                # smoothed residual is added onto the coarse field.
                diffmapmsk = mask[..., errorslice[0]:errorslice[-1]]
                diffmapnii = nb.Nifti1Image(
                    diffmap[..., errorslice[0]:errorslice[-1]] * diffmapmsk,
                    datanii.affine, datanii.header)

                bspobj2 = fbsp.BSplineFieldmap(diffmapnii, knots_zooms=[24., 24., 4.],
                                               njobs=self.inputs.njobs)
                bspobj2.fit()
                smoothed2 = bspobj2.get_smoothed().get_data()

                final = smoothed1.get_data().copy()
                final[..., errorslice[0]:errorslice[-1]] += smoothed2
            else:
                final = smoothed1.get_data()

            nb.Nifti1Image(final, datanii.affine, datanii.header).to_filename(
                self._results['out_file'])

        return runtime
开发者ID:rwblair,项目名称:preprocessing-workflow,代码行数:80,代码来源:fmap.py


示例17: rc

# Histogram figure of the fitted spectral indices (SI) of ``sources``.
rc("font", **font)

fig_hist = plt.figure(figsize=(18, 12))

colours = ["#AE70ED", "#FFB60B", "#62A9FF", "#59DF00"]

##Sometimes if the table contains single sources, there is no SI fit, so column contains NaNs
## atpy reads as '--' so need to avoid these
sources_SIs = [source for source in sources if source.SI != "--"]
SIs = [float(source.SI) for source in sources_SIs]

##Plot all of the SIs together
##-----------------------------------------------------------------------------------------------------------------------
ax1 = fig_hist.add_subplot(221)
plot_by_kde(ax1, SIs, "k", 3.0, "All fits (%d sources)" % len(SIs), "-")
mad_all = mad(np.array(SIs))
med_all = np.median(np.array(SIs))
# Raw string: "\p" is not a valid escape sequence in a normal literal.
ax1.axvline(med_all, color="k", linestyle="--", linewidth=2.0, label=r"Median %.2f$\pm$%.2f" % (med_all, mad_all))

##Compare the good fits to the bad fits
##-----------------------------------------------------------------------------------------------------------------------
ax2 = fig_hist.add_subplot(222)
good_fit_SIs = [float(source.SI) for source in sources_SIs if float(source.low_resid) == 0]
bad_fit_SIs = [float(source.SI) for source in sources_SIs if float(source.low_resid) == 1]

# The backslash of "\chi" is doubled (invalid escape otherwise); a raw
# string cannot be used here because "\n" must remain a newline.
plot_by_kde(ax2, good_fit_SIs, colours[0], 3.0, "$\\chi^2_{red}<=2.0$\n(%d sources)" % len(good_fit_SIs), "-")
plot_by_kde(ax2, bad_fit_SIs, colours[3], 3.0, "$\\chi^2_{red}>2.0$\n(%d sources)" % len(bad_fit_SIs), "--")

##Compare the matches with just one matched frequency to the base catalogue, to those
# with multiple frequencies matched
##-----------------------------------------------------------------------------------------------------------------------
开发者ID:tjgalvin,项目名称:PUMA,代码行数:31,代码来源:plot_table_stats.py


示例18: int

        if spline[0] == ul:

            rt_avg.append(int(spline[5]))

            if (len(rt_avg)) > 0:
                average = np.mean(rt_avg)
                corr_factor = int(spline[3]) / average
                break


    ################
    # remove outliers
    ################

    rt_mad = mad(retweet_counts)
    rt_median = np.median(retweet_counts)
    rt_top_thresh = round((rt_median + (50 * rt_mad)), 2)  # 5 is just an arbitrary number we choose

    print("MAD for rt is " + str(rt_mad))
    print("Median for rt is " + str(rt_median))
    print("Top threshold for rt is " + str(rt_top_thresh))

    rt_outliers = []
    retweets = []
    index = []

    for i, rc in enumerate(retweet_counts):

        if rc <= rt_top_thresh:
            retweets.append(rc)
开发者ID:cirnelle,项目名称:TwitterML,代码行数:30,代码来源:plot_retweet_growth.py


示例19: test_axisneg1

 def test_axisneg1(self):
     """``scale.mad`` of ``self.X`` along the last axis has shape (40, 10)."""
     expected_shape = (40, 10)
     result = scale.mad(self.X, axis=-1)
     assert_equal(result.shape, expected_shape)
开发者ID:ChadFulton,项目名称:statsmodels,代码行数:3,代码来源:test_scale.py


示例20: _prefilter_windows_gtest

 def _prefilter_windows_gtest(self, df):
     ''' Filter the windows of a data frame in four successive steps:
         every experiment lib must exceed its pseudocount
         (1/size_factor), the mean over the experiment libs must reach a
         MAD-based cutoff, experiment expression must be higher than
         control expression, and fold_change must reach the configured
         minimum. An empty frame is returned as soon as one appears.
     '''
     done_msg = "Removal took {} seconds. DataFrame contains now {} rows."

     # Step 1: all experiment libs must show expression, i.e. exceed
     #   expression = 1/size_factor ( = pseudocount)
     print("Removing windows where not all experiment libs show "
           "expression from DataFrame with {} rows...".format(len(df)),
           flush=True)
     started = time()
     for lib_name in self._exp_lib_list:
         pseudocount = 1/self._size_factors[lib_name]
         expressed = df.loc[:, lib_name] > pseudocount
         df = df.loc[expressed, :]
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     if df.empty:
         return df

     # Step 2: minimum expression cutoff based on the mean over the
     # experiment libraries
     print("Removing windows based on mad cutoff from DataFrame "
           "with {} rows...".format(len(df)), flush=True)
     started = time()
     exp_means = df.loc[:, self._exp_lib_list].mean(axis=1)
     median_abs_dev_from_zero = mad(exp_means, center=0.0)
     min_expr = self._mad_multiplier * median_abs_dev_from_zero
     print("Minimal window expression based on mean over RIP/CLIP "
           "libraries: {} (MAD from zero: {})".format(
               min_expr, median_abs_dev_from_zero), flush=True)
     df = df.loc[exp_means >= min_expr, :]
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     if df.empty:
         return df

     # Step 3: experiment expression above control expression
     print("Removing windows where experiment expression is lower than "
           "control expression from DataFrame with {} rows...".format(
               len(df)), flush=True)
     started = time()
     if self._pairwise_replicates:
         # each experiment lib must beat its paired control lib
         lib_pairs = zip(self._exp_lib_list, self._ctr_lib_list)
         for exp_lib, ctr_lib in lib_pairs:
             df = df.loc[(df.loc[:, exp_lib] > df.loc[:, ctr_lib]), :]
     else:
         # the weakest experiment lib must beat the strongest control lib
         lowest_exp = df.loc[:, self._exp_lib_list].min(axis=1)
         highest_ctr = df.loc[:, self._ctr_lib_list].max(axis=1)
         df = df.loc[lowest_exp > highest_ctr, :]
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     if df.empty:
         return df

     # Step 4: minimum fold change
     print("Removing windows based on minimum fold change from DataFrame "
           "with {} rows...".format(len(df)), flush=True)
     started = time()
     # ``@self`` in the query string resolves to the local name ``self``.
     df = df.query('fold_change >= @self._fc_cutoff')
     finished = time()
     print(done_msg.format((finished - started), len(df)), flush=True)
     return df
开发者ID:tbischler,项目名称:PEAKachu,代码行数:64,代码来源:window.py



注:本文中的statsmodels.robust.scale.mad函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python predstd.wls_prediction_std函数代码示例发布时间:2022-05-27
下一篇:
Python recursive_ls.RecursiveLS类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap