This article collects typical usage examples of the Python function statsmodels.robust.scale.mad. If you are wondering what exactly mad computes, how to call it, or what real-world usage looks like, the hand-picked code examples below should help.
Twenty code examples of the mad function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
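Before the examples, here is a minimal, self-contained sketch (toy data, not taken from any of the projects below) of what the function computes: statsmodels divides the raw median absolute deviation by the normal consistency constant c ≈ 0.6745, so on Gaussian data mad approximates the standard deviation while staying insensitive to outliers.

import numpy as np
from statsmodels.robust import scale

x = np.array([1.0, 2.0, 3.0, 4.0, 100.0])   # one gross outlier
print(scale.mad(x))             # robust scale of deviations from the median (~1.48)
print(scale.mad(x, center=0))   # scale of deviations from zero instead of the median
print(np.std(x))                # classical standard deviation, inflated by the outlier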
Example 1: _estimate_scale
def _estimate_scale(self, resid):
    """
    Estimates the scale based on the option provided to the fit method.
    """
    if isinstance(self.scale_est, str):
        if self.scale_est.lower() == "mad":
            return scale.mad(resid, center=0)
        if self.scale_est.lower() == "stand_mad":
            return scale.mad(resid)
        else:
            raise ValueError("Option %s for scale_est not understood" %
                             self.scale_est)
    elif isinstance(self.scale_est, scale.HuberScale):
        return self.scale_est(self.df_resid, self.nobs, resid)
    else:
        return scale.scale_est(self, resid) ** 2
Author: eph, Project: statsmodels, Lines: 15, Source: robust_linear_model.py
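For context, a hedged sketch of how this scale estimator is normally reached through the public statsmodels API; the data and model below are synthetic, and scale_est="mad" simply selects the branch shown above.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
X = sm.add_constant(rng.normal(size=(100, 2)))
y = X @ np.array([1.0, 2.0, -1.0]) + rng.standard_t(df=3, size=100)  # heavy-tailed noise

rlm_results = sm.RLM(y, X, M=sm.robust.norms.HuberT()).fit(scale_est="mad")
print(rlm_results.scale)  # the value produced by _estimate_scale with the "mad" option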
Example 2: _prefilter_windows_deseq
def _prefilter_windows_deseq(self, df):
    print("Removing windows where not all experiment libs show "
          "expression from DataFrame with {} rows...".format(len(df)),
          flush=True)
    t_start = time()
    for exp_lib in self._exp_lib_list:
        exp_lib_zero_count = 0.0
        df = df.loc[(df.loc[:, exp_lib] > exp_lib_zero_count), :]
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    if df.empty:
        return df
    initial_window_df = df.copy()
    # normalize counts on initial windows
    initial_window_df[self._lib_names_list] = initial_window_df[
        self._lib_names_list].div(self._size_factors, axis='columns')
    # minimum expression cutoff based on mean over experiment libraries
    print("Removing windows based on mad cutoff from DataFrame "
          "with {} rows...".format(len(df)), flush=True)
    t_start = time()
    median_abs_dev_from_zero = mad(initial_window_df.loc[
        :, self._exp_lib_list].mean(axis=1), center=0.0)
    min_expr = (self._mad_multiplier * median_abs_dev_from_zero)
    print("Minimal window expression based on mean over RIP/CLIP "
          "libraries: {} (MAD from zero: {})".format(
              min_expr, median_abs_dev_from_zero), flush=True)
    df = df.loc[initial_window_df.loc[:, self._exp_lib_list].mean(
        axis=1) >= min_expr, :]
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    return df
Author: tbischler, Project: PEAKachu, Lines: 33, Source: window.py
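The MAD-from-zero cutoff at the heart of this filter can be reproduced on a toy DataFrame; the library names and the multiplier below are placeholders for the instance attributes used above.

import pandas as pd
from statsmodels.robust.scale import mad

exp_libs = ["exp_rep1", "exp_rep2"]           # hypothetical experiment libraries
df = pd.DataFrame({"exp_rep1": [0.5, 3.0, 10.0, 0.2],
                   "exp_rep2": [0.8, 2.5, 12.0, 0.1]})

mad_multiplier = 2.0                           # stand-in for self._mad_multiplier
median_abs_dev_from_zero = mad(df[exp_libs].mean(axis=1), center=0.0)
min_expr = mad_multiplier * median_abs_dev_from_zero
filtered = df.loc[df[exp_libs].mean(axis=1) >= min_expr, :]
print(min_expr, len(filtered))                 # only the high-expression window survives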
Example 3: mad_outliers
def mad_outliers(data, genes, threshold, percentile=95, as_json=True):
    res = dr.get_dataset_ensembl_info()
    outliers_id = []
    if as_json:
        yield ("{\"outliers\":[")
    for g in genes:
        row_values = data.loc[g, :]
        cut_row_values = row_values
        med = cut_row_values.median()
        row_mad = mad(cut_row_values)
        if row_mad != 0.0:
            filtered = (cut_row_values - med) / row_mad
            support = len(filtered[filtered > threshold])
            if scoreatpercentile(filtered, 95) > threshold:
                info = [gene for gene in res if gene.ensemblgeneid == g][0]
                formatted_info = {"id": g, "name": info.genename,
                                  "type": info.genetype,
                                  "samples": str(support),
                                  "distance": "NA"}
                jinfo = json.dumps(formatted_info)
                jinfo += ","
                outliers_id.append(g)
                print("outlier found :" + g)
                if as_json:
                    yield (jinfo)
                else:
                    yield (formatted_info)
    if len(outliers_id) > 0:
        pr.save_outliers(1, outliers_id)
    if as_json:
        yield ("]}")
Author: armell, Project: RNASEqTool, Lines: 33, Source: outliers.py
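Stripped of the Ensembl lookup and JSON plumbing, the detection rule is a robust z-score per gene; the gene names, data, and threshold in this sketch are invented for illustration.

import numpy as np
import pandas as pd
from scipy.stats import scoreatpercentile
from statsmodels.robust.scale import mad

data = pd.DataFrame(np.random.default_rng(1).normal(5.0, 1.0, size=(2, 50)),
                    index=["GENE_A", "GENE_B"])
data.iloc[1, :3] = 40.0                        # spike a few samples of GENE_B

threshold = 5.0
for g in data.index:
    row = data.loc[g, :]
    robust_z = (row - row.median()) / mad(row)             # robust z-score per sample
    if scoreatpercentile(robust_z, 95) > threshold:
        print(g, "flagged,", int((robust_z > threshold).sum()), "supporting samples")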
Example 4: reorient_and_discard_non_steady
def reorient_and_discard_non_steady(in_file, float32=False):
    import nibabel as nb
    import os
    import numpy as np
    from statsmodels.robust.scale import mad

    _, outfile = os.path.split(in_file)

    nii = nb.as_closest_canonical(nb.load(in_file))
    in_data = nii.get_data()

    # downcast to reduce space consumption and improve performance
    if float32 and np.dtype(in_data.dtype).itemsize > 4:
        in_data = in_data.astype(np.float32)

    data = in_data[:, :, :, :50]
    timeseries = data.max(axis=0).max(axis=0).max(axis=0)
    outlier_timecourse = (timeseries - np.median(timeseries)) / mad(
        timeseries)
    exclude_index = 0
    for i in range(10):
        if outlier_timecourse[i] > 10:
            exclude_index += 1
        else:
            break

    nb.Nifti1Image(in_data[:, :, :, exclude_index:], nii.affine,
                   nii.header).to_filename(outfile)
    nii.uncache()
    return exclude_index, os.path.abspath(outfile)
Author: yingqiuz, Project: mriqc, Lines: 30, Source: misc.py
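The non-steady-state heuristic itself is independent of the NIfTI I/O and can be checked on a synthetic intensity trace; the 10-robust-SD threshold mirrors the function above.

import numpy as np
from statsmodels.robust.scale import mad

# first two "volumes" are much brighter, as in a non-steady-state fMRI run
timeseries = np.r_[np.array([500.0, 350.0]),
                   np.full(48, 100.0) + np.random.default_rng(2).normal(0, 2, 48)]
outlier_timecourse = (timeseries - np.median(timeseries)) / mad(timeseries)

exclude_index = 0
for i in range(10):
    if outlier_timecourse[i] > 10:
        exclude_index += 1
    else:
        break
print("non-steady-state volumes to drop:", exclude_index)   # 2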
Example 5: artifact_mask
def artifact_mask(imdata, airdata, distance, zscore=10.):
    """Computes a mask of artifacts found in the air region"""
    from statsmodels.robust.scale import mad

    if not np.issubdtype(airdata.dtype, np.integer):
        airdata[airdata < .95] = 0
        airdata[airdata > 0.] = 1

    bg_img = imdata * airdata
    if np.sum((bg_img > 0).astype(np.uint8)) < 100:
        return np.zeros_like(airdata)

    # Find the background threshold (the most frequently occurring value
    # excluding 0)
    bg_location = np.median(bg_img[bg_img > 0])
    bg_spread = mad(bg_img[bg_img > 0])
    bg_img[bg_img > 0] -= bg_location
    bg_img[bg_img > 0] /= bg_spread

    # Apply this threshold to the background voxels to identify voxels
    # contributing artifacts.
    qi1_img = np.zeros_like(bg_img)
    qi1_img[bg_img > zscore] = 1
    qi1_img[distance < .10] = 0

    # Create a structural element to be used in an opening operation.
    struc = nd.generate_binary_structure(3, 1)
    qi1_img = nd.binary_opening(qi1_img, struc).astype(np.uint8)
    qi1_img[airdata <= 0] = 0

    return qi1_img
Author: oesteban, Project: mriqc, Lines: 31, Source: anatomical.py
Example 6: _filter_peaks_without_replicates
def _filter_peaks_without_replicates(self, df):
    # calculate mad for original data frame
    median_abs_dev_from_zero = mad(df.loc[:, self._exp_lib_list].mean(
        axis=1), center=0.0)
    # minimum expression cutoff based on mean over experiment libraries
    print("Removing peaks based on mad cutoff from DataFrame "
          "with {} rows...".format(len(df)), flush=True)
    t_start = time()
    min_expr = (self._mad_multiplier * median_abs_dev_from_zero)
    print("Minimal peak expression based on mean over RIP/CLIP "
          "libraries: {} (MAD from zero: {})".format(
              min_expr, median_abs_dev_from_zero), flush=True)
    df = df.loc[df.loc[:, self._exp_lib_list].mean(axis=1) >= min_expr, :]
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    if df.empty:
        return df
    # minimum fold change
    print("Removing windows based on minimum fold change from DataFrame "
          "with {} rows...".format(len(df)), flush=True)
    t_start = time()
    df = df.query('fold_change >= @self._fc_cutoff')
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    return df
Author: tbischler, Project: PEAKachu, Lines: 27, Source: adaptive.py
Example 7: clipOutlier2D
def clipOutlier2D(arr2D, nSig=10):
    arr2D = arr2D.copy()
    medArr2D = median(arr2D, axis=0)
    sclArr2D = np.sqrt(((scale.mad(arr2D)**2.).sum()))
    outliers = abs(arr2D - medArr2D) > nSig*sclArr2D
    inliers = abs(arr2D - medArr2D) <= nSig*sclArr2D
    arr2D[outliers] = median(arr2D[inliers], axis=0)
    return arr2D
Author: exowanderer, Project: ExoplanetTSO, Lines: 8, Source: Exoplanet_TSO_-_Photometric_Extraction_Pipeline.py
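A quick, hedged check of the clipping behaviour on a small synthetic centroid array, using the same combined-MAD scale and 10-sigma default as clipOutlier2D above.

import numpy as np
from statsmodels.robust import scale

centers = np.random.default_rng(3).normal(0, 1, size=(100, 2))
centers[5] = [50.0, -50.0]                     # one wildly bad centroid measurement

med = np.median(centers, axis=0)
scl = np.sqrt((scale.mad(centers) ** 2.).sum())   # per-column MADs combined in quadrature
outliers = np.abs(centers - med) > 10 * scl
cleaned = centers.copy()
cleaned[outliers] = np.median(centers[~outliers], axis=0)   # replace with the inlier median
print(outliers.sum(), "entries replaced")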
Example 8: cut_modified
def cut_modified(x, q, use_mad_for_std=True):
    try:
        quantiles_in_sigmas = np.asarray(map(normal.ppf, q))
        x_clean = x.dropna()
        mean = np.mean(x_clean)
        std = np.std(x_clean) if not use_mad_for_std else mad(x_clean)
        bins = mean + quantiles_in_sigmas*std
        bins = np.sort(np.append(bins, (x_clean.min()-1E-6, x_clean.max()+1E-6)))
        return pd.cut(x, bins, labels=range(len(bins)-1))
    except ValueError as e:
        return [pd.np.NaN]*len(x)
Author: shirinnj, Project: ML-Project, Lines: 11, Source: new_partitioning.py
Example 9: independent_variable_model_collapse
def independent_variable_model_collapse(model, independent_column_name="Frequency", **options):
    """Returns a model with a single set of independent variables. Default is to average values together
    but geometric mean, std, variance, rss, mad and median are options.
    Geometric means of odd number of negative values fails"""
    if isinstance(model, pandas.DataFrame):
        model_1 = DataFrame_to_AsciiDataTable(model)
    defaults = {"method": "mean"}
    # load other options from model
    for option, value in model.options.items():
        if not re.search('begin_line|end_line', option):
            defaults[option] = value
    for element in model.elements:
        if model.__dict__[element]:
            if re.search("meta", element, re.IGNORECASE):
                defaults["metadata"] = model.metadata.copy()
            else:
                defaults[element] = model.__dict__[element][:]
    # We need to preserve the frequency column some how
    collapse_options = {}
    for key, value in defaults.items():
        collapse_options[key] = value
    for key, value in options.items():
        collapse_options[key] = value
    unique_independent_variable_list = sorted(list(set(model[independent_column_name])))
    independent_variable_selector = model.column_names.index(independent_column_name)
    out_data = []
    for index, independent_variable in enumerate(unique_independent_variable_list):
        data_row = [x for x in model.data[:] if x[independent_variable_selector] == independent_variable]
        if re.search('mean|av', collapse_options["method"], re.IGNORECASE):
            new_row = np.mean(np.array(data_row), axis=0).tolist()
        elif re.search('median', collapse_options["method"], re.IGNORECASE):
            new_row = np.median(np.array(data_row), axis=0).tolist()
        elif re.search('geometric', collapse_options["method"], re.IGNORECASE):
            new_row = gmean(np.array(data_row), axis=0).tolist()
        elif re.search('st', collapse_options["method"], re.IGNORECASE):
            new_row = np.std(np.array(data_row), axis=0).tolist()
        elif re.search('var', collapse_options["method"], re.IGNORECASE):
            new_row = np.var(np.array(data_row), axis=0, dtype=np.float64).tolist()
        elif re.search('rms', collapse_options["method"], re.IGNORECASE):
            new_row = np.sqrt(np.mean(np.square(np.array(data_row)), axis=0, dtype=np.float64)).tolist()
        elif re.search('rss', collapse_options["method"], re.IGNORECASE):
            new_row = np.sqrt(np.sum(np.square(np.array(data_row)), axis=0, dtype=np.float64)).tolist()
        elif re.search('mad', collapse_options["method"], re.IGNORECASE):
            new_row = mad(np.array(data_row), axis=0).tolist()
        new_row[independent_variable_selector] = independent_variable
        out_data.append(new_row)
    collapse_options["data"] = out_data
    if collapse_options["specific_descriptor"]:
        collapse_options["specific_descriptor"] = collapse_options["method"] + "_" + \
                                                  collapse_options["specific_descriptor"]
    resulting_model = AsciiDataTable(None, **collapse_options)
    return resulting_model
Author: aricsanders, Project: pyMeasure, Lines: 54, Source: GeneralAnalysis.py
Example 10: _estimate_scale
def _estimate_scale(self, resid):
    """
    Estimates the scale based on the option provided to the fit method.
    """
    if isinstance(self.scale_est, str):
        if self.scale_est.lower() == 'mad':
            return scale.mad(resid)
        if self.scale_est.lower() == 'stand_mad':
            return scale.stand_mad(resid)
    elif isinstance(self.scale_est, scale.HuberScale):
        return scale.hubers_scale(self.df_resid, self.nobs, resid)
    else:
        return scale.scale_est(self, resid)**2
Author: CRP, Project: statsmodels, Lines: 13, Source: robust_linear_model.py
Example 11: remove_outliers
def remove_outliers(t, delta, mad_factor=3):
    """
    :param t: an instance of pd.Series
    :param delta: parameter for l1tf function
    """
    filtered_t = l1tf(t, delta)

    diff = t.values - np.asarray(filtered_t).squeeze()

    t = t.copy()
    t[np.abs(diff - np.median(diff)) > mad_factor * mad(diff)] = np.nan
    t = t.fillna(method='ffill').fillna(method='bfill')

    return t
Author: kingease, Project: py-l1tf, Lines: 13, Source: pandas_wrapper.py
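Because l1tf requires an external trend-filtering solver, the sketch below swaps in a rolling median as the trend estimate; the MAD-based gating and forward/backward fill are the same idea as in remove_outliers, and the data are synthetic.

import numpy as np
import pandas as pd
from statsmodels.robust.scale import mad

t = pd.Series(np.sin(np.linspace(0, 6, 200)) * 10)
t.iloc[[20, 120]] = 100.0                      # inject two spikes

trend = t.rolling(11, center=True, min_periods=1).median()   # stand-in for the l1tf trend
diff = (t - trend).values

mad_factor = 3
cleaned = t.copy()
cleaned[np.abs(diff - np.median(diff)) > mad_factor * mad(diff)] = np.nan
cleaned = cleaned.fillna(method='ffill').fillna(method='bfill')
print(cleaned.max())                           # spikes are gone after the fill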
Example 12: strip_outliers
def strip_outliers(original_signal, delta, mad_coef=3):
    """
    Based on l1 trend filtering, this function provides an endpoint
    """
    filtered_t = l1(original_signal, delta)

    diff = original_signal - filtered_t.squeeze()
    median_of_difference = np.median(diff)
    mad_of_difference = mad(diff)

    filtered_signal = original_signal.copy()
    threshold = mad_coef * mad_of_difference
    filtered_signal[np.abs(diff - median_of_difference) > threshold] = np.nan
    #filtered_signal = pd.Series(filtered_signal).fillna(method='ffill').fillna(method='bfill')

    return filtered_signal
Author: bugra, Project: l1, Lines: 15, Source: tf.py
Example 13: normalize_data
def normalize_data(data, out_file, mad=False,
                   mad_file=os.path.join('tables', 'full_mad_genes.tsv'),
                   output=True,
                   method='minmax'):
    """
    Filters unidentified genes and normalizes each input gene expression matrix

    Arguments:
    :param data: pandas DataFrame genes as rows and sample IDs as columns
    :param out_file: the file name to write normalized matrix
    :param mad: boolean indicating if MAD genes should be output to file
    :param mad_file: the file name to write mad genes
    :param method: the type of scaling to perform (defaults to minmax)

    Output:
    Writes normalized matrix (if output=True) and mad genes to file
    (if mad=True); returns the normalized matrix if output=False
    """
    # Drop all row names with unidentified gene
    data = data[-data.index.str.contains('?', regex=False)]

    # Sort data by gene name
    data = data.sort_index()

    # Zero-one normalize
    if method == 'minmax':
        min_max_scaler = preprocessing.MinMaxScaler()
        data_normalize = min_max_scaler.fit_transform(data.T)
    elif method == 'zscore':
        data_normalize = preprocessing.scale(data.T, axis=0)

    data_normalize = pd.DataFrame(data_normalize, index=data.columns,
                                  columns=data.index).T

    # Write to file
    if output:
        data_normalize.to_csv(out_file, sep='\t', header=True, index=True)
    else:
        return data_normalize

    # Write out MAD genes
    if mad:
        all_mad_genes = scale.mad(data_normalize, c=1, axis=1)
        all_mad_genes = pd.Series(all_mad_genes,
                                  index=data_normalize.index.values)
        all_mad_genes = all_mad_genes.sort_values(ascending=False)
        all_mad_genes.to_csv(mad_file, sep='\t', header=False)
Author: greenelab, Project: nf1_inactivation, Lines: 48, Source: process_rnaseq.py
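The MAD-gene step at the end is worth isolating: c=1 disables the normal-consistency scaling so the raw per-gene median absolute deviation is reported, and axis=1 computes it across samples. The gene names below are made up for the sketch.

import numpy as np
import pandas as pd
from statsmodels.robust import scale

expr = pd.DataFrame(np.random.default_rng(4).random((3, 6)),
                    index=["TP53", "EGFR", "GAPDH"])
expr.loc["EGFR"] *= 10                         # larger spread, so a larger MAD

all_mad_genes = scale.mad(expr, c=1, axis=1)   # one raw MAD per gene (row)
ranking = pd.Series(all_mad_genes, index=expr.index).sort_values(ascending=False)
print(ranking)                                 # EGFR ranks first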
Example 14: measure_one_background
def measure_one_background(image, center, aperRad, metric, apMethod='exact', bgMethod='circle'):
    """Class methods are similar to regular functions.

    Note:
        Do not include the `self` parameter in the ``Args`` section.

    Args:
        param1: The first parameter.
        param2: The second parameter.

    Returns:
        True if successful, False otherwise.
    """
    if np.ndim(aperRad) == 0:
        aperture = CircularAperture(center, aperRad)
        # list of ApertureMask objects (one for each position)
        aperture = aperture.to_mask(method=apMethod)[0]
        # inverse to keep 'outside' aperture
        aperture = ~aperture.to_image(image).astype(bool)
    else:
        innerRad, outerRad = aperRad
        innerAperture = CircularAperture(center, innerRad)
        outerAperture = CircularAperture(center, outerRad)
        inner_aper_mask = innerAperture.to_mask(method=apMethod)[0]
        inner_aper_mask = inner_aper_mask.to_image(image.shape).astype(bool)
        outer_aper_mask = outerAperture.to_mask(method=apMethod)[0]
        outer_aper_mask = outer_aper_mask.to_image(image.shape).astype(bool)
        aperture = (~inner_aper_mask)*outer_aper_mask

    if bgMethod == 'median':
        medFrame = median(image[aperture])
        madFrame = scale.mad(image[aperture])
        # nSig is presumably defined at module level in the original project
        medianMask = abs(image - medFrame) < nSig*madFrame
        aperture = medianMask*aperture

    if bgMethod == 'kde':
        kdeFrame = kde.KDEUnivariate(image[aperture].ravel())
        kdeFrame.fit()
        return kdeFrame.support[kdeFrame.density.argmax()]

    return metric(image[aperture])
Author: exowanderer, Project: ExoplanetTSO, Lines: 48, Source: bak_auxiliary.py
Example 15: calc_robust_median_diff
def calc_robust_median_diff(in4d):
    """Calculates the robust median of slice to slice diffs"""
    img = ni.load(in4d)
    dat = img.get_data()
    shape = dat.shape
    tdat = dat.T
    tdat.shape = (shape[-1], np.prod(shape[:-1]))
    dat_diff = tdat[1:, :] - tdat[:-1, :]
    mad = scale.mad(dat_diff, axis=1)
    mad_std = (mad - mad.mean()) / mad.std()
    plt.plot(mad_std, 'ro-')
    plt.title('Robust Frame difference median')
    plt.grid()
    outfile = fname_presuffix(in4d, prefix='Robust_framediff_median',
                              suffix='.png', use_ext=False)
    plt.savefig(outfile)
    print('Saved ', outfile)
    plt.close()
Author: cindeem, Project: PetProcessing, Lines: 18, Source: qa.py
Example 16: _run_interface
def _run_interface(self, runtime):
    from scipy import ndimage as sim

    fmap_nii = nb.load(self.inputs.in_file)
    data = np.squeeze(fmap_nii.get_data().astype(np.float32))

    # Despike / denoise (no-mask)
    if self.inputs.despike:
        data = _despike2d(data, self.inputs.despike_threshold)

    mask = None
    if isdefined(self.inputs.in_mask):
        masknii = nb.load(self.inputs.in_mask)
        mask = masknii.get_data().astype(np.uint8)

        # Dilate mask
        if self.inputs.mask_erode > 0:
            struc = sim.iterate_structure(sim.generate_binary_structure(3, 2), 1)
            mask = sim.binary_erosion(
                mask, struc,
                iterations=self.inputs.mask_erode
            ).astype(np.uint8)  # pylint: disable=no-member

    self._results['out_file'] = genfname(self.inputs.in_file, suffix='enh')
    datanii = nb.Nifti1Image(data, fmap_nii.affine, fmap_nii.header)

    if self.inputs.unwrap:
        data = _unwrap(data, self.inputs.in_magnitude, mask)
        self._results['out_unwrapped'] = genfname(self.inputs.in_file, suffix='unwrap')
        nb.Nifti1Image(data, fmap_nii.affine, fmap_nii.header).to_filename(
            self._results['out_unwrapped'])

    if not self.inputs.bspline_smooth:
        datanii.to_filename(self._results['out_file'])
        return runtime
    else:
        from fmriprep.utils import bspline as fbsp
        from statsmodels.robust.scale import mad

        # Fit BSplines (coarse)
        bspobj = fbsp.BSplineFieldmap(datanii, weights=mask,
                                      njobs=self.inputs.njobs)
        bspobj.fit()
        smoothed1 = bspobj.get_smoothed()

        # Manipulate the difference map
        diffmap = data - smoothed1.get_data()
        sderror = mad(diffmap[mask > 0])
        LOGGER.info('SD of error after B-Spline fitting is %f', sderror)
        errormask = np.zeros_like(diffmap)
        errormask[np.abs(diffmap) > (10 * sderror)] = 1
        errormask *= mask

        nslices = 0
        try:
            errorslice = np.squeeze(np.argwhere(errormask.sum(0).sum(0) > 0))
            nslices = errorslice[-1] - errorslice[0]
        except IndexError:  # mask is empty, do not refine
            pass

        if nslices > 1:
            diffmapmsk = mask[..., errorslice[0]:errorslice[-1]]
            diffmapnii = nb.Nifti1Image(
                diffmap[..., errorslice[0]:errorslice[-1]] * diffmapmsk,
                datanii.affine, datanii.header)

            bspobj2 = fbsp.BSplineFieldmap(diffmapnii, knots_zooms=[24., 24., 4.],
                                           njobs=self.inputs.njobs)
            bspobj2.fit()
            smoothed2 = bspobj2.get_smoothed().get_data()

            final = smoothed1.get_data().copy()
            final[..., errorslice[0]:errorslice[-1]] += smoothed2
        else:
            final = smoothed1.get_data()

        nb.Nifti1Image(final, datanii.affine, datanii.header).to_filename(
            self._results['out_file'])

    return runtime
Author: rwblair, Project: preprocessing-workflow, Lines: 80, Source: fmap.py
Example 17: rc
rc("font", **font)
fig_hist = plt.figure(figsize=(18, 12))
colours = ["#AE70ED", "#FFB60B", "#62A9FF", "#59DF00"]
##Sometimes if the table contains single sources, there is no SI fit, so column contains NaNs
## atpy reads as '--' so need to avoid these
sources_SIs = [source for source in sources if source.SI != "--"]
SIs = [float(source.SI) for source in sources_SIs]
##Plot all of the SIs together
##-----------------------------------------------------------------------------------------------------------------------
ax1 = fig_hist.add_subplot(221)
plot_by_kde(ax1, SIs, "k", 3.0, "All fits (%d sources)" % len(SIs), "-")
mad_all = mad(np.array(SIs))
med_all = np.median(np.array(SIs))
ax1.axvline(med_all, color="k", linestyle="--", linewidth=2.0, label="Median %.2f$\pm$%.2f" % (med_all, mad_all))
##Compare the good fits to the bad fits
##-----------------------------------------------------------------------------------------------------------------------
ax2 = fig_hist.add_subplot(222)
good_fit_SIs = [float(source.SI) for source in sources_SIs if float(source.low_resid) == 0]
bad_fit_SIs = [float(source.SI) for source in sources_SIs if float(source.low_resid) == 1]
plot_by_kde(ax2, good_fit_SIs, colours[0], 3.0, "$\chi^2_{red}<=2.0$\n(%d sources)" % len(good_fit_SIs), "-")
plot_by_kde(ax2, bad_fit_SIs, colours[3], 3.0, "$\chi^2_{red}>2.0$\n(%d sources)" % len(bad_fit_SIs), "--")
##Compare the matches with just one matched frequency to the base catalogue, to those
# with multiple frequencies matched
##-----------------------------------------------------------------------------------------------------------------------
Author: tjgalvin, Project: PUMA, Lines: 31, Source: plot_table_stats.py
Example 18: int
if spline[0] == ul:
    rt_avg.append(int(spline[5]))
if (len(rt_avg)) > 0:
    average = np.mean(rt_avg)
    corr_factor = int(spline[3]) / average
    break

################
# remove outliers
################

rt_mad = mad(retweet_counts)
rt_median = np.median(retweet_counts)
rt_top_thresh = round((rt_median + (50 * rt_mad)), 2)  # 50 is just an arbitrary multiplier we chose

print("MAD for rt is " + str(rt_mad))
print("Median for rt is " + str(rt_median))
print("Top threshold for rt is " + str(rt_top_thresh))

rt_outliers = []
retweets = []
index = []

for i, rc in enumerate(retweet_counts):
    if rc <= rt_top_thresh:
        retweets.append(rc)
Author: cirnelle, Project: TwitterML, Lines: 30, Source: plot_retweet_growth.py
Example 19: test_axisneg1
def test_axisneg1(self):
    m = scale.mad(self.X, axis=-1)
    assert_equal(m.shape, (40, 10))
Author: ChadFulton, Project: statsmodels, Lines: 3, Source: test_scale.py
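The axis semantics this test exercises can be seen directly; the array shape below is chosen so that axis=-1 yields the (40, 10) result asserted above, and is only an assumption about self.X.

import numpy as np
from statsmodels.robust import scale

X = np.random.default_rng(5).standard_normal((40, 10, 30))
print(scale.mad(X, axis=-1).shape)   # (40, 10): one robust scale per trailing vector
print(scale.mad(X, axis=0).shape)    # (10, 30): the default axis collapses the first dimension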
Example 20: _prefilter_windows_gtest
def _prefilter_windows_gtest(self, df):
    ''' This function filters the windows in a data frame by minimum
        expression based on a MAD cutoff and requires higher expression
        in the experiment libs than in the controls
    '''
    # remove windows where not all experiment libs show expression:
    # expression = 1/size_factor ( = pseudocount)
    print("Removing windows where not all experiment libs show "
          "expression from DataFrame with {} rows...".format(len(df)),
          flush=True)
    t_start = time()
    for exp_lib in self._exp_lib_list:
        exp_lib_zero_count = 1/self._size_factors[exp_lib]
        df = df.loc[(df.loc[:, exp_lib] > exp_lib_zero_count), :]
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    if df.empty:
        return df
    # minimum expression cutoff based on mean over experiment libraries
    print("Removing windows based on mad cutoff from DataFrame "
          "with {} rows...".format(len(df)), flush=True)
    t_start = time()
    median_abs_dev_from_zero = mad(df.loc[:, self._exp_lib_list].mean(
        axis=1), center=0.0)
    min_expr = (self._mad_multiplier * median_abs_dev_from_zero)
    print("Minimal window expression based on mean over RIP/CLIP "
          "libraries: {} (MAD from zero: {})".format(
              min_expr, median_abs_dev_from_zero), flush=True)
    df = df.loc[df.loc[:, self._exp_lib_list].mean(axis=1) >= min_expr, :]
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    if df.empty:
        return df
    print("Removing windows where experiment expression is lower than "
          "control expression from DataFrame with {} rows...".format(
              len(df)), flush=True)
    t_start = time()
    if self._pairwise_replicates:
        # experiment expression must be larger than respective control
        # for each library pair
        for exp_lib, ctr_lib in zip(
                self._exp_lib_list, self._ctr_lib_list):
            df = df.loc[(df.loc[:, exp_lib] > df.loc[:, ctr_lib]), :]
    else:
        # minimum experiment expression larger than maximum
        # control expression
        df = df.loc[df.loc[:, self._exp_lib_list].min(
            axis=1) > df.loc[:, self._ctr_lib_list].max(axis=1), :]
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    if df.empty:
        return df
    # minimum fold change
    print("Removing windows based on minimum fold change from DataFrame "
          "with {} rows...".format(len(df)), flush=True)
    t_start = time()
    df = df.query('fold_change >= @self._fc_cutoff')
    t_end = time()
    print("Removal took {} seconds. DataFrame contains now {} rows.".
          format((t_end-t_start), len(df)), flush=True)
    return df
Author: tbischler, Project: PEAKachu, Lines: 64, Source: window.py
Note: The statsmodels.robust.scale.mad examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not republish without permission.