本文整理汇总了Python中pytadbit.utils.extraviews.tadbit_savefig函数的典型用法代码示例。如果您正苦于以下问题:Python tadbit_savefig函数的具体用法?Python tadbit_savefig怎么用?Python tadbit_savefig使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了tadbit_savefig函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: do_3d_plot
def do_3d_plot(nam, outfile, size, count, minmax, sigma=0, log=False):
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(1, 1, 1, projection='3d')
beg = -size / 2
end = size / 2
X = np.arange(beg, end, 1)
Y = np.arange(beg, end, 1)
X, Y = np.meshgrid(X, Y)
Z = np.array([np.array([float(i) for i in l.split()]) for l in open(nam) if not l.startswith('#')])
plt.title(nam + '\nMean: %.3f, median: %.3f, standard-deviation: %.3f (N=%d)' % (np.mean(Z), np.median(Z), np.std(Z), count))
if sigma:
Z = ndimage.gaussian_filter(Z, sigma=sigma, order=0)
if log:
Z = np.log(Z)
zspan = minmax if minmax else np.max(np.abs(Z))
zmax = zspan
zmin = -zspan
else:
zspan = minmax if minmax else np.max(np.abs(Z - 1))
zmin = -zspan + 1
zmax = zspan + 1
cmap = 'coolwarm' # 'coolwarm'
_ = ax.contourf(X, Y, Z, zdir='z', offset=zmin,
cmap=cmap, vmin=zmin, vmax=zmax)
surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cmap,
linewidth=0, antialiased=True, alpha=1,
vmin=zmin, vmax=zmax, shade=True)
ax.set_zlim3d(zmin, zmax)
ax.view_init(elev=15, azim=25)
cb = fig.colorbar(surf, shrink=0.5, aspect=20)
cb.set_label('%sverage normalized interactions%s' %
('Log a' if log else 'A',
'\nSmoothed with $\sigma=%s$' % sigma))
tadbit_savefig(outfile)
开发者ID:3DGenomes,项目名称:TADbit,代码行数:34,代码来源:average_submatrix.py
示例2: correlate_matrices
def correlate_matrices(hic_data1, hic_data2, max_dist=10, intra=False,
savefig=None, show=False, savedata=None):
"""
Compare the iteractions of two Hi-C matrices at a given distance,
with spearman rank correlation
:param hic_data1: Hi-C-data object
:param hic_data2: Hi-C-data object
:param 1 resolution: to be used for scaling the plot
:param 10 max_dist: maximum distance from diagonal (e.g. 10 mean we will
not look further than 10 times the resolution)
:param None savefig: path to save the plot
:param False intra: only takes into account intra-chromosomal contacts
:param False show: displays the plot
:returns: list of correlations and list of genomic distances
"""
corr = []
dist = []
if (intra and hic_data1.sections and hic_data2.sections and
hic_data1.sections == hic_data2.sections):
for i in xrange(1, max_dist + 1):
diag1 = []
diag2 = []
for crm in hic_data1.section_pos:
for j in xrange(hic_data1.section_pos[crm][0],
hic_data1.section_pos[crm][1] - i):
diag1.append(hic_data1[j, i + j])
diag2.append(hic_data2[j, i + j])
corr.append(spearmanr(diag1, diag2)[0])
dist.append(i)
else:
if intra:
warn('WARNING: hic_dta does not contain chromosome coordinates, ' +
'intra set to False')
for i in xrange(1, max_dist + 1):
diag1 = []
diag2 = []
for j in xrange(len(hic_data1) - i):
diag1.append(hic_data1[j, i + j])
diag2.append(hic_data2[j, i + j])
corr.append(spearmanr(diag1, diag2)[0])
dist.append(i)
if show or savefig:
plt.plot(dist, corr, color='orange', linewidth=3, alpha=.8)
plt.xlabel('Genomic distance in bins')
plt.ylabel('Spearman rank correlation')
plt.xlim((0, dist[-1]))
if savefig:
tadbit_savefig(savefig)
if show:
plt.show()
plt.close('all')
if savedata:
out = open(savedata, 'w')
out.write('# genomic distance\tSpearman rank correlation\n')
for i in xrange(len(corr)):
out.write('%s\t%s\n' % (dist[i], corr[i]))
out.close()
return corr, dist
开发者ID:kipal1988,项目名称:tadbit,代码行数:60,代码来源:analyze.py
示例3: objective_function
def objective_function(self, log=False, smooth=True,
axe=None, savefig=None):
"""
This function plots the objective function value per each Monte-Carlo
step.
:param False log: log plot
:param True smooth: curve smoothing
:param None axe: a matplotlib.axes.Axes object to define the plot
appearance
:param None savefig: path to a file where to save the image generated;
if None, the image will be shown using matplotlib GUI (the extension
of the file name will determine the desired format).
"""
show = False
if not axe:
fig = plt.figure(figsize=(7, 7))
axe = fig.add_subplot(111)
show = True
axe.patch.set_facecolor('lightgrey')
axe.patch.set_alpha(0.4)
axe.grid(ls='-', color='w', lw=1.5, alpha=0.6, which='major')
axe.grid(ls='-', color='w', lw=1, alpha=0.3, which='minor')
axe.set_axisbelow(True)
axe.minorticks_on() # always on, not only for log
# remove tick marks
axe.tick_params(axis='both', direction='out', top=False,
right=False, left=False, bottom=False)
axe.tick_params(axis='both', direction='out', top=False,
right=False, left=False, bottom=False,
which='minor')
else:
fig = axe.get_figure()
# text
plt.xlabel('Iteration number')
plt.ylabel('IMP Objective Function Value')
plt.title('Model ' + str(self['rand_init']))
# smooth
nrjz = self['log_objfun'][1:]
if smooth:
xnew = linspace(0, len(nrjz), 10000)
nrjz_smooth = spline(range(len(nrjz)), nrjz, xnew,
order=3)
axe.plot(xnew, nrjz_smooth, color='darkred')
else:
axe.plot(nrjz, color='darkred')
# plot
axe.plot(nrjz, color='darkred', marker='o', alpha=.5, ms=4, ls='None')
# log
if log:
axe.set_yscale('log')
if savefig:
tadbit_savefig(savefig)
elif show:
plt.show()
开发者ID:MarcoDiS,项目名称:TADbit,代码行数:56,代码来源:impmodel.py
示例4: plot_iterative_mapping
def plot_iterative_mapping(fnam1, fnam2, total_reads=None, axe=None, savefig=None):
"""
:param fnam: input file name
:param total_reads: total number of reads in the initial FASTQ file
:param None axe: a matplotlib.axes.Axes object to define the plot
appearance
:param None savefig: path to a file where to save the image generated;
if None, the image will be shown using matplotlib GUI (the extension
of the file name will determine the desired format).
:returns: a dictionary with the number of reads per mapped length
"""
count_by_len = {}
total_reads = total_reads or 1
if not axe:
fig=plt.figure()
_ = fig.add_subplot(111)
colors = ['olive', 'darkcyan']
for i, fnam in enumerate([fnam1, fnam2]):
fhandler = open(fnam)
line = fhandler.next()
while line.startswith('#'):
line = fhandler.next()
try:
count_by_len[i] = {}
while True:
_, length, _, _ = line.rsplit('\t', 3)
try:
count_by_len[i][int(length)] += 1
except KeyError:
count_by_len[i][int(length)] = 1
line = fhandler.next()
except StopIteration:
pass
fhandler.close()
lengths = sorted(count_by_len[i].keys())
for k in lengths[::-1]:
count_by_len[i][k] += sum([count_by_len[i][j]
for j in lengths if j < k])
plt.plot(lengths, [float(count_by_len[i][l]) / total_reads
for l in lengths],
label='read' + str(i + 1), linewidth=2, color=colors[i])
plt.xlabel('read length (bp)')
if total_reads != 1:
plt.ylabel('Proportion of mapped reads')
else:
plt.ylabel('Number of mapped reads')
plt.legend(loc=4)
if savefig:
tadbit_savefig(savefig)
elif not axe:
plt.show()
return count_by_len
开发者ID:Tong-Chen,项目名称:tadbit,代码行数:52,代码来源:analyze.py
示例5: plot_genomic_distribution
def plot_genomic_distribution(fnam, first_read=True, resolution=10000,
genome_seq=None, axe=None, ylim=None, savefig=None):
"""
:param fnam: input file name
:param True first_read: map first read.
:param 100 resolution: group reads that are closer than this resolution
parameter
:param None axe: a matplotlib.axes.Axes object to define the plot
appearance
:param None savefig: path to a file where to save the image generated;
if None, the image will be shown using matplotlib GUI (the extension
of the file name will determine the desired format).
"""
distr = {}
idx1, idx2 = (1, 3) if first_read else (7, 9)
for line in open(fnam):
crm, pos = line.split()[idx1:idx2]
pos = int(pos) / resolution
try:
distr[crm][pos] += 1
except KeyError:
try:
distr[crm][pos] = 1
except KeyError:
distr[crm] = {pos: 1}
if not axe:
fig=plt.figure(figsize=(15, 3 * len(distr.keys())))
max_y = max([max(distr[c].values()) for c in distr])
max_x = max([len(distr[c].values()) for c in distr])
for i, crm in enumerate(genome_seq if genome_seq else distr):
plt.subplot(len(distr.keys()), 1, i + 1)
plt.plot(range(max(distr[crm])),
[distr[crm].get(j, 0) for j in xrange(max(distr[crm]))],
color='red', lw=1.5, alpha=0.7)
if genome_seq:
if ylim:
plt.vlines(len(genome_seq[crm]) / resolution, ylim[0], ylim[1])
else:
plt.vlines(len(genome_seq[crm]) / resolution, 0, max_y)
plt.xlim((0, max_x))
plt.ylim(ylim or (0, max_y))
plt.title(crm)
if savefig:
tadbit_savefig(savefig)
elif not axe:
plt.show()
开发者ID:Adrimel,项目名称:tadbit,代码行数:51,代码来源:analyze.py
示例6: hic_map
def hic_map(data, genome_seq, biases=None, masked=None, resolution=100000,
savefig=None, show=False, savedata=None, focus=None):
if isinstance(data, str):
fnam = data
cumcs = {}
total = 0
for crm in genome_seq:
cumcs[crm] = total
total += len(genome_seq[crm]) / resolution + 1
# bin the data
data = [[0 for _ in xrange(total + 1)] for _ in xrange(total + 1)]
masked = masked or set()
for line in open(fnam):
read, cr1, ps1, _, _, _, _, cr2, ps2, _, _, _, _ = line.split()
if read in masked:
continue
ps1 = int(ps1) / resolution
ps2 = int(ps2) / resolution
try:
data[cumcs[cr1] + ps1][cumcs[cr2] + ps2] += 1
except:
break
else:
hic_data = data
beg, end = focus if focus else (0, len(hic_data))
beg -= 1 if focus else 0
if biases:
data = [[hic_data[len(hic_data) * i + j] / (biases[i] * biases[j])
for j in xrange(beg, end)]
for i in xrange(beg, end)]
else:
data = [[hic_data[len(hic_data) * i + j]
for j in xrange(beg, end)]
for i in xrange(beg, end)]
# do the plot
if show or savefig:
import numpy as np
plt.figure(figsize=(16, 12))
plt.imshow(np.log2(data), origin='lower', cmap='gist_earth',
interpolation='nearest')
plt.colorbar()
if savefig:
tadbit_savefig(savefig)
elif show:
plt.show()
if savedata:
out = open(savedata, 'w')
for line in data:
out.write('\t'.join([str(cell) for cell in line]) + '\n')
out.close()
开发者ID:Adrimel,项目名称:tadbit,代码行数:50,代码来源:analyze.py
示例7: plot_distance_vs_interactions
def plot_distance_vs_interactions(fnam, min_diff=100, max_diff=1000000,
resolution=100, axe=None, savefig=None):
"""
:param fnam: input file name
:param 100 min_diff: lower limit kn genomic distance (usually equal to read
length)
:param 1000000 max_diff: upper limit in genomic distance to look for
:param 100 resolution: group reads that are closer than this resolution
parameter
:param None axe: a matplotlib.axes.Axes object to define the plot
appearance
:param None savefig: path to a file where to save the image generated;
if None, the image will be shown using matplotlib GUI (the extension
of the file name will determine the desired format).
"""
dist_intr = {}
for line in open(fnam):
_, cr1, ps1, _, _, _, _, cr2, ps2, _ = line.rsplit('\t', 9)
if cr1 != cr2:
continue
diff = resolution * (abs(int(ps1) - int(ps2)) / resolution)
if max_diff > diff > min_diff:
dist_intr.setdefault(diff, 0)
dist_intr[diff] += 1
for k in dist_intr.keys()[:]:
if dist_intr[k] <= 2:
del(dist_intr[k])
if not axe:
fig=plt.figure()
ax = fig.add_subplot(111)
x, y = zip(*sorted(dist_intr.items(), key=lambda x:x[0]))
plt.plot(x, y, 'k.')
# sigma = 10
# p_x = gaussian_filter1d(x, sigma)
# p_y = gaussian_filter1d(y, sigma)
# plot line of best fit
# plt.plot(p_x, p_y,color= 'darkgreen', lw=2, label='Gaussian fit')
plt.yscale('log')
plt.xscale('log')
plt.xlabel('Log genomic distance (binned by %d bp)' % resolution)
plt.ylabel('Log interaction count')
# plt.legend()
if savefig:
tadbit_savefig(savefig)
elif not axe:
plt.show()
开发者ID:Adrimel,项目名称:tadbit,代码行数:50,代码来源:analyze.py
示例8: correlate_matrices
def correlate_matrices(hic_data1, hic_data2, max_dist=10,
savefig=None, show=False, savedata=None):
"""
Compare the iteractions of two Hi-C matrices at a given distance,
with spearman rank correlation
:param hic_data1: Hi-C-data object
:param hic_data2: Hi-C-data object
:param 1 resolution: to be used for scaling the plot
:param 10 max_dist: maximum distance from diagonal (e.g. 10 mean we will
not look further than 10 times the resolution)
:param None savefig: path to save the plot
:param False show: displays the plot
:returns: list of correlations and list of genomic distances
"""
corr = []
dist = []
for i in xrange(1, max_dist + 1):
diag1 = []
diag2 = []
for j in xrange(len(hic_data1) - i):
diag1.append(hic_data1[j, i + j])
diag2.append(hic_data2[j, i + j])
corr.append(spearmanr(diag1, diag2)[0])
dist.append(i)
if show or savefig:
plt.plot(dist, corr, color='orange', linewidth=3, alpha=.8)
plt.xlabel('Genomic distance in bins')
plt.ylabel('Spearman rank correlation')
plt.xlim((0, dist[-1]))
if savefig:
tadbit_savefig(savefig)
if show:
plt.show()
plt.close('all')
if savedata:
out = open(savedata, 'w')
out.write('# genomic distance\tSpearman rank correlation\n')
for i in xrange(len(corr)):
out.write('%s\t%s\n' % (dist[i], corr[i]))
out.close()
return corr, dist
开发者ID:yuanbaowen521,项目名称:tadbit,代码行数:44,代码来源:analyze.py
示例9: eig_correlate_matrices
def eig_correlate_matrices(hic_data1, hic_data2, nvect=6, normalized=False,
savefig=None, show=False, savedata=None,
remove_bad_columns=True, **kwargs):
"""
Compare the iteractions of two Hi-C matrices using their 6 first
eigenvectors, with pearson correlation
:param hic_data1: Hi-C-data object
:param hic_data2: Hi-C-data object
:param 6 nvect: number of eigenvectors to compare
:param None savefig: path to save the plot
:param False show: displays the plot
:param False normalized: use normalized data
:param True remove_bads: computes the union of bad columns between samples
and exclude them from the comparison
:param kwargs: any argument to pass to matplotlib imshow function
:returns: matrix of correlations
"""
data1 = hic_data1.get_matrix(normalized=normalized)
data2 = hic_data2.get_matrix(normalized=normalized)
## reduce matrices to remove bad columns
if remove_bad_columns:
# union of bad columns
bads = hic_data1.bads.copy()
bads.update(hic_data2.bads)
# remove them form both matrices
for bad in sorted(bads, reverse=True):
del(data1[bad])
del(data2[bad])
for i in xrange(len(data1)):
_ = data1[i].pop(bad)
_ = data2[i].pop(bad)
# get the log
size = len(data1)
data1 = nozero_log(data1, np.log2)
data2 = nozero_log(data2, np.log2)
# get the eigenvectors
ev1, evect1 = eigh(data1)
ev2, evect2 = eigh(data2)
corr = [[0 for _ in xrange(nvect)] for _ in xrange(nvect)]
# sort eigenvectors according to their eigenvalues => first is last!!
sort_perm = ev1.argsort()
ev1.sort()
evect1 = evect1[sort_perm]
sort_perm = ev2.argsort()
ev2.sort()
evect2 = evect2[sort_perm]
# calculate Pearson correlation
for i in xrange(nvect):
for j in xrange(nvect):
corr[i][j] = abs(pearsonr(evect1[:,-i-1],
evect2[:,-j-1])[0])
# plot
axe = plt.axes([0.1, 0.1, 0.6, 0.8])
cbaxes = plt.axes([0.85, 0.1, 0.03, 0.8])
if show or savefig:
im = axe.imshow(corr, interpolation="nearest",origin='lower', **kwargs)
axe.set_xlabel('Eigen Vectors exp. 1')
axe.set_ylabel('Eigen Vectors exp. 2')
axe.set_xticks(range(nvect))
axe.set_yticks(range(nvect))
axe.set_xticklabels(range(1, nvect + 2))
axe.set_yticklabels(range(1, nvect + 2))
axe.xaxis.set_tick_params(length=0, width=0)
axe.yaxis.set_tick_params(length=0, width=0)
cbar = plt.colorbar(im, cax = cbaxes )
cbar.ax.set_ylabel('Pearson correlation', rotation=90*3,
verticalalignment='bottom')
axe2 = axe.twinx()
axe2.set_yticks(range(nvect))
axe2.set_yticklabels(['%.1f' % (e) for e in ev2[-nvect:][::-1]])
axe2.set_ylabel('corresponding Eigen Values exp. 2', rotation=90*3,
verticalalignment='bottom')
axe2.set_ylim((-0.5, nvect - 0.5))
axe2.yaxis.set_tick_params(length=0, width=0)
axe3 = axe.twiny()
axe3.set_xticks(range(nvect))
axe3.set_xticklabels(['%.1f' % (e) for e in ev1[-nvect:][::-1]])
axe3.set_xlabel('corresponding Eigen Values exp. 1')
axe3.set_xlim((-0.5, nvect - 0.5))
axe3.xaxis.set_tick_params(length=0, width=0)
axe.set_ylim((-0.5, nvect - 0.5))
axe.set_xlim((-0.5, nvect - 0.5))
if savefig:
tadbit_savefig(savefig)
if show:
plt.show()
plt.close('all')
if savedata:
out = open(savedata, 'w')
out.write('# ' + '\t'.join(['Eigen Vector %s'% i
for i in xrange(nvect)]) + '\n')
for i in xrange(nvect):
out.write('\t'.join([str(corr[i][j])
for j in xrange(nvect)]) + '\n')
#.........这里部分代码省略.........
开发者ID:kangk1204,项目名称:TADbit,代码行数:101,代码来源:analyze.py
示例10: quality_plot
#.........这里部分代码省略.........
ax2.plot(sites[r_enz], linewidth=2, color = color.next(),
alpha=0.9,
label='Undigested RE site (%s: %s)' % (r_enz, r_sites[r_enz])
if any([f > 0 for f in fixes[r_enz]])
else 'Undigested & Dangling-Ends (%s: %s)' % (r_enz, r_sites[r_enz]))
ax2.set_ylabel('Undigested')
ax2.yaxis.label.set_color('darkred')
ax2.tick_params(axis='y', colors='darkred', **tkw)
lines, labels = ax2.get_legend_handles_labels()
ax3 = ax2.twinx()
color = iter(plt.cm.Blues(linspace(0.3, 0.95, len(liges))))
for r1, r2 in liges:
# print 'ligated', r1, r2
# print liges[(r1, r2)][:20]
ax3.plot(liges[(r1, r2)], linewidth=2, color=color.next(),
alpha=0.9,
label = 'Ligated (%s-%s: %s)' % (r1, r2, l_sites[(r1, r2)].upper()))
ax3.yaxis.label.set_color('darkblue')
ax3.tick_params(axis='y', colors='darkblue', **tkw)
ax3.set_ylabel('Ligated')
tmp_lines, tmp_labels = ax3.get_legend_handles_labels()
lines.extend(tmp_lines)
labels.extend(tmp_labels)
color = iter(plt.cm.Greens(linspace(0.3, 0.95, len(r_enzs))))
for i, r_enz in enumerate(r_enzs):
if any([f > 0 for f in fixes[r_enz]]):
ax4 = ax2.twinx()
ax4.spines["right"].set_position(("axes", 1.07))
make_patch_spines_invisible(ax4)
ax4.spines["right"].set_visible(True)
# print 'repaired', r_enz
# print fixes[r_enz][:20]
ax4.plot(fixes[r_enz], linewidth=2, color=color.next(),
alpha=0.9,
label='Dangling-ends (%s: %s)' % (r_enz, d_sites[r_enz]))
ax4.yaxis.label.set_color('darkgreen')
ax4.tick_params(axis='y', colors='darkgreen', **tkw)
ax4.set_ylabel('Dangling-ends')
tmp_lines, tmp_labels = ax4.get_legend_handles_labels()
lines.extend(tmp_lines)
labels.extend(tmp_labels)
else:
ax2.set_ylabel('Undigested & Dangling-ends')
ax2.set_xlim((0, max_seq_len))
# Count ligation sites
lig_cnt = {}
for k in liges:
lig_cnt[k] = (nansum(liges[k]) - liges[k][0] -
liges[k][max_seq_len / 2])
# Count undigested sites
sit_cnt = {}
for r_enz in r_enzs:
sit_cnt[r_enz] = (nansum(sites[r_enz]) - sites[r_enz][0] -
sites[r_enz][max_seq_len / 2])
# Count Dangling-Ends
des = {}
for r_enz in r_enzs:
if any([f > 0 for f in fixes[r_enz]]):
des[r_enz] = ((100. * (fixes[r_enz][0] + (fixes[r_enz][(max_seq_len / 2)]
if paired else 0))) / nreads)
else:
des[r_enz] = (100. * (sites[r_enz][0] + (sites[r_enz][(max_seq_len / 2)]
if paired else 0))) / nreads
# Decorate plot
title = ''
for r_enz in r_enzs:
lcnt = float(sum([lig_cnt[(r_enz1, r_enz2)] * (2 if r_enz1 == r_enz2 else 1)
for r_enz1 in r_enzs for r_enz2 in r_enzs
if r_enz1 == r_enz or r_enz2 == r_enz]))
title += ('Percentage of digested sites (not considering Dangling-Ends) '
'%s: %.1f%%\n' % (r_enz,
100. * float(lcnt) / (lcnt + sit_cnt[r_enz])))
for r_enz in r_enzs:
title += 'Percentage of dangling-ends %s: %.1f%%\n' % (r_enz, des[r_enz])
for r_enz1 in r_enzs:
for r_enz2 in r_enzs:
title += ('Percentage of reads with ligation site (%s-%s): %.1f%% \n' %
(r_enz1, r_enz2, (ligep[(r_enz1, r_enz2)] * 100.) / nreads))
plt.title(title.strip(), size=10, ha='left', x=0)
plt.subplots_adjust(right=0.85)
ax2.legend(lines, labels, bbox_to_anchor=(0.75, 1.0),
loc=3, borderaxespad=0., frameon=False, fontsize=9)
plt.tight_layout()
if savefig:
tadbit_savefig(savefig)
plt.close('all')
elif not axe:
plt.show()
for k in ligep:
ligep[k] = (ligep[k] * 100.) / nreads
return des, ligep
开发者ID:3DGenomes,项目名称:TADbit,代码行数:101,代码来源:fastq_utils.py
示例11: plot_genomic_distribution
def plot_genomic_distribution(fnam, first_read=True, resolution=10000,
axe=None, ylim=None, savefig=None, show=False,
savedata=None, chr_names=None, nreads=None):
"""
:param fnam: input file name
:param True first_read: uses first read.
:param 100 resolution: group reads that are closer than this resolution
parameter
:param None axe: a matplotlib.axes.Axes object to define the plot
appearance
:param None savefig: path to a file where to save the image generated;
if None, the image will be shown using matplotlib GUI (the extension
of the file name will determine the desired format).
:param None savedata: path where to store the output read counts per bin.
:param None chr_names: can pass a list of chromosome names in case only some
them the need to be plotted (this option may last even more than default)
"""
distr = {}
idx1, idx2 = (1, 3) if first_read else (7, 9)
genome_seq = OrderedDict()
fhandler = open(fnam)
line = fhandler.next()
if chr_names:
chr_names = set(chr_names)
cond1 = lambda x: x not in chr_names
else:
cond1 = lambda x: False
if nreads:
cond2 = lambda x: x >= nreads
else:
cond2 = lambda x: False
cond = lambda x, y: cond1(x) and cond2(y)
count = 0
while line.startswith('#'):
if line.startswith('# CRM '):
crm, clen = line[6:].split('\t')
genome_seq[crm] = int(clen)
line = fhandler.next()
try:
while True:
crm, pos = line.strip().split('\t')[idx1:idx2]
count += 1
if cond(crm, count):
line = fhandler.next()
if cond2(count):
break
continue
pos = int(pos) / resolution
try:
distr[crm][pos] += 1
except KeyError:
try:
distr[crm][pos] = 1
except KeyError:
distr[crm] = {pos: 1}
line = fhandler.next()
except StopIteration:
pass
fhandler.close()
if not axe:
_ = plt.figure(figsize=(15, 1 + 3 * len(
chr_names if chr_names else distr.keys())))
max_y = max([max(distr[c].values()) for c in distr])
max_x = max([len(distr[c].values()) for c in distr])
ncrms = len(chr_names if chr_names else genome_seq if genome_seq else distr)
data = {}
for i, crm in enumerate(chr_names if chr_names else genome_seq
if genome_seq else distr):
try:
data[crm] = [distr[crm].get(j, 0) for j in xrange(max(distr[crm]))]
if savefig:
plt.subplot(ncrms, 1, i + 1)
plt.plot(range(max(distr[crm])), data[crm],
color='red', lw=1.5, alpha=0.7)
except KeyError:
pass
if savefig:
if ylim:
plt.vlines(genome_seq[crm] / resolution, ylim[0], ylim[1])
else:
plt.vlines(genome_seq[crm] / resolution, 0, max_y)
plt.xlim((0, max_x))
plt.ylim(ylim or (0, max_y))
plt.title(crm)
if savefig:
tadbit_savefig(savefig)
plt.close('all')
elif show:
plt.show()
if savedata:
out = open(savedata, 'w')
out.write('# CRM\tstart-end\tcount\n')
out.write('\n'.join('%s\t%d-%d\t%d' % (c, (i * resolution) + 1, ((i + 1) * resolution), v) for c in data
for i, v in enumerate(data[c])))
out.write('\n')
out.close()
开发者ID:kangk1204,项目名称:TADbit,代码行数:100,代码来源:analyze.py
示例12: correlate_matrices
def correlate_matrices(hic_data1, hic_data2, max_dist=10, intra=False, axe=None,
savefig=None, show=False, savedata=None,
normalized=False, remove_bad_columns=True, **kwargs):
"""
Compare the iteractions of two Hi-C matrices at a given distance,
with spearman rank correlation
:param hic_data1: Hi-C-data object
:param hic_data2: Hi-C-data object
:param 1 resolution: to be used for scaling the plot
:param 10 max_dist: maximum distance from diagonal (e.g. 10 mean we will
not look further than 10 times the resolution)
:param None savefig: path to save the plot
:param False intra: only takes into account intra-chromosomal contacts
:param False show: displays the plot
:param False normalized: use normalized data
:param True remove_bads: computes the union of bad columns between samples
and exclude them from the comparison
:returns: list of correlations and list of genomic distances
"""
corrs = []
dists = []
if normalized:
get_the_guy1 = lambda i, j: (hic_data1[j, i] / hic_data1.bias[i] /
hic_data1.bias[j])
get_the_guy2 = lambda i, j: (hic_data2[j, i] / hic_data2.bias[i] /
hic_data2.bias[j])
else:
get_the_guy1 = lambda i, j: hic_data1[j, i]
get_the_guy2 = lambda i, j: hic_data2[j, i]
if remove_bad_columns:
# union of bad columns
bads = hic_data1.bads.copy()
bads.update(hic_data2.bads)
if (intra and hic_data1.sections and hic_data2.sections and
hic_data1.sections == hic_data2.sections):
for dist in xrange(1, max_dist + 1):
diag1 = []
diag2 = []
for crm in hic_data1.section_pos:
for j in xrange(hic_data1.section_pos[crm][0],
hic_data1.section_pos[crm][1] - dist):
i = j + dist
if j in bads or i in bads:
continue
diag1.append(get_the_guy1(i, j))
diag2.append(get_the_guy2(i, j))
corrs.append(spearmanr(diag1, diag2)[0])
dists.append(dist)
else:
if intra:
warn('WARNING: hic_dta does not contain chromosome coordinates, ' +
'intra set to False')
for dist in xrange(1, max_dist + 1):
diag1 = []
diag2 = []
for j in xrange(len(hic_data1) - dist):
i = j + dist
if j in bads or i in bads:
continue
diag1.append(get_the_guy1(i, j))
diag2.append(get_the_guy2(i, j))
corrs.append(spearmanr(diag1, diag2)[0])
dists.append(dist)
if show or savefig or axe:
if not axe:
fig = plt.figure()
axe = fig.add_subplot(111)
given_axe = False
else:
given_axe = True
axe.plot(dists, corrs, color='orange', linewidth=3, alpha=.8)
axe.set_xlabel('Genomic distance in bins')
axe.set_ylabel('Spearman rank correlation')
axe.set_xlim((0, dists[-1]))
if savefig:
tadbit_savefig(savefig)
if show:
plt.show()
if not given_axe:
plt.close('all')
if savedata:
out = open(savedata, 'w')
out.write('# genomic distance\tSpearman rank correlation\n')
for i in xrange(len(corrs)):
out.write('%s\t%s\n' % (dists[i], corrs[i]))
out.close()
if kwargs.get('get_bads', False):
return corrs, dists, bads
else:
return corrs, dists
开发者ID:kangk1204,项目名称:TADbit,代码行数:95,代码来源:analyze.py
示例13: plot_distance_vs_interactions
#.........这里部分代码省略.........
for k in xrange(len(xp)):
if yp[k]:
x.append(xp[k])
y.append(yp[k])
axe.plot(x, y, 'k.')
best = (float('-inf'), 0, 0, 0, 0, 0, 0, 0, 0, 0)
logx = np.log(x)
logy = np.log(y)
ntries = 100
# set k for better fit
# for k in xrange(1, ntries/5, ntries/5/5):
if resolution == 1:
k = 1
for i in xrange(3, ntries-2-k):
v1 = i * len(x) / ntries
try:
a1, b1, r21, _, _ = linregress(logx[ :v1], logy[ :v1])
except ValueError:
a1 = b1 = r21 = 0
r21 *= r21
for j in xrange(i + 1 + k, ntries - 2 - k):
v2 = j * len(x) / ntries
try:
a2, b2, r22, _, _ = linregress(logx[v1+k:v2], logy[v1+k:v2])
a3, b3, r23, _, _ = linregress(logx[v2+k: ], logy[v2+k: ])
except ValueError:
a2 = b2 = r22 = 0
a3 = b3 = r23 = 0
r2 = r21 + r22**2 + r23**2
if r2 > best[0]:
best = (r2, v1, v2, a1, a2, a3,
b1, b2, b3, k)
# plot line of best fit
(v1, v2,
a1, a2, a3,
b1, b2, b3, k) = best[1:]
yfit1 = lambda xx: np.exp(b1 + a1*np.array (np.log(xx)))
yfit2 = lambda xx: np.exp(b2 + a2*np.array (np.log(xx)))
yfit3 = lambda xx: np.exp(b3 + a3*np.array (np.log(xx)))
axe.plot(x[ :v1], yfit1(x[ :v1] ), color= 'yellow', lw=2,
label = r'$\alpha_{%s}=%.2f$' % (
'0-0.7 \mathrm{ Mb}' if resolution != 1 else '1', a1))
#label = r'$\alpha_1=%.2f$ (0-%d)' % (a1, x[v1]))
axe.plot(x[v1+k:v2], yfit2(x[v1+k:v2]), color= 'orange', lw=2,
label = r'$\alpha_{%s}=%.2f$' % (
'0.7-10 \mathrm{ Mb}' if resolution != 1 else '2', a2))
# label = r'$\alpha_2=%.2f$ (%d-%d)' % (a2, x[v1], x[v2]))
axe.plot(x[v2+k: ], yfit3(x[v2+k: ] ), color= 'red' , lw=2,
label = r'$\alpha_{%s}=%.2f$' % (
'10 \mathrm{ Mb}-\infty' if resolution != 1 else '3', a3))
# label = r'$\alpha_3=%.2f$ (%d-$\infty$)' % (a3, x[v2+k]))
else:
# from 0.7 Mb
v1 = 700000 / resolution
# to 10 Mb
v2 = 10000000 / resolution
try:
a1, b1, r21, _, _ = linregress(logx[ :v1], logy[ :v1])
except ValueError:
a1, b1, r21 = 0, 0, 0
try:
a2, b2, r22, _, _ = linregress(logx[v1:v2], logy[v1:v2])
except ValueError:
a2, b2, r22 = 0, 0, 0
try:
a3, b3, r23, _, _ = linregress(logx[v2: ], logy[v2: ])
except ValueError:
a3, b3, r23 = 0, 0, 0
yfit1 = lambda xx: np.exp(b1 + a1*np.array (np.log(xx)))
yfit2 = lambda xx: np.exp(b2 + a2*np.array (np.log(xx)))
yfit3 = lambda xx: np.exp(b3 + a3*np.array (np.log(xx)))
axe.plot(x[ :v1], yfit1(x[ :v1] ), color= 'yellow', lw=2,
label = r'$\alpha_{%s}=%.2f$' % (
'0-0.7 \mathrm{ Mb}' if resolution != 1 else '1', a1))
#label = r'$\alpha_1=%.2f$ (0-%d)' % (a1, x[v1]))
axe.plot(x[v1:v2], yfit2(x[v1:v2]), color= 'orange', lw=2,
label = r'$\alpha_{%s}=%.2f$' % (
'0.7-10 \mathrm{ Mb}' if resolution != 1 else '2', a2))
# label = r'$\alpha_2=%.2f$ (%d-%d)' % (a2, x[v1], x[v2]))
axe.plot(x[v2: ], yfit3(x[v2: ] ), color= 'red' , lw=2,
label = r'$\alpha_{%s}=%.2f$' % (
'10 \mathrm{ Mb}-\infty' if resolution != 1 else '3', a3))
# label = r'$\alpha_3=%.2f$ (%d-$\infty$)' % (a3, x[v2+k]))
axe.set_ylabel('Log interaction count')
axe.set_xlabel('Log genomic distance (resolution: %s)' % nicer(resolution))
axe.legend(loc='lower left', frameon=False)
axe.set_xscale('log')
axe.set_yscale('log')
axe.set_xlim((min_diff, max_diff))
try:
axe.set_ylim((0, max(y)))
except ValueError:
pass
if savefig:
tadbit_savefig(savefig)
plt.close('all')
elif show==True:
plt.show()
plt.close('all')
return (a1, b1, r21), (a2, b2, r22), (a3, b3, r23)
开发者ID:kangk1204,项目名称:TADbit,代码行数:101,代码来源:analyze.py
示例14: insert_sizes
def insert_sizes(fnam, savefig=None, nreads=None, max_size=99.9, axe=None,
show=False, xlog=False, stats=('median', 'perc_max')):
"""
Plots the distribution of dangling-ends lengths
:param fnam: input file name
:param None savefig: path where to store the output images.
:param 99.9 max_size: top percentage of distances to consider, within the
top 0.01% are usually found very long outliers.
:param False xlog: represent x axis in logarithmic scale
:param ('median', 'perc_max') stats: returns this set of values calculated from the
distribution of insert/fragment sizes. Possible values are:
- 'median' median of the distribution
- 'perc_max' percentil defined by the other parameter 'max_size'
- 'first_deacay' starting from the median of the distribution to the
first window where 10 consecutive insert sizes are counted less than
a given value (this given value is equal to the sum of all
sizes divided by 100 000)
- 'MAD' Double Median Adjusted Deviation
:returns: the median value and the percentile inputed as max_size.
"""
distr = {}
genome_seq = OrderedDict()
fhandler = open(fnam)
line = fhandler.next()
while line.startswith('#'):
if line.startswith('# CRM '):
crm, clen = line[6:].split()
genome_seq[crm] = int(clen)
line = fhandler.next()
des = []
if nreads:
nreads /= 2
try:
while True:
(crm1, pos1, dir1, _, re1, _,
crm2, pos2, dir2, _, re2) = line.strip().split('\t')[1:12]
if re1==re2 and crm1 == crm2 and dir1 != dir2:
pos1, pos2 = int(pos1), int(pos2)
if (pos2 > pos1) == int(dir1):
des.append(abs(pos2 - pos1))
if len(des) == nreads:
break
line = fhandler.next()
except StopIteration:
pass
fhandler.close()
max_perc = np.percentile(des, max_size)
perc99 = np.percentile(des, 99)
perc01 = np.percentile(des, 1)
perc50 = np.percentile(des, 50)
perc95 = np.percentile(des, 95)
perc05 = np.percentile(des, 5)
to_return = {'median': perc50}
cutoff = len(des) / 100000.
count = 0
for v in xrange(int(perc50), int(max(des))):
if des.count(v) < cutoff:
count += 1
else:
count = 0
if count >= 10:
to_return['first_decay'] = v - 10
break
else:
raise Exception('ERROR: not found')
to_return['perc_max'] = max_perc
to_return['MAD'] = mad(des)
if not savefig and not axe and not show:
return [to_return[k] for k in stats]
ax = setup_plot(axe, figsize=(10, 5.5))
desapan = ax.axvspan(perc95, perc99, facecolor='darkolivegreen', alpha=.3,
label='1-99%% DEs\n(%.0f-%.0f nts)' % (perc01, perc99))
ax.axvspan(perc01, perc05, facecolor='darkolivegreen', alpha=.3)
desapan = ax.axvspan(perc05, perc95, facecolor='darkseagreen', alpha=.3,
label='5-95%% DEs\n(%.0f-%.0f nts)' % (perc05, perc95))
deshist = ax.hist(des, bins=100, range=(0, max_perc),
alpha=.7, color='darkred', label='Dangling-ends')
ylims = ax.get_ylim()
plots = []
ax.set_xlabel('Genomic distance between reads')
ax.set_ylabel('Count')
ax.set_title('Distribution of dangling-ends ' +
'lenghts\n(median: %s, top %.1f%%, up to %0.f nts)' % (
perc50, max_size, max_perc))
if xlog:
ax.set_xscale('log')
ax.set_xlim((50, max_perc))
plt.subplots_adjust(left=0.1, right=0.75)
ax.legend(bbox_to_anchor=(1.4, 1), frameon=False)
if savefig:
tadbit_savefig(savefig)
elif show and not axe:
plt.show()
plt.close('all')
return [to_return[k] for k in stats]
开发者ID:kangk1204,项目名称:TADbit,代码行数:97,代码来源:analyze.py
示例15: filter_by_zero_count
def filter_by_zero_count(matrx, draw_hist=False, savefig=None):
"""
fits the distribution of Hi-C interaction count by column in the matrix to
a polynomial. Then searches for the first possible
"""
nbins = 100
# get sum of columns
cols = []
for c in sorted(matrx, key=sum):
cols.append(len(c) - c.count(0))
cols = np.array(cols)
if draw_hist:
plt.figure(figsize=(9, 9))
median = np.median(cols)
# mad = np.median([abs(median - c ) for c in cols])
best =(None, None, None, None)
# bin the sum of columns
xmin = min(cols)
xmax = max(cols)
y = np.linspace(xmin, xmax, nbins)
hist = np.digitize(cols, y)
x = [sum(hist == i) for i in range(1, nbins + 1)]
if draw_hist:
hist = plt.hist(cols, bins=100, alpha=.3, color='grey')
xp = range(0, cols[-1])
# check if the binning is correct
# we want at list half of the bins with some data
while list(x).count(0) > 2*len(x)/3:
cols = cols[:-1]
xmin = min(cols)
xmax = max(cols)
y = np.linspace(xmin, xmax, nbins)
hist = np.digitize(cols, y)
x = [sum(hist == i) for i in range(1, nbins + 1)]
if draw_hist:
plt.clf()
hist = plt.hist(cols, bins=100, alpha=.3, color='grey')
xp = range(0, cols[-1])
# find best polynomial fit in a given range
for order in range(7, 14):
z = np.polyfit(y, x, order)
zpp = np.polyder(z, m=1)
roots = np.roots(np.polyder(z))
# check that we are concave down, otherwise take next root
pente = np.polyval(zpp, abs(roots[-2] - roots[-1]) / 2 + roots[-1])
if pente > 0:
root = roots[-1]
else:
root = roots[-2]
# root must be higher than zero
if root <= 0:
continue
# and lower than the median
if root >= median:
continue
p = np.poly1d(z)
R2 = get_r2(p, x, y)
if best[0] < R2:
best = (R2, order, p, z, root)
p, z, root = best[2:]
if draw_hist:
a = plt.plot(xp, p(xp), "--", color='k')
b = plt.vlines(root, 0, plt.ylim()[1], colors='r', linestyles='dashed')
try:
plt.legend(a + [b], ['polyfit \n%s' % (
''.join([sub('e([-+][0-9]+)', 'e^{\\1}',
'$%s%.1fx^%s$' % ('+' if j>0 else '', j,
'{' + str(i) + '}'))
for i, j in enumerate(list(p)[::-1])])),
'first solution of polynomial derivation'],
fontsize='x-small')
except TypeError:
plt.legend(a + [b], ['polyfit \n%s' % (
''.join([sub('e([-+][0-9]+)', 'e^{\\1}',
'$%s%.1fx^%s$' % ('+' if j>0 else '', j,
'{' + str(i) + '}'))
for i, j in enumerate(list(p)[::-1])])),
'first solution of polynomial derivation'])
plt.ylim(0, plt.ylim()[1])
|
请发表评论