本文整理汇总了Python中scipy.cluster.hierarchy.linkage函数的典型用法代码示例。如果您正苦于以下问题:Python linkage函数的具体用法?Python linkage怎么用?Python linkage使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了linkage函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: random_distribution
def random_distribution(n):
    """Build a random n-by-n matrix, cluster its rows and columns, and show a dendrogram heat map.

    The matrix is drawn from a normal distribution with scale ``n``; its
    top-left quadrant is shifted by 75 and its bottom-right quadrant is
    replaced with Poisson samples (``lam=n``) so that two blocks stand out.

    :param n: side length of the square data matrix
    """
    # Slice bounds must be integers; ``n / 2`` is a float under Python 3.
    half = n // 2

    # make up some data
    data = np.random.normal(scale=n, size=(n, n))
    data[:half, :half] += 75
    data[half:, half:] = np.random.poisson(lam=n, size=data[half:, half:].shape)

    # cluster the rows
    # linkage() takes a *condensed* distance vector; a square distance matrix
    # would be interpreted as a set of observation vectors instead.
    row_Z = sch.linkage(ssd.pdist(data))
    row_idxing = sch.leaves_list(row_Z)
    row_labels = ['bar{}'.format(i) for i in range(n)]

    # cluster the columns
    col_Z = sch.linkage(ssd.pdist(data.T))
    col_idxing = sch.leaves_list(col_Z)
    col_labels = ['foo{}'.format(i) for i in range(n)]

    # make the dendrogram: reorder the data to follow the leaf order of both trees
    data = data[:, col_idxing][row_idxing, :]
    heatmap = pdh.DendroHeatMap(heat_map_data=data, left_dendrogram=row_Z, top_dendrogram=col_Z, heatmap_colors=("#ffeda0", "#feb24c", "#f03b20"), window_size="auto", color_legend_displayed=False, label_color="#777777")
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()  # alternatively: heatmap.save("example.png")
开发者ID:kaniblu,项目名称:dhm,代码行数:28,代码来源:example.py
示例2: getDistMatrixes
def getDistMatrixes(cls, distDict, distMeasure, linkageCriterion):
    """
    Build the square correlation matrix and the linkage matrix for one measure.

    The condensed (triangular) values stored in distDict under distMeasure are
    clustered with the given linkageCriterion after passing through
    cls.removeNanDistances.

    Returns the tuple (correlationMatrix, linkageMatrix, triangularDistMatrix).
    """
    from scipy.cluster.hierarchy import linkage
    from scipy.spatial.distance import squareform
    from numpy import ones, fill_diagonal

    storedValues = distDict[distMeasure]

    if distMeasure in (cls.CORR_PEARSON, cls.SIM_MCCONNAUGHEY):
        # These measures generate values between -1 and 1, so they are first
        # rescaled into a distance between 0 and 1 before clustering.
        triangularCorrMatrix = storedValues
        rescaled = [(value + 1) / 2 for value in triangularCorrMatrix]
        triangularDistMatrix = ones(len(triangularCorrMatrix)) - rescaled
        linkageMatrix = linkage(cls.removeNanDistances(triangularDistMatrix), linkageCriterion)

        # The stored values already are correlations: just make them square.
        correlationMatrix = squareform(triangularCorrMatrix)
        fill_diagonal(correlationMatrix, 1)
    else:
        # The stored values already are distances: cluster them directly.
        triangularDistMatrix = storedValues
        linkageMatrix = linkage(cls.removeNanDistances(triangularDistMatrix), linkageCriterion)

        # Turn the triangular distances into a square matrix of (1 - distance).
        squareDistMatrix = squareform(triangularDistMatrix)
        side = len(squareDistMatrix)
        correlationMatrix = ones((side, side)) - squareDistMatrix

    return correlationMatrix, linkageMatrix, triangularDistMatrix
开发者ID:johhorn,项目名称:gwas-clustering,代码行数:32,代码来源:CommonClusteringFunctions.py
示例3: draw_intensity
def draw_intensity(a, cmap=GREEN_CMAP, metric='euclidean', method='average', sort_x=True, sort_y=True):
    """Show DataFrame ``a`` as an image with optional dendrograms along the top and left.

    :param a: pandas DataFrame; its columns and index are used as tick labels
    :param cmap: colormap handed to ``plt.imshow``
    :param metric: distance metric handed to ``pdist``
    :param method: linkage method handed to ``linkage``
    :param sort_x: when exactly ``True``, cluster the columns and reorder them
    :param sort_y: when exactly ``True``, cluster the rows and reorder them
    """
    main_axes = plt.gca()
    divider = make_axes_locatable(main_axes)

    if sort_x is True:
        plt.sca(divider.append_axes("top", 0.5, pad=0))
        xlinkage = linkage(pdist(a.T, metric=metric), method=method, metric=metric)
        xdendro = dendrogram(xlinkage, orientation='top', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        # 'leaves' holds positions, so reorder positionally.
        a = a.iloc[:, xdendro['leaves']]

    if sort_y is True:
        plt.sca(divider.append_axes("left", 1.0, pad=0))
        ylinkage = linkage(pdist(a, metric=metric), method=method, metric=metric)
        ydendro = dendrogram(ylinkage, orientation='right', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        # DataFrame.ix was removed from pandas; iloc does the positional reorder.
        a = a.iloc[ydendro['leaves']]

    plt.sca(main_axes)
    plt.imshow(a, aspect='auto', interpolation='none',
               cmap=cmap, vmin=0.0, vmax=1.0)
    plt.colorbar(pad=0.15)
    plt.gca().yaxis.tick_right()
    plt.xticks(range(a.shape[1]), a.columns, rotation=90, size='small')
    plt.yticks(range(a.shape[0]), a.index, size='x-small')
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')
    plt.gca().invert_yaxis()
    plt.show()
开发者ID:neuroinformatics,项目名称:bah2016_registration,代码行数:34,代码来源:draw_cluster.py
示例4: hierarchical_clustering
def hierarchical_clustering(data, skill, method='single', metric='euclidean', dendrogram=True, concepts=False, cluster_number=3, corr_as_vectors=False):
    """Cluster the items (or concepts) of one skill from their correlation matrix.

    The correlation matrix comes from ``compute_corr``; missing correlations
    are replaced by 0. The result is the flat cluster assignment produced by
    ``fcluster`` with the "maxclust" criterion and ``cluster_number`` clusters.
    When ``dendrogram`` is true the tree is also drawn on the current figure.
    """
    pk, level = data.get_skill_id(skill)
    items = data.get_items_df()
    skills = data.get_skills_df()
    corr = compute_corr(data, merge_skills=concepts)
    print("Corr ({}) contain total {} values and from that {} nans".format(corr.shape, corr.size, corr.isnull().sum().sum()))
    corr[corr.isnull()] = 0

    # Both modes start from the items that belong to the requested skill.
    items = items[items["skill_lvl_" + str(level)] == pk]
    if not concepts:
        items = items[items["visualization"] != "pairing"]
        corr = pd.DataFrame(corr, index=items.index, columns=items.index)
        labels = [
            "{1} - {0}".format(row["name"], row["visualization"][0])
            for _, row in items.iterrows()
        ]
    else:
        has_lvl3 = ~items["skill_lvl_3"].isnull()
        skill_ids = items[has_lvl3]["skill_lvl_3"].unique()
        corr = pd.DataFrame(corr, index=skill_ids, columns=skill_ids)
        labels = list(skills.loc[corr.index]["name"])

    if not corr_as_vectors:
        # Treat (1 - correlation) as a distance matrix.
        Z = hr.linkage(dst.squareform(1 - corr), method=method)
    else:
        # Treat every row of the correlation matrix as an observation vector.
        Z = hr.linkage(corr, method=method, metric=metric)
    Z[Z < 0] = 0

    if dendrogram:
        plt.title('{}: method: {}, metric: {}, as vectors: {}'.format(skill, method, metric, corr_as_vectors))
        plt.xlabel("concepts" if concepts else 'items')
        plt.ylabel('distance')
        hr.dendrogram(Z, leaf_rotation=90., leaf_font_size=10., labels=labels)

    return hr.fcluster(Z, cluster_number, "maxclust")
开发者ID:thran,项目名称:experiments2.0,代码行数:32,代码来源:experiments_clustering.py
示例5: compare_clusters
def compare_clusters(args):
    """Compare every matrix listed in args['all'] with the reference matrix args['ref'].

    For each file one tab-separated line is written to args['output'] with the
    Mantel coefficient and p-value plus the Pearson correlation (and p-value)
    between the cophenetic distances of the two average-linkage trees. When a
    comparison raises, the error is printed and all four values are written
    as "Failed".

    :param args: mapping with the keys 'ref' (path), 'all' (iterable of paths)
        and 'output' (path of the report to write)
    """
    def _read_matrix(path):
        # Tab-separated table whose first column is the index.
        # ``.values`` replaces ``.as_matrix()``, which was removed from pandas.
        return pd.read_table(path, sep='\t', skipinitialspace=True, index_col=0).values

    ref_df = _read_matrix(args['ref'])
    check_symmetry(ref_df)
    linkage_ref = linkage(ref_df, 'average')
    c_ref, coph_dists_ref = cophenet(linkage_ref, pdist(ref_df))

    # ``with`` closes the report even if reading one of the inputs raises.
    with open(args['output'], "w") as outfile:
        # NOTE(review): "Manter_P-value" looks like a typo for "Mantel"; left
        # untouched because consumers may depend on the column name.
        outfile.write("Tree_cluster\tMantel_Correlation_Coefficient\tManter_P-value\tCophenetic_Pearson\tCophenetic_P-value\n")
        for i in args['all']:
            fst_df = _read_matrix(i)
            check_symmetry(fst_df)
            try:
                mantel_coeff, p_value_mantel, n = mantel(ref_df, fst_df)
                linkage_fst = linkage(fst_df, 'average')
                c_fst, coph_dists_fst = cophenet(linkage_fst, pdist(fst_df))
                cophenetic_pearson, p_value_cophenetic = pearsonr(coph_dists_ref, coph_dists_fst)
            except Exception as e:
                # Best effort per file: report the problem and keep going.
                print("Error : %s" % str(e))
                mantel_coeff = "Failed"
                p_value_mantel = "Failed"  # was misspelled, so the column kept a stale value
                cophenetic_pearson = "Failed"
                p_value_cophenetic = "Failed"
            outfile.write(i+"\t"+str(mantel_coeff)+"\t"+str(p_value_mantel)+"\t"+str(cophenetic_pearson)+"\t"+str(p_value_cophenetic)+"\n")
开发者ID:zorino,项目名称:ray,代码行数:33,代码来源:treeclust-compare.py
示例6: cluster_fps
def cluster_fps(self):
    """Cluster ``self.dm`` with average linkage and store the flat clusters.

    The coarse clustering cuts the tree at distance 0.3 and is stored in
    ``self.coarse_r``. An SVD-based refinement follows in the code, but its
    condition starts with ``False``, so the coarse result is always used.

    :returns: ``(clkg, clkg)`` while the refinement is switched off; the
        refinement path would return ``(lkg, clkg)``
    """
    clkg = hcluster.linkage(self.dm, method='average')
    coarse_r = hcluster.fcluster(clkg, 0.3, criterion='distance')
    self.coarse_r = coarse_r
    bcount = np.bincount(coarse_r)
    # number of coarse clusters that contain more than one member
    knum = len(np.nonzero(bcount > 1)[0])
    s = self.density_matrix.shape

    # The leading ``False`` short-circuits the test, so the SVD branch never runs.
    if False and len(s) > 1 and s[0] > 10 and s[1] > 10 and knum < min(s) / 2:
        (u, s, vt) = la.svds(self.sps_matrixs, k=knum)
        self.u = u
        print('============')
    else:
        self.result = self.coarse_r
        return (clkg, clkg)

    # Everything below is only reached when the SVD branch above is enabled.
    # rankA = npla.matrix_rank(self.sps_matrixs)
    # if rankA < 3:
    a = np.matrix(np.diag(s)) * np.matrix(vt)
    pd = dist.pdist(np.array(a.T), 'cosine')
    pd[np.abs(pd) < 1e-11] = 0
    lkg = hcluster.linkage(pd, method='average')
    self.lkg = lkg
    self.result = hcluster.fcluster(lkg, self.svd_cluster_thr, criterion='distance')
    # self.result = hcluster.fcluster(lkg,1)
    # self.result = hcluster.fclusterdata(u,0.7,metric = 'cosine', criterion = 'distance',method = 'average')
    return (lkg, clkg)
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:33,代码来源:cluster_by_grid.py
示例7: main
def main():
    """Sample three 2-D Gaussian blobs and plot Ward, single and complete linkage dendrograms."""
    D = 2  # so we can visualize it more easily
    s = 4  # separation so we can control how far apart the means are
    mu1 = np.array([0, 0])
    mu2 = np.array([s, s])
    mu3 = np.array([0, s])

    N = 900  # number of samples
    X = np.zeros((N, D))
    X[:300, :] = np.random.randn(300, D) + mu1
    X[300:600, :] = np.random.randn(300, D) + mu2
    X[600:, :] = np.random.randn(300, D) + mu3

    Z = linkage(X, 'ward')
    # Formatted explicitly so the output is the same under Python 2 and 3.
    print("Z.shape: %s" % (Z.shape,))
    # Z has the format [idx1, idx2, dist, sample_count]
    # therefore, its size will be (N-1, 4)
    plt.title("Ward")
    dendrogram(Z)
    plt.show()

    for method, title in (('single', "Single"), ('complete', "Complete")):
        Z = linkage(X, method)
        plt.title(title)
        dendrogram(Z)
        plt.show()
开发者ID:AndreyDrv,项目名称:machine_learning_examples,代码行数:30,代码来源:hcluster.py
示例8: HierarchicalCluster
def HierarchicalCluster(A):
    """Plot the column correlation matrix of ``A`` reordered by centroid linkage.

    A dendrogram is drawn on the left and on the top of the matrix, a colorbar
    on the right, and the figure is shown and saved as 'dendrogram.png'.

    :param A: 2-D array; ``np.corrcoef(A.T)`` is the matrix that gets clustered
    """
    # see http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
    Corr = np.corrcoef(A.T)
    fig = plt.figure(figsize=(8, 8))

    # Both dendrograms use the same linkage, so it is computed only once.
    Y = hrc.linkage(Corr, method='centroid')

    # Left dendrogram.
    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    Z1 = hrc.dendrogram(Y, orientation='right')
    ax1.set_xticks([])
    ax1.set_yticks([])

    # Top dendrogram.
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = hrc.dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Correlation matrix, rows and columns reordered by the leaf order.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    Corr = Corr[idx1, :]
    Corr = Corr[:, idx2]
    im = axmatrix.matshow(Corr, aspect='auto', origin='lower')

    # Colorbar.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)
    fig.show()
    fig.savefig('dendrogram.png')
开发者ID:izkula,项目名称:matrix-viz,代码行数:27,代码来源:npz_to_csv.py
示例9: hcluster_cols
def hcluster_cols(self, thresh):
    """Merge columns of ``self.X`` that fall into the same hierarchical cluster.

    Columns are clustered with complete linkage on cosine distance, falling
    back to Euclidean distance when the cosine clustering raises ValueError.
    Every cluster becomes one column of ``self.df``: the row-wise mean of its
    member columns, or the single column itself. ``self.crimes`` is replaced
    by the new column names.

    :param thresh: distance threshold handed to ``fcluster``
    """
    try:
        link = linkage(self.X.T, method='complete', metric='cosine')
        assignments = fcluster(link, thresh, 'distance')
    except ValueError:
        # presumably triggered by non-finite cosine distances (e.g. an
        # all-zero column) - verify against the data this runs on
        link = linkage(self.X.T, method='complete', metric='euclidean')
        assignments = fcluster(link, thresh, 'distance')

    df_new = pd.DataFrame(index=np.arange(len(self.names)))
    # Visit the clusters in ascending label order, members in column order.
    for label in np.unique(assignments):
        cols = np.flatnonzero(assignments == label).tolist()
        if len(cols) > 1:
            df_new[str(self.crimes[cols])] = np.mean(self.X[:, cols], axis=1)
        else:
            df_new[str(self.crimes[cols[0]])] = self.X[:, cols[0]]

    # To inspect the tree:
    # dendrogram(link, color_threshold=thresh, leaf_font_size=13, labels=self.crimes, orientation='left')
    self.df = df_new
    self.crimes = df_new.columns.values
开发者ID:nhu2000,项目名称:hood_project,代码行数:28,代码来源:pca_class.py
示例10: starthcc
def starthcc(self):
    """Plot the cluster dendrogram of the (x, y) points in ``self.tr``.

    ``self.dm`` is the distance metric handed to ``pdist`` and ``self.lin``
    the linkage method handed to ``linkage``.
    """
    from scipy.spatial.distance import pdist

    # Formatted explicitly so the output is the same under Python 2 and 3.
    print("{} {}".format(self.dm, self.lin))
    dataFrame = pd.DataFrame(self.tr, columns=['x', 'y'])

    # linkage() takes the condensed distances from pdist(); a square matrix
    # would be treated as observation vectors and clustered a second time.
    distxy = pdist(dataFrame, metric=self.dm)

    # The same plot is drawn for every linkage method, so one code path is
    # enough (the former single/complete/other branches were identical).
    plt.figure()
    dendrogram(linkage(distxy, method=str(self.lin)))
    plt.xlabel('X units')
    plt.ylabel('Y units')
    plt.suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14)
    plt.show()
开发者ID:vishnumani2009,项目名称:OpenSource-Open-Ended-Statistical-toolkit,代码行数:35,代码来源:heirarfrontend.py
示例11: save_mat
def save_mat(c2map, filepath):
    """Draw c2map['mat'] between two dendrograms and save the figure to filepath.

    Rows follow the leaf order of a centroid-linkage tree (drawn on the left),
    columns follow the leaf order of a single-linkage tree (drawn on top).
    """
    def _hide_ticks(axes):
        axes.set_xticks([])
        axes.set_yticks([])

    matrix = c2map['mat']
    figure = pylab.figure(figsize=(8, 8))

    # Left dendrogram (centroid linkage).
    left_axes = figure.add_axes([0.09, 0.1, 0.2, 0.6])
    left_tree = sch.dendrogram(sch.linkage(matrix, method='centroid'), orientation='right')
    _hide_ticks(left_axes)

    # Top dendrogram (single linkage).
    top_axes = figure.add_axes([0.3, 0.71, 0.6, 0.2])
    top_tree = sch.dendrogram(sch.linkage(matrix, method='single'))
    _hide_ticks(top_axes)

    # Matrix, reordered to line up with both dendrograms.
    matrix_axes = figure.add_axes([0.3, 0.1, 0.6, 0.6])
    reordered = matrix[left_tree['leaves'], :][:, top_tree['leaves']]
    image = matrix_axes.matshow(reordered, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
    _hide_ticks(matrix_axes)

    # Colorbar.
    colorbar_axes = figure.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(image, cax=colorbar_axes)
    figure.savefig(filepath)
开发者ID:andpromobile,项目名称:csipb-jamu-prj,代码行数:33,代码来源:c2map.py
示例12: plot_transition_clustermap
def plot_transition_clustermap(data_array, gene_names, pseudotimes, n_clusters=10, gradient=False):
    """Draw a row-clustered heat map of genes over pseudotime and group the genes.

    :param data_array: 2-D array; it is transposed before plotting, so its
        first axis runs along the heat map's x axis
    :param gene_names: labels for the heat-map rows
    :param pseudotimes: array indexed along the first axis of ``data_array``
    :param n_clusters: maximum number of flat clusters ("maxclust")
    :param gradient: cluster the z-scored gradient instead of the raw values
    :returns: defaultdict mapping ``settings.STATE_COLORS[cluster]`` to the
        list of gene names in that cluster
    """
    if gradient:
        data_to_plot = zscore(np.gradient(data_array)[1].T, axis=0)
        scale = None
        metric = 'seuclidean'
        row_linkage = linkage(pdist(abs(data_to_plot), metric=metric), method='complete')
    else:
        data_to_plot = data_array.T
        scale = 0
        metric = 'correlation'
        row_linkage = linkage(pdist(data_to_plot, metric=metric), method='complete')

    assignments = fcluster(row_linkage, n_clusters, criterion='maxclust')
    cm = sns.clustermap(data_to_plot, col_cluster=False, standard_scale=scale,
                        yticklabels=gene_names, row_linkage=row_linkage,
                        row_colors=[settings.STATE_COLORS[i] for i in assignments])

    # Tick positions double as indices into ``pseudotimes``, so they have to be
    # integers (``/`` gives a float step under Python 3). The step is kept at
    # 1 or more so inputs with fewer than 10 points do not produce a zero step.
    n_points = data_array.shape[0]
    r = np.arange(10, n_points, max(1, n_points // 10))

    plt.setp(cm.ax_heatmap.get_yticklabels(), fontsize=5)
    cm.ax_heatmap.set_xticks(r)
    cm.ax_heatmap.set_xticklabels(['%.1f' % x for x in pseudotimes[r]])
    cm.ax_heatmap.set_xlabel('Pseudotime')
    cm.ax_heatmap.set_ylabel('Gene')

    gene_clusters = defaultdict(list)
    for i, cl in enumerate(assignments):
        gene_clusters[settings.STATE_COLORS[cl]].append(gene_names[i])
    return gene_clusters
开发者ID:dimenwarper,项目名称:scimitar,代码行数:27,代码来源:plotting.py
示例13: check_linkage_q
def check_linkage_q(self, method):
    # Checks linkage(..., method) on hierarchy_test_data.X, passed first as raw
    # observations and then as condensed Euclidean distances; both must match
    # the stored "linkage_X_<method>" matrix.
    observations = hierarchy_test_data.X
    from_observations = linkage(observations, method)
    expectedZ = getattr(hierarchy_test_data, "linkage_X_" + method)
    assert_allclose(from_observations, expectedZ, atol=1e-06)

    condensed = scipy.spatial.distance.pdist(observations, metric="euclidean")
    from_distances = linkage(condensed, method)
    assert_allclose(from_distances, expectedZ, atol=1e-06)
开发者ID:metamorph-inc,项目名称:meta-core,代码行数:9,代码来源:test_hierarchy.py
示例14: _cluster_idx
def _cluster_idx(df):
    """Return ``(rows, cols)``: the dendrogram leaf order of df's rows and of its columns."""
    def _leaf_order(observations):
        # default pdist metric and default linkage method, no plotting
        tree = linkage(pdist(observations))
        return dendrogram(tree, no_plot=True)['leaves']

    cols = _leaf_order(df.T)
    rows = _leaf_order(df)
    return rows, cols
开发者ID:jdrudolph,项目名称:mypy,代码行数:9,代码来源:pdplot.py
示例15: plot_clustered_heatmap
def plot_clustered_heatmap(df, genes_list, cancer, output_path, scale='binary'):
    """Cluster genes by their mutation profile and write a plotly heat map to HTML.

    :param df: DataFrame holding one column per gene in ``genes_list`` plus the
        columns ``patient_id`` and ``somatic_mutations_count``
    :param genes_list: names of the gene columns to cluster
    :param cancer: cancer name, used in the output file name
    :param output_path: prefix of the output file name
    :param scale: 'binary' (complete linkage on Hamming distance) or
        'logarithmic' (Ward linkage)
    :raises ValueError: if ``scale`` is neither 'binary' nor 'logarithmic'
    """
    # After the transpose X has one row per gene and one column per row of df.
    # ``.values`` replaces ``.as_matrix()``, which was removed from pandas.
    X = df[genes_list].values.transpose()

    if scale == 'binary':
        metric = 'hamming'
        Z = linkage(X, method='complete', metric=metric)
        colorscale = [[0, "rgb(111, 168, 220)"], [1, "rgb(5, 10, 172)"]]
        colorbar = {'tick0': 0, 'dtick': 1}
    elif scale == 'logarithmic':
        metric = 'euclidean'
        Z = linkage(X, method='ward')
        X_max = X.max()
        colorscale = [[0, 'rgb(250, 250, 250)'],
                      [1./X_max, 'rgb(200, 200, 200)'],
                      [5./X_max, 'rgb(150, 150, 200)'],
                      [20./X_max, 'rgb(100, 100, 200)'],
                      [100./X_max, 'rgb(50, 50, 200)'],
                      [1., 'rgb(0, 0, 200)']]
        colorbar = {'tick0': 0,
                    'tickmode': 'array',
                    'tickvals': [0, 1, 5, 20, 100, X_max]}
    else:
        # Previously an unknown scale failed later because Z was never assigned.
        raise ValueError("unsupported scale %r (expected 'binary' or 'logarithmic')" % (scale,))

    # cophenet() must be given the distances the linkage was built from, so the
    # same metric is used here as for the clustering above.
    c, coph_dists = cophenet(Z, pdist(X, metric=metric))
    print("Cophenetic Correlation Coefficient: %s" % c)

    den = dendrogram(Z, orientation='left')
    idx = den['leaves']
    X = X[idx, :]
    print("X shape: %s" % (X.shape,))
    genes_ordered = [genes_list[i] for i in idx]
    logger.info("ordered genes: %s", str(genes_ordered))

    # Plotting the heatmap (without the hierarchy)
    heatmap_trace = go.Heatmap(z=X.tolist(), x=df.patient_id, y=genes_ordered, showscale=True, colorscale=colorscale, colorbar=colorbar)
    mutation_load_trace = go.Bar(x=df.patient_id, y=df.somatic_mutations_count/30.0)

    # 29-row grid: the bar chart spans rows 1-5, the heat map rows 6-29.
    fig = tls.make_subplots(rows=29, cols=1, specs=[[{'rowspan':5, 'colspan' : 1}]] + [[None]] * 4 + [[{'rowspan' : 24, 'colspan' : 1}]] + [[None]] * 23)
    fig.append_trace(mutation_load_trace, 1, 1)
    fig.append_trace(heatmap_trace, 6, 1)
    fig['layout']['xaxis1'].update(showticklabels = False)
    fig['layout']['xaxis1'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis1'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    fig['layout']['xaxis2'].update(showticklabels = False)
    fig['layout']['xaxis2'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis2'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    plot(fig, auto_open=False, filename="%s_%s_heatmap_clustered.html" % (output_path, cancer))
开发者ID:galynz,项目名称:bio_project,代码行数:57,代码来源:plot_mutations_heatmap.py
示例16: refineEnsemble
def refineEnsemble(ens, lower=.5, upper=10.):
    """Refine a PDB ensemble based on RMSD criterions.

    Two clusterings of the pairwise RMSD table are combined:

    * complete linkage cut at ``upper`` - only members of the most populated
      cluster are kept;
    * single linkage cut at ``lower`` - one representative is kept per cluster:
      the member with the largest weight sum, ties being broken by the
      smallest value in ``ens.getRMSDs()``.

    :param ens: ensemble providing ``getRMSDs``, integer indexing (to reach
        ``getWeights``) and indexing with an index array
    :param lower: distance cut-off of the single-linkage clustering
    :param upper: distance cut-off of the complete-linkage clustering
    :returns: ``ens`` indexed with the indices that satisfy both criteria
    """
    from scipy.cluster.hierarchy import linkage, fcluster
    from scipy.spatial.distance import squareform
    from collections import Counter

    ### calculate pairwise RMSDs ###
    RMSD = ens.getRMSDs(pairwise=True)
    # convert the RMSD table to the compressed form
    v = squareform(RMSD)

    ### apply upper threshold ###
    Z_upper = linkage(v, method='complete')
    labels = fcluster(Z_upper, upper, criterion='distance')
    most_common_label = Counter(labels).most_common(1)[0][0]
    I = np.where(labels == most_common_label)[0]

    ### apply lower threshold ###
    Z_lower = linkage(v, method='single')
    labels = fcluster(Z_lower, lower, criterion='distance')
    # Built from the labels that occur, so every cluster has at least one member.
    clusters = [np.where(labels == label)[0] for label in np.unique(labels)]

    J = np.ones(len(clusters), dtype=int) * -1
    rmsd = None
    for i, cluster in enumerate(clusters):
        # find the conformations with the largest coverage
        # (the weight of the ref should be 1)
        weights = np.array([ens[j].getWeights().sum() for j in cluster])
        js = np.where(weights == weights.max())[0]

        # in the case where there are multiple structures with the same weight,
        # the one with the smallest rmsd wrt the ens._coords is selected.
        if len(js) > 1:
            # rmsd is not calculated unless necessary for the sake of efficiency
            rmsd = ens.getRMSDs() if rmsd is None else rmsd
            # ``js`` are positions inside ``cluster``; rmsd is indexed by
            # ensemble index, so translate before looking the values up.
            j = js[np.argmin(rmsd[cluster[js]])]
        else:
            j = js[0]
        J[i] = cluster[j]

    ### refine ensemble ###
    K = np.intersect1d(I, J)
    reens = ens[K]
    return reens
开发者ID:fongchun,项目名称:ProDy,代码行数:56,代码来源:legacy.py
示例17: clusterData
def clusterData(xdata, rowMethod=True, columnMethod=False, method='average', metric='euclidean'):
    """clusterData clusters the data either by row, by column, or both

    :param xdata: a data dictionary - the one to be transformed
    :type xdata: dict, must contain 'data', 'proteins', 'fractions'
    :param rowMethod: a boolean asking if you want to flip on the rows (proteins get clustered)
    :type rowMethod: bool
    :param columnMethod: a boolean asking if you want to flip on the columns (fractions get clustered)
    :type columnMethod: bool
    :param method: string defining the linkage type, defaults to 'average' - 'ward' might be a good option
    :type method: string
    :param metric: string defining the distance metric, defaults to 'euclidean'
    :type metric: string
    :returns: a shallow copy of xdata. 'data', 'topDendro' and 'rightDendro' are always set
        (a dendrogram entry is None when that axis was not clustered); 'proteins' and 'pi'
        are updated when rows are clustered, 'fractions' and 'fi' when columns are clustered
    """
    toReturn = xdata.copy()
    x = toReturn['data']
    ind1 = toReturn['proteins']
    ind2 = toReturn['fractions']
    xt = x  # xt is the reordered x
    Y1 = None
    Y2 = None

    if rowMethod:
        # Hand the observations to linkage() directly so that ``metric`` is the
        # distance between rows. Passing a square distance matrix would apply
        # the metric to rows of that matrix instead.
        Y1 = sch.linkage(x, method=method, metric=metric)
        idx1 = sch.dendrogram(Y1, no_plot=True, orientation='right')['leaves']
        # apply the leaf order of the row dendrogram to the matrix and labels
        xt = xt[idx1, :]
        toReturn['proteins'] = [ind1[i] for i in idx1]
        toReturn['pi'] = idx1

    if columnMethod:
        Y2 = sch.linkage(x.T, method=method, metric=metric)
        idx2 = sch.dendrogram(Y2, no_plot=True)['leaves']
        # apply the leaf order of the column dendrogram to the matrix and labels
        xt = xt[:, idx2]
        toReturn['fractions'] = [ind2[i] for i in idx2]
        toReturn['fi'] = idx2

    toReturn['data'] = xt
    toReturn['topDendro'] = Y2
    toReturn['rightDendro'] = Y1
    return toReturn
开发者ID:joeydavis,项目名称:clusteringModule,代码行数:56,代码来源:clusteringModule.py
示例18: heatmap_v1
def heatmap_v1(self, data_I, row_labels_I, column_labels_I):
    """Generate heat-map cells and a row/column clustering order using pandas and scipy.

    DEPRECATED: kept for compatibility with old io methods.

    Rows that are entirely NaN are dropped and remaining NaN values become 0.

    :param data_I: 2-D data for the heat map
    :param row_labels_I: labels of the rows of ``data_I``
    :param column_labels_I: labels of the columns of ``data_I``
    :returns: dict with 'hcrow' / 'hccol' (dendrogram leaf order of rows and
        columns), 'row_labels', 'col_labels', 'heatmap_data' (one
        ``{"col", "row", "value"}`` dict per cell, zero-based) and the
        'maxval' / 'minval' over all cells
    """
    # Only the 'leaves' entry of the dendrogram output is used: for each i,
    # leaves[i] == j means that cluster node j appears in position i of the
    # left-to-right traversal of the leaves.

    # parse input into col_labels and row_labels
    # TODO: pandas is not needed for this.
    mets_data = pd.DataFrame(data=data_I, index=row_labels_I, columns=column_labels_I)
    mets_data = mets_data.dropna(how='all').fillna(0.)
    col_labels = list(mets_data.columns)
    row_labels = list(mets_data.index)

    # heatmap data matrix; cells are read by position because DataFrame.ix was
    # removed from pandas.
    cell_values = mets_data.values
    heatmap_data = [
        {"col": j, "row": i, "value": cell_values[i, j]}
        for i in range(len(row_labels))
        for j in range(len(col_labels))
    ]

    # perform the clustering on both the rows and columns
    # NOTE(review): linkage() receives square distance matrices here and so
    # treats them as observation vectors. Left as is because this function is
    # kept for compatibility - confirm before changing its output.
    dm = mets_data
    D1 = squareform(pdist(dm, metric='euclidean'))
    D2 = squareform(pdist(dm.T, metric='euclidean'))
    Z1 = dendrogram(linkage(D1, method='single'), labels=dm.index)
    Z2 = dendrogram(linkage(D2, method='single'), labels=dm.columns)

    # parse the output
    hccol = Z2['leaves']
    hcrow = Z1['leaves']

    all_values = [cell['value'] for cell in heatmap_data]
    return {'hcrow': hcrow, 'hccol': hccol, 'row_labels': row_labels,
            'col_labels': col_labels,
            'heatmap_data': heatmap_data,
            'maxval': max(all_values),
            'minval': min(all_values)}
开发者ID:SBRG,项目名称:sbaas,代码行数:56,代码来源:base_calculate.py
示例19: test_correspond_4_and_up
def test_correspond_4_and_up(self):
# Tests correspond(Z, y) on linkage and CDMs over observation sets of
# different sizes. Correspondance should be false.
for (i, j) in list(zip(list(range(2, 4)), list(range(3, 5)))) + list(zip(list(range(3, 5)), list(range(2, 4)))):
y = np.random.rand(i * (i - 1) // 2)
y2 = np.random.rand(j * (j - 1) // 2)
Z = linkage(y)
Z2 = linkage(y2)
assert_equal(correspond(Z, y2), False)
assert_equal(correspond(Z2, y), False)
开发者ID:Kitchi,项目名称:scipy,代码行数:10,代码来源:test_hierarchy.py
示例20: heatmap_plot_zscore_bigneuron
def heatmap_plot_zscore_bigneuron(df_zscore_features, df_all, output_dir, title=None):
    """Plot a clustered heat map of z-scored features and save it as a PNG.

    Samples (rows of ``df_zscore_features``) are clustered with Ward linkage
    and shown as heat-map columns, colored by ``df_all['nt_type']``. Features
    are put in the leaf order of their own Ward clustering. The figure is
    written to ``output_dir + '/zscore_feature_heatmap.png'``.

    :param df_zscore_features: DataFrame of z-scored features, one row per sample
    :param df_all: DataFrame providing the 'nt_type' column used for the colors
    :param output_dir: directory the PNG is written to
    :param title: optional plot title
    :returns: the linkage matrix of the sample clustering
    """
    print("heatmap plot:bigneuron")
    # taiwan
    type_column = 'nt_type'
    mtypes = np.unique(df_all[type_column])
    print(mtypes)
    mtypes_pal = sns.color_palette("hls", len(mtypes))
    mtypes_lut = dict(zip(mtypes, mtypes_pal))
    mtypes_colors = df_all[type_column].map(mtypes_lut)

    # Sample clustering; named so that it does not shadow a ``linkage`` function.
    col_linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')

    # Feature clustering, used only to order the heat-map rows.
    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)
    matchIndex = [data.index[x] for x in feature_order]
    data = data.reindex(matchIndex)

    pl.figure()
    g = sns.clustermap(data, row_cluster = False, col_linkage=col_linkage, method='ward', metric='euclidean',
                       linewidths = 0.0,col_colors = [mtypes_colors],
                       cmap = sns.cubehelix_palette(light=1, as_cmap=True),figsize=(40,10))
    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    #g.ax_heatmap.set_xticklabels([])
    pl.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95)  # !!!!!
    if title:
        pl.title(title)

    location = "best"
    num_cols = 1
    # Legend for row and col colors: zero-size bars only carry the legend entries.
    for label in mtypes:
        g.ax_row_dendrogram.bar(0, 0, color=mtypes_lut[label], label=label, linewidth=0.0)
    g.ax_row_dendrogram.legend(loc=location, ncol=num_cols, borderpad=0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    #pl.show()
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    print("done clustering and heatmap plotting")
    return col_linkage
开发者ID:XiaoxiaoLiu,项目名称:morphology_analysis,代码行数:55,代码来源:morph_cluster.py
注:本文中的scipy.cluster.hierarchy.linkage函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论