• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python hierarchy.linkage函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中scipy.cluster.hierarchy.linkage函数的典型用法代码示例。如果您正苦于以下问题:Python linkage函数的具体用法?Python linkage怎么用?Python linkage使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了linkage函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: random_distribution

def random_distribution(n):

    #make up some data
    data = np.random.normal(scale=n, size=(n, n))
    data[0:n / 2,0:n / 2] += 75
    data[n / 2:, n / 2:] = np.random.poisson(lam=n,size=data[n / 2:, n / 2:].shape)
    #cluster the rows
    row_dist = ssd.squareform(ssd.pdist(data))
    row_Z = sch.linkage(row_dist)
    row_idxing = sch.leaves_list(row_Z)

    row_labels = ['bar{}'.format(i) for i in range(n)]

    #cluster the columns
    col_dist = ssd.squareform(ssd.pdist(data.T))
    col_Z = sch.linkage(col_dist)
    col_idxing = sch.leaves_list(col_Z)
    #make the dendrogram

    col_labels = ['foo{}'.format(i) for i in range(n)]

    data = data[:,col_idxing][row_idxing,:]

    heatmap = pdh.DendroHeatMap(heat_map_data=data,left_dendrogram=row_Z, top_dendrogram=col_Z, heatmap_colors=("#ffeda0", "#feb24c", "#f03b20"), window_size="auto", color_legend_displayed=False, label_color="#777777")
    heatmap.row_labels = row_labels
    heatmap.col_labels = col_labels
    heatmap.title = 'An example heatmap'
    heatmap.show()#heatmap.save("example.png")
开发者ID:kaniblu,项目名称:dhm,代码行数:28,代码来源:example.py


示例2: getDistMatrixes

    def getDistMatrixes(cls, distDict, distMeasure, linkageCriterion):
        """
        Find and return the correlation matrix, linkage matrix and distance matrix for the distance/correlation
        measure given with distMeasure parameter.
        """
        from scipy.spatial.distance import squareform
        from numpy import ones, fill_diagonal
        from scipy.cluster.hierarchy import linkage

        if distMeasure == cls.CORR_PEARSON or distMeasure == cls.SIM_MCCONNAUGHEY:
            '''As these measures generate values between -1 and 1, need special handling'''

            # Cluster distances, i.e. convert correlation into distance between 0 and 1
            triangularCorrMatrix = distDict[distMeasure]
            triangularDistMatrix = ones(len(triangularCorrMatrix)) - [(x + 1) / 2 for x in triangularCorrMatrix]
            linkageMatrix = linkage(cls.removeNanDistances(triangularDistMatrix), linkageCriterion)

            # Make correlation matrix square
            correlationMatrix = squareform(triangularCorrMatrix)
            fill_diagonal(correlationMatrix, 1)
        else:

            # Cluster distances
            triangularDistMatrix = distDict[distMeasure]
            linkageMatrix = linkage(cls.removeNanDistances(triangularDistMatrix), linkageCriterion)

            # Convert triangular distances into square correlation matrix
            squareDistMatrix = squareform(triangularDistMatrix)
            squareSize = len(squareDistMatrix)
            correlationMatrix = ones((squareSize, squareSize)) - squareDistMatrix

        return correlationMatrix, linkageMatrix, triangularDistMatrix
开发者ID:johhorn,项目名称:gwas-clustering,代码行数:32,代码来源:CommonClusteringFunctions.py


示例3: draw_intensity

def draw_intensity(a, cmap=GREEN_CMAP, metric='euclidean', method='average', sort_x=True, sort_y=True):
    main_axes = plt.gca()
    divider = make_axes_locatable(main_axes)

    if sort_x is True:
        plt.sca(divider.append_axes("top", 0.5, pad=0))
        xlinkage = linkage(pdist(a.T, metric=metric), method=method, metric=metric)
        xdendro = dendrogram(xlinkage, orientation='top', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        a = a[[a.columns[i] for i in xdendro['leaves']]]

    if sort_y is True:
        plt.sca(divider.append_axes("left", 1.0, pad=0))
        ylinkage = linkage(pdist(a, metric=metric), method=method, metric=metric)
        ydendro = dendrogram(ylinkage, orientation='right', no_labels=True,
                             distance_sort='descending',
                             link_color_func=lambda x: 'black')
        plt.gca().set_axis_off()
        a = a.ix[[a.index[i] for i in ydendro['leaves']]]

    plt.sca(main_axes)
    plt.imshow(a, aspect='auto', interpolation='none',
               cmap=cmap, vmin=0.0, vmax=1.0)
    plt.colorbar(pad=0.15)
    plt.gca().yaxis.tick_right()
    plt.xticks(range(a.shape[1]), a.columns, rotation=90, size='small')
    plt.yticks(range(a.shape[0]), a.index, size='x-small')
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')
    plt.gca().invert_yaxis()

    plt.show()
开发者ID:neuroinformatics,项目名称:bah2016_registration,代码行数:34,代码来源:draw_cluster.py


示例4: hierarchical_clustering

def hierarchical_clustering(data, skill,  method='single', metric='euclidean', dendrogram=True, concepts=False, cluster_number=3, corr_as_vectors=False):
    pk, level = data.get_skill_id(skill)
    items = data.get_items_df()
    skills = data.get_skills_df()
    corr = compute_corr(data, merge_skills=concepts)
    print("Corr ({}) contain total {} values and from that {} nans".format(corr.shape, corr.size, corr.isnull().sum().sum()))
    corr[corr.isnull()] = 0

    if concepts:
        items = items[items["skill_lvl_" + str(level)] == pk]
        skill_ids = items[~items["skill_lvl_3"].isnull()]["skill_lvl_3"].unique()
        corr = pd.DataFrame(corr, index=skill_ids, columns=skill_ids)
        labels = list(skills.loc[corr.index]["name"])

    else:
        items = items[items["skill_lvl_" + str(level)] == pk]
        items = items[items["visualization"] != "pairing"]
        corr = pd.DataFrame(corr, index=items.index, columns=items.index)
        labels = ["{1} - {0}".format(item["name"], item["visualization"][0]) for id, item in list(items.iterrows())]

    if corr_as_vectors:
        Z = hr.linkage(corr, method=method, metric=metric)
    else:
        Z = hr.linkage(dst.squareform(1 - corr), method=method)
    Z[Z < 0] = 0
    if dendrogram:
        plt.title('{}: method: {}, metric: {}, as vectors: {}'.format(skill, method, metric, corr_as_vectors))
        plt.xlabel('items' if not concepts else "concepts")
        plt.ylabel('distance')
        hr.dendrogram(Z, leaf_rotation=90., leaf_font_size=10., labels=labels)

    return hr.fcluster(Z, cluster_number, "maxclust")
开发者ID:thran,项目名称:experiments2.0,代码行数:32,代码来源:experiments_clustering.py


示例5: compare_clusters

def compare_clusters(args):

    ref_df = pd.read_table(args['ref'], sep='\t', skipinitialspace=True, index_col=0).as_matrix()
    check_symmetry(ref_df)
    linkage_ref = linkage(ref_df, 'average')
    c_ref, coph_dists_ref = cophenet(linkage_ref, pdist(ref_df))

    outfile = open(args['output'],"w")
    outfile.write("Tree_cluster\tMantel_Correlation_Coefficient\tManter_P-value\tCophenetic_Pearson\tCophenetic_P-value\n")

    for i in args['all']:
        fst_df = pd.read_table(i, sep='\t', skipinitialspace=True, index_col=0).as_matrix()
        check_symmetry(fst_df)
        mantel_coeff = 0.0
        p_value_mantel = 0.0
        cophenetic_pearson = 0.0
        p_value_cophenetic = 0.0
        n = 0
        try:
            mantel_coeff, p_value_mantel, n = mantel(ref_df, fst_df)
            linkage_fst = linkage(fst_df, 'average')
            c_fst, coph_dists_fst = cophenet(linkage_fst, pdist(fst_df))
            cophenetic_pearson, p_value_cophenetic = pearsonr(coph_dists_ref, coph_dists_fst)
        except Exception as e:
            print("Error : %s" % str(e))
            mantel_coeff = "Failed"
            p_value_manel = "Failed"
            cophenetic_pearson = "Failed"
            p_value_cophenetic = "Failed"

        outfile.write(i+"\t"+str(mantel_coeff)+"\t"+str(p_value_mantel)+"\t"+str(cophenetic_pearson)+"\t"+str(p_value_cophenetic)+"\n")

    outfile.close()
开发者ID:zorino,项目名称:ray,代码行数:33,代码来源:treeclust-compare.py


示例6: cluster_fps

    def cluster_fps(self):
        clkg = hcluster.linkage(self.dm,method = 'average') 
        coarse_r = hcluster.fcluster(clkg,0.3,criterion = 'distance')
        self.coarse_r = coarse_r

        bcount = np.bincount(coarse_r)
        knum = len(np.nonzero(bcount > 1)[0])

        s = self.density_matrix.shape
        if False and len(s) >1 and s[0] > 10 and s[1] > 10 and knum < min(s) / 2:
            (u,s,vt) = la.svds(self.sps_matrixs,k = knum)
            self.u = u
            print '============'
        else:
            
            self.result = self.coarse_r
            return (clkg,clkg)
 

#rankA = npla.matrix_rank(self.sps_matrixs)
#        if rankA < 3:
        a = np.matrix(np.diag(s)) * np.matrix(vt)
        pd = dist.pdist(np.array(a.T),'cosine')
        pd[np.abs(pd) < 1e-11] = 0
        lkg = hcluster.linkage(pd,method = 'average')
        self.lkg = lkg

        self.result = hcluster.fcluster(lkg,self.svd_cluster_thr,criterion = 'distance')

#        self.result = hcluster.fcluster(lkg,1)

# self.result = hcluster.fclusterdata(u,0.7,metric = 'cosine', criterion = 'distance',method = 'average')
        return (lkg,clkg)
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:33,代码来源:cluster_by_grid.py


示例7: main

def main():
    D = 2 # so we can visualize it more easily
    s = 4 # separation so we can control how far apart the means are
    mu1 = np.array([0, 0])
    mu2 = np.array([s, s])
    mu3 = np.array([0, s])

    N = 900 # number of samples
    X = np.zeros((N, D))
    X[:300, :] = np.random.randn(300, D) + mu1
    X[300:600, :] = np.random.randn(300, D) + mu2
    X[600:, :] = np.random.randn(300, D) + mu3

    Z = linkage(X, 'ward')
    print "Z.shape:", Z.shape
    # Z has the format [idx1, idx2, dist, sample_count]
    # therefore, its size will be (N-1, 4)
    plt.title("Ward")
    dendrogram(Z)
    plt.show()

    Z = linkage(X, 'single')
    plt.title("Single")
    dendrogram(Z)
    plt.show()

    Z = linkage(X, 'complete')
    plt.title("Complete")
    dendrogram(Z)
    plt.show()
开发者ID:AndreyDrv,项目名称:machine_learning_examples,代码行数:30,代码来源:hcluster.py


示例8: HierarchicalCluster

def HierarchicalCluster(A):
    #see http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
    Corr = np.corrcoef(A.T)
    fig = plt.figure(figsize=(8,8))
    ax1 = fig.add_axes([0.09,0.1,0.2,0.6])
    Y = hrc.linkage(Corr, method='centroid')
    Z1 = hrc.dendrogram(Y, orientation='right')
    ax1.set_xticks([])
    ax1.set_yticks([])

    ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
    Y = hrc.linkage(Corr, method='centroid')
    Z2 = hrc.dendrogram(Y)
    ax2.set_xticks([])
    ax2.set_yticks([])

    axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    Corr = Corr[idx1, :]
    Corr = Corr[:, idx2]
    im = axmatrix.matshow(Corr, aspect='auto', origin='lower')

    axcolor = fig.add_axes([0.91,0.1,0.02,0.6])
    pylab.colorbar(im, cax=axcolor)
    fig.show()
    fig.savefig('dendrogram.png')
开发者ID:izkula,项目名称:matrix-viz,代码行数:27,代码来源:npz_to_csv.py


示例9: hcluster_cols

	def hcluster_cols(self, thresh):
		try:
			link = linkage(self.X.T, method='complete', metric = 'cosine')
			assignments = fcluster(link, thresh, 'distance')

		except:
			link = linkage(self.X.T, method='complete', metric = 'euclidean')
			assignments = fcluster(link, thresh, 'distance')

		col_ind = np.arange(len(self.crimes))
		d = pd.DataFrame(zip(col_ind, assignments)).groupby(1)[0].aggregate(lambda x: tuple(x))
		df_new = pd.DataFrame(index = np.arange(len(self.names)))
		for i in d:
			cols = []
			for w in i:
			    cols.append(w)
			if len(cols) > 1:
				df_new[str(self.crimes[cols])] = np.mean(self.X[:,cols], axis = 1)
			else:
			    df_new[str(self.crimes[cols[0]])] = self.X[:,cols[0]]

		# plt.figure(figsize=(10,20))
		# dendro = dendrogram(link, color_threshold=thresh, leaf_font_size=13, labels = self.crimes, orientation = 'left')
		# plt.subplots_adjust(top=.99, bottom=0.5, left=0.05, right=0.99)
		# plt.show()

		self.df = df_new
		self.crimes = df_new.columns.values
开发者ID:nhu2000,项目名称:hood_project,代码行数:28,代码来源:pca_class.py


示例10: starthcc

 def starthcc(self):
     print self.dm,self.lin
     dataFrame = pd.DataFrame(self.tr, columns=['x', 'y'])
     from scipy.spatial.distance import pdist, squareform
     
     # not printed as pretty, but the values are correct
     distxy = squareform(pdist(dataFrame, metric=(self.dm)))
     #print distxy
     if self.lin=="single":
         plt.figure()
         R = dendrogram(linkage(distxy, method=str(self.lin)))
         
         plt.xlabel('X units')
         plt.ylabel('Y units')
         plt.suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14);
        
         plt.show()
     elif self.lin=="complete":
         plt.figure()
         R = dendrogram(linkage(distxy, method=str(self.lin)))
         
         plt.xlabel('X units')
         plt.ylabel('Y units')
         plt.suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14);
        
         plt.show()
     else:
         plt.figure()
         R = dendrogram(linkage(distxy, method=str(self.lin)))
         
         plt.xlabel('X units')
         plt.ylabel('Y units')
         plt.suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14);
        
         plt.show()
开发者ID:vishnumani2009,项目名称:OpenSource-Open-Ended-Statistical-toolkit,代码行数:35,代码来源:heirarfrontend.py


示例11: save_mat

def save_mat(c2map, filepath):
	mat = c2map['mat']
	fig = pylab.figure(figsize=(8,8))
	
	# Compute and plot first dendrogram.
	ax1 = fig.add_axes([0.09,0.1,0.2,0.6])
	Y = sch.linkage(mat, method='centroid')
	Z1 = sch.dendrogram(Y, orientation='right')
	ax1.set_xticks([])
	ax1.set_yticks([])

	# Compute and plot second dendrogram.
	ax2 = fig.add_axes([0.3,0.71,0.6,0.2])
	Y = sch.linkage(mat, method='single')
	Z2 = sch.dendrogram(Y)
	ax2.set_xticks([])
	ax2.set_yticks([])

	# Plot distance matrix.
	axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
	idx1 = Z1['leaves']
	idx2 = Z2['leaves']
	mat = mat[idx1,:]
	mat = mat[:,idx2]
	im = axmatrix.matshow(mat, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
	axmatrix.set_xticks([])
	axmatrix.set_yticks([])

	# Plot colorbar.
	axcolor = fig.add_axes([0.91,0.1,0.02,0.6])
	pylab.colorbar(im, cax=axcolor)

	fig.savefig(filepath)
开发者ID:andpromobile,项目名称:csipb-jamu-prj,代码行数:33,代码来源:c2map.py


示例12: plot_transition_clustermap

def plot_transition_clustermap(data_array, gene_names, pseudotimes, n_clusters=10, gradient=False):
    if gradient:
        data_to_plot = zscore(np.gradient(data_array)[1].T, axis=0)
        scale = None
        metric = 'seuclidean'
        row_linkage = linkage(pdist(abs(data_to_plot), metric=metric), method='complete')
    else:
        data_to_plot = data_array.T
        scale = 0
        metric = 'correlation'
        row_linkage = linkage(pdist(data_to_plot, metric=metric), method='complete')
    
    assignments = fcluster(row_linkage, n_clusters, criterion='maxclust')
    cm = sns.clustermap(data_to_plot, col_cluster=False, standard_scale=scale, 
                        yticklabels=gene_names, row_linkage=row_linkage,
                        row_colors=[settings.STATE_COLORS[i] for i in assignments])
    r = np.arange(10, data_array.shape[0], data_array.shape[0]/10)
    plt.setp(cm.ax_heatmap.get_yticklabels(), fontsize=5)
    cm.ax_heatmap.set_xticks(r)
    cm.ax_heatmap.set_xticklabels(['%.1f' % x for x in pseudotimes[r]])
    cm.ax_heatmap.set_xlabel('Pseudotime')
    cm.ax_heatmap.set_ylabel('Gene')
    
    gene_clusters = defaultdict(list)
    for i, cl in enumerate(assignments):
        gene_clusters[settings.STATE_COLORS[cl]].append(gene_names[i])
    return gene_clusters
开发者ID:dimenwarper,项目名称:scimitar,代码行数:27,代码来源:plotting.py


示例13: check_linkage_q

    def check_linkage_q(self, method):
        # Tests linkage(Y, method) on the Q data set.
        Z = linkage(hierarchy_test_data.X, method)
        expectedZ = getattr(hierarchy_test_data, "linkage_X_" + method)
        assert_allclose(Z, expectedZ, atol=1e-06)

        y = scipy.spatial.distance.pdist(hierarchy_test_data.X, metric="euclidean")
        Z = linkage(y, method)
        assert_allclose(Z, expectedZ, atol=1e-06)
开发者ID:metamorph-inc,项目名称:meta-core,代码行数:9,代码来源:test_hierarchy.py


示例14: _cluster_idx

def _cluster_idx(df):
    """ sort indices by clusters """
    dcol = pdist(df.T)
    drow = pdist(df)
    lcol = linkage(dcol)
    lrow = linkage(drow)
    cols = dendrogram(lcol, no_plot=True)['leaves']
    rows = dendrogram(lrow, no_plot=True)['leaves']
    return rows,cols
开发者ID:jdrudolph,项目名称:mypy,代码行数:9,代码来源:pdplot.py


示例15: plot_clustered_heatmap

def plot_clustered_heatmap(df, genes_list, cancer, output_path, scale='binary'):
    # Build nxm matrix (n samples, m genes)
    X = df[genes_list].as_matrix().transpose()
    
    if scale == 'binary':
        Z = linkage(X, method='complete', metric='hamming')
        colorscale = [[0, "rgb(111, 168, 220)"], [1, "rgb(5, 10, 172)"]]
        colorbar = {'tick0': 0,'dtick': 1}
    elif scale == 'logarithmic':
        Z = linkage(X, method='ward')
        X_max = X.max()
        colorscale = [[0, 'rgb(250, 250, 250)'],
                      [1./X_max, 'rgb(200, 200, 200)'],
                      [5./X_max, 'rgb(150, 150, 200)'],
                      [20./X_max, 'rgb(100, 100, 200)'],
                      [100./X_max, 'rgb(50, 50, 200)'],
                      [1., 'rgb(0, 0, 200)']]
        colorbar = {'tick0': 0,
                    'tickmode': 'array',
                    'tickvals': [0, 1, 5, 20, 100, X_max]}
    c, coph_dists = cophenet(Z, pdist(X))
    print "Cophenetic Correlation Coefficient:", c
    
    #layout = go.Layout(yaxis=dict(title='%s germline mutations (ordered by samples somatic mutation load)'% cancer, zeroline=False))    
#    fig = pylab.figure(figsize=(8,8))
#    ax1 = fig.add_axes([0.09,0.1,0.2,0.6])
#    ax1.set_xticks([])
#    ax1.set_yticks([])
#    axmatrix = fig.add_axes([0.3,0.1,0.6,0.6])
    den = dendrogram(Z, orientation='left')
    idx = den['leaves']
    X = X[idx,:]
    print "X shape:", X.shape
    genes_ordered = [genes_list[i] for i in idx]
    logger.info("ordered genes: %s", str(genes_ordered))
    
#    im = axmatrix.matshow(X, aspect='auto', origin='lower', cmap=pylab.cm.YlGnBu)
#    axmatrix.set_xticks([])
#    axmatrix.set_yticks([])
#    # Plot colorbar.
#    axcolor = fig.add_axes([0.91,0.1,0.02,0.6])
#    pylab.colorbar(im, cax=axcolor)
#    fig.savefig(output_path)
    
    # Plotting the heatmap (without the hirarchy)
    heatmap_trace = go.Heatmap(z=X.tolist(), x=df.patient_id, y=genes_ordered, showscale=True, colorscale=colorscale, colorbar=colorbar)
    mutation_load_trace = go.Bar(x=df.patient_id, y=df.somatic_mutations_count/30.0)
    fig = tls.make_subplots(rows=29, cols=1, specs=[[{'rowspan':5, 'colspan' : 1}]] + [[None]] * 4 + [[{'rowspan' : 24, 'colspan' : 1}]] + [[None]] * 23)
    fig.append_trace(mutation_load_trace, 1, 1)
    fig.append_trace(heatmap_trace, 6, 1)
    fig['layout']['xaxis1'].update(showticklabels = False)
    fig['layout']['xaxis1'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis1'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    fig['layout']['xaxis2'].update(showticklabels = False)
    fig['layout']['xaxis2'].update(zeroline = False, showgrid=False)
    fig['layout']['yaxis2'].update(zeroline = False, showgrid = False, tickfont=dict(family='Arial', size=4))
    plot(fig, auto_open=False, filename="%s_%s_heatmap_clustered.html" % (output_path, cancer))
开发者ID:galynz,项目名称:bio_project,代码行数:57,代码来源:plot_mutations_heatmap.py


示例16: refineEnsemble

def refineEnsemble(ens, lower=.5, upper=10.):
    """Refine a PDB ensemble based on RMSD criterions.""" 

    from scipy.cluster.hierarchy import linkage, fcluster
    from scipy.spatial.distance import squareform
    from collections import Counter

    ### calculate pairwise RMSDs ###
    RMSD = ens.getRMSDs(pairwise=True)

    # convert the RMSD table to the compressed form
    v = squareform(RMSD)

    ### apply upper threshold ###
    Z_upper = linkage(v, method='complete')
    labels = fcluster(Z_upper, upper, criterion='distance')
    most_common_label = Counter(labels).most_common(1)[0][0]
    I = np.where(labels==most_common_label)[0]

    ### apply lower threshold ###
    Z_lower = linkage(v, method='single')
    labels = fcluster(Z_lower, lower, criterion='distance')
    uniq_labels = np.unique(labels)

    clusters = []
    for label in uniq_labels:
        indices = np.where(labels==label)[0]
        clusters.append(indices)

    J = np.ones(len(clusters), dtype=int) * -1
    rmsd = None
    for i, cluster in enumerate(clusters):
        if len(cluster) > 0:
            # find the conformations with the largest coverage 
            # (the weight of the ref should be 1)
            weights = [ens[j].getWeights().sum() for j in cluster]
            js = np.where(weights==np.max(weights))[0]

            # in the case where there are multiple structures with the same weight,
            # the one with the smallest rmsd wrt the ens._coords is selected. 
            if len(js) > 1:
                # rmsd is not calulated unless necessary for the sake of efficiency
                rmsd = ens.getRMSDs() if rmsd is None else rmsd
                j = js[np.argmin(rmsd[js])]
            else:
                j = js[0]
            J[i] = cluster[j]
        else:
            J[i] = cluster[0]

    ### refine ensemble ###
    K = np.intersect1d(I, J)

    reens = ens[K]

    return reens
开发者ID:fongchun,项目名称:ProDy,代码行数:56,代码来源:legacy.py


示例17: clusterData

def clusterData(xdata, rowMethod=True, columnMethod=False, method='average', metric='euclidean'):
    """clusterData clusters the data either by row, by column, or both

    :param xdata: a data dictionary - the one to be transformed
    :type x: dict, must contain 'data', 'proteins', 'fractions'
    :param rowMethod: a boolean asking if you want to flip on the rows (proteins get clustered)
    :type rowMethod: bool
    :param columnMethod: a boolean asking if you want to flip on the columns (fractions get clustered)
    :type columnMethod: bool
    :param method: string defining the linkage type, defaults to 'average' - 'ward' might be a good option
    :type method: string
    :param metric: string defining the distance metric, defaults to 'euclidean'
    :type metric: string
    :returns:  a data ditionary. 'data', 'proteins', 'fractions', 'fi', 'pi', 'topDendro', 'rightDendro' are updated

    """
        
    xdat = xdata.copy()
    x = xdat['data']
    ind1 = xdat['proteins']
    ind2 = xdat['fractions']
    xt = x
    idx1 = None
    idx2 = None
    
    toReturn = xdat
    Y1 = None
    Y2 = None
    if rowMethod:
        d1 = ssd.pdist(x)
        D1 = ssd.squareform(d1)  # full matrix
        Y1 = sch.linkage(D1, method=method, metric=metric) ### gene-clustering metric - 'average', 'single', 'centroid', 'complete'
        Z1 = sch.dendrogram(Y1, no_plot=True, orientation='right')
        idx1 = Z1['leaves'] ### apply the clustering for the gene-dendrograms to the actual matrix data
        xt = xt[idx1,:]   # xt is transformed x
        newIndex = []
        for i in idx1:
            newIndex.append(ind1[i])
        toReturn['proteins'] = newIndex
        toReturn['pi'] = idx1
    if columnMethod:
        d2 = ssd.pdist(x.T)
        D2 = ssd.squareform(d2)
        Y2 = sch.linkage(D2, method=method, metric=metric) ### array-clustering metric - 'average', 'single', 'centroid', 'complete'
        Z2 = sch.dendrogram(Y2, no_plot=True)
        idx2 = Z2['leaves'] ### apply the clustering for the array-dendrograms to the actual matrix data
        xt = xt[:,idx2]
        newIndex = []
        for i in idx2:
            newIndex.append(ind2[i])
        toReturn['fractions'] = newIndex
        toReturn['fi'] = idx2
    toReturn['data'] = xt
    toReturn['topDendro'] = Y2
    toReturn['rightDendro'] = Y1
    return toReturn
开发者ID:joeydavis,项目名称:clusteringModule,代码行数:56,代码来源:clusteringModule.py


示例18: heatmap_v1

    def heatmap_v1(self,data_I,row_labels_I,column_labels_I):
        '''Generate a heatmap using pandas and scipy
        DEPRECATED: kept for compatibility with old io methods'''

        """dendrogram documentation:

        Output:
        'color_list': A list of color names. The k?th element represents the color of the k?th link.
        'icoord' and 'dcoord':  Each of them is a list of lists. Let icoord = [I1, I2, ..., Ip] where Ik = [xk1, xk2, xk3, xk4] and dcoord = [D1, D2, ..., Dp] where Dk = [yk1, yk2, yk3, yk4], then the k?th link painted is (xk1, yk1) - (xk2, yk2) - (xk3, yk3) - (xk4, yk4).
        'ivl':  A list of labels corresponding to the leaf nodes.
        'leaves': For each i, H[i] == j, cluster node j appears in position i in the left-to-right traversal of the leaves, where \(j < 2n-1\) and \(i < n\). If j is less than n, the i-th leaf node corresponds to an original observation. Otherwise, it corresponds to a non-singleton cluster."""

        #parse input into col_labels and row_labels
        #TODO: pandas is not needed for this.
        mets_data = pd.DataFrame(data=data_I, index=row_labels_I, columns=column_labels_I)

        mets_data = mets_data.dropna(how='all').fillna(0.)
        #mets_data = mets_data.replace([np.inf], 10.)
        #mets_data = mets_data.replace([-np.inf], -10.)
        col_labels = list(mets_data.columns)
        row_labels = list(mets_data.index)

        #heatmap data matrix
        heatmap_data = []
        for i,g in enumerate(mets_data.index):
            for j,c in enumerate(mets_data.columns):
                #heatmap_data.append({"col": j+1, "row": i+1, "value": mets_data.ix[g][c]})
                heatmap_data.append({"col": j, "row": i, "value": mets_data.ix[g][c]})

        #perform the custering on the both the rows and columns
        dm = mets_data
        D1 = squareform(pdist(dm, metric='euclidean'))
        D2 = squareform(pdist(dm.T, metric='euclidean'))

        Y = linkage(D1, method='single')
        Z1 = dendrogram(Y, labels=dm.index)

        Y = linkage(D2, method='single')
        Z2 = dendrogram(Y, labels=dm.columns)

        #parse the output
        hccol = Z2['leaves'] # no hclustering; same as heatmap_data['col']
        hcrow = Z1['leaves'] # no hclustering; same as heatmap_data['row']
        hccolicoord = Z2['icoord'] # no hclustering; same as heatmap_data['col']
        hcrowicoord = Z1['icoord'] # no hclustering; same as heatmap_data['row']
        hccoldcoord = Z2['dcoord'] # no hclustering; same as heatmap_data['col']
        hcrowdcoord = Z1['dcoord'] # no hclustering; same as heatmap_data['row']
        
        #hccol = [x+1 for x in hccol]; # hccol index should match heatmap_data index
        #hcrow = [x+1 for x in hcrow];

        return {'hcrow': hcrow, 'hccol': hccol, 'row_labels':row_labels,
                                            'col_labels':col_labels,
                                            'heatmap_data':heatmap_data,
                                            'maxval' : max([x['value'] for x in heatmap_data]),
                                            'minval' : min([x['value'] for x in heatmap_data])}
开发者ID:SBRG,项目名称:sbaas,代码行数:56,代码来源:base_calculate.py


示例19: test_correspond_4_and_up

 def test_correspond_4_and_up(self):
     # Tests correspond(Z, y) on linkage and CDMs over observation sets of
     # different sizes. Correspondance should be false.
     for (i, j) in list(zip(list(range(2, 4)), list(range(3, 5)))) + list(zip(list(range(3, 5)), list(range(2, 4)))):
         y = np.random.rand(i * (i - 1) // 2)
         y2 = np.random.rand(j * (j - 1) // 2)
         Z = linkage(y)
         Z2 = linkage(y2)
         assert_equal(correspond(Z, y2), False)
         assert_equal(correspond(Z2, y), False)
开发者ID:Kitchi,项目名称:scipy,代码行数:10,代码来源:test_hierarchy.py


示例20: heatmap_plot_zscore_bigneuron

def heatmap_plot_zscore_bigneuron(df_zscore_features, df_all, output_dir, title=None):

    print "heatmap plot:bigneuron"

    #taiwan
    metric ='nt_type'
    mtypes = np.unique(df_all[metric])
    print mtypes
    mtypes_pal = sns.color_palette("hls", len(mtypes))

    mtypes_lut = dict(zip(mtypes, mtypes_pal))
    mtypes_colors = df_all[metric].map(mtypes_lut)



    linkage = hierarchy.linkage(df_zscore_features, method='ward', metric='euclidean')

    data = df_zscore_features.transpose()
    row_linkage = hierarchy.linkage(data, method='ward', metric='euclidean')
    feature_order = hierarchy.leaves_list(row_linkage)

    #print data.index
    matchIndex = [data.index[x] for x in feature_order]
    #print matchIndex
    data = data.reindex(matchIndex)

    pl.figure()
    g = sns.clustermap(data, row_cluster = False, col_linkage=linkage, method='ward', metric='euclidean',
                       linewidths = 0.0,col_colors = [mtypes_colors],
                       cmap = sns.cubehelix_palette(light=1, as_cmap=True),figsize=(40,10))

    pl.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    pl.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    #g.ax_heatmap.set_xticklabels([])
    pl.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.95)  # !!!!!

    if title:
        pl.title(title)


    location ="best"
    num_cols=1
    # Legend for row and col colors

    for label in mtypes:
         g.ax_row_dendrogram.bar(0, 0, color=mtypes_lut[label], label=label, linewidth=0.0)
         g.ax_row_dendrogram.legend(loc=location, ncol=num_cols,borderpad=0)

    filename = output_dir + '/zscore_feature_heatmap.png'
    pl.savefig(filename, dpi=300)
    #pl.show()
    print("save zscore matrix heatmap figure to :" + filename)
    pl.close()
    print "done clustering and heatmap plotting"
    return linkage
开发者ID:XiaoxiaoLiu,项目名称:morphology_analysis,代码行数:55,代码来源:morph_cluster.py



注:本文中的scipy.cluster.hierarchy.linkage函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python hierarchy.maxdists函数代码示例发布时间:2022-05-27
下一篇:
Python hierarchy.leaves_list函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap