• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python hierarchy.ward函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中scipy.cluster.hierarchy.ward函数的典型用法代码示例。如果您正苦于以下问题:Python ward函数的具体用法?Python ward怎么用?Python ward使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了ward函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: demoFourGs

def demoFourGs():
    '''
    Demonstrate the performance of LCC on points drawn from four gaussians.

    Three plots of the same points are produced -- coloured by dat[1]
    (presumably the ground-truth labels), by Ward hierarchical clustering
    cut into 4 flat clusters, and by Latent Configuration Clustering -- and
    shown side by side in one montage window. Nothing is returned.
    '''
    s = (640, 480)
    dat = genNormalClusters(N=100, size=s)
    cList = ['red', 'blue', 'green', 'yellow']
    img_truth = plotClusts(dat[0], dat[1], size=s,
                           colors=[cList[i] for i in dat[1]], window=None)

    # generate normal hierarchical clustering off euclidean data points
    # print(...) with one string argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print("Generating Hierarchical Clustering on Raw Data")
    Z2 = spc.ward(scipy.array(dat[0]))
    clusts2 = spc.fcluster(Z2, 4, criterion="maxclust")
    # fcluster labels start at 1, hence the i-1 when indexing the colour list
    img_HC = plotClusts(dat[0], clusts2, size=s,
                        colors=[cList[i-1] for i in clusts2], window=None)

    # generate LCC clustering
    print("Generating LCC Clustering")
    (clusts, _, _, _) = pf.LatentConfigurationClustering(dat[0], pt_dist, 4, numtrees=27)
    # NOTE(review): i-1 assumes LCC labels are 1-based like fcluster's -- confirm
    img_LCC = plotClusts(dat[0], clusts, size=s,
                         colors=[cList[i-1] for i in clusts], window=None)

    im = pv.ImageMontage([img_truth, img_LCC, img_HC], layout=(1, 3), gutter=3,
                         tileSize=(320, 240), labels=None)
    im.show(window="Truth vs. LCC vs. HC")
开发者ID:Sciumo,项目名称:ProximityForest,代码行数:27,代码来源:LatentConfigurationClustering_Demo.py


示例2: test_scikit_vs_scipy

def test_scikit_vs_scipy():
    """Test scikit ward with full connectivity (i.e. unstructured) vs scipy.

    For five random data sets, the tree built by ``ward_tree`` with an
    all-ones connectivity matrix is cut into ``k`` clusters and compared
    with the same cut of the tree built by ``scipy``'s ``hierarchy.ward``.
    """
    from scipy.sparse import lil_matrix

    n, p, k = 10, 5, 3
    rnd = np.random.RandomState(0)

    connectivity = lil_matrix(np.ones((n, n)))
    for i in range(5):
        X = 0.1 * rnd.normal(size=(n, p))
        X -= 4 * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]

        out = hierarchy.ward(X)

        # The first two columns of the linkage matrix hold the merged node
        # ids. The builtin int replaces the np.int alias, which was removed
        # from NumPy.
        children_ = out[:, :2].astype(int)
        children, _, n_leaves, _ = ward_tree(X, connectivity)

        cut = _hc_cut(k, children, n_leaves)
        cut_ = _hc_cut(k, children_, n_leaves)
        assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    # (reuses children / n_leaves left over from the last loop iteration)
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
开发者ID:VirgileFritsch,项目名称:scikit-learn,代码行数:25,代码来源:test_hierarchical.py


示例3: make_tree

def make_tree(X, C, method='single'):
    """Build a ``Tree`` from an agglomerative clustering.

    Parameters
    ----------
    X
        Input handed to ``ward`` when ``method == 'ward'``.
    C
        Input handed to ``single`` / ``average`` for the other methods.
    method : {'single', 'ward', 'average'}
        Which linkage function to apply.

    Returns
    -------
    Tree
        Tree whose root is built by ``construct_node`` from the linkage.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (previously this
        surfaced as an ``UnboundLocalError`` on the return line).
    """
    if method == 'single':
        merges = single(C)
    elif method == 'ward':
        merges = ward(X)
    elif method == 'average':
        merges = average(C)
    else:
        raise ValueError(
            "unknown method %r; expected 'single', 'ward' or 'average'"
            % (method,))
    return Tree(root=construct_node(to_tree(merges)))
开发者ID:sharadmv,项目名称:trees,代码行数:8,代码来源:agglomerative.py


示例4: plotHierarchichalClusterGraph

def plotHierarchichalClusterGraph(tf_idf_matrix, headlines_utf):
    """Draw a Ward-linkage dendrogram of the documents and save it as a PNG.

    Parameters
    ----------
    tf_idf_matrix
        Matrix passed to ``cosine_similarity``; one minus the result is
        what gets clustered.
    headlines_utf
        Leaf labels for the dendrogram.

    The figure is written to ``../plots/hierachichal_clusters.png``;
    nothing is returned.
    """
    dist = 1 - cosine_similarity(tf_idf_matrix)
    # NOTE(review): ward() treats a 2-D input as observation vectors, so the
    # rows of this square matrix are clustered as feature vectors rather than
    # read as precomputed distances -- confirm this is intended.
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    dendrogram(linkage_matrix, orientation="right", labels=headlines_utf)

    # Booleans instead of 'off': current Matplotlib treats any non-empty
    # string as true, so the 'off' strings left the x ticks visible.
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.tight_layout()
    plt.savefig('../plots/hierachichal_clusters.png', dpi=200)
开发者ID:rubyagarwal,项目名称:NewsClustering,代码行数:9,代码来源:clusterInfoProcessor.py


示例5: setUp

    def setUp(self):
        """Build ``self.tree``: a Ward-linkage tree over 10 seeded random values.

        Every node gets length 1 and every non-tip node is named ``y<i>``,
        where ``i`` is its position in the post-order traversal.
        """
        np.random.seed(0)
        x = np.random.rand(10)
        dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x-y))
        lm = ward(dm.condensed_form())
        # The builtin str replaces the np.str alias, which was removed from
        # NumPy.
        ids = np.arange(len(x)).astype(str)
        self.tree = TreeNode.from_linkage_matrix(lm, ids)

        # initialize tree with branch length and named internal nodes
        for i, n in enumerate(self.tree.postorder(include_self=True)):
            n.length = 1
            if not n.is_tip():
                n.name = "y%d" % i
开发者ID:biocore,项目名称:gneiss,代码行数:13,代码来源:test_dendrogram.py


示例6: hierarchyCluster

def hierarchyCluster(dist,titles):
    """Show a Ward-linkage dendrogram for ``dist`` with ``titles`` as labels.

    Parameters
    ----------
    dist
        Input handed straight to ``ward``.
        NOTE(review): the original comment calls this a pre-computed
        distance matrix, but ward() reads a square 2-D array as observation
        vectors -- confirm the intended input form.
    titles
        Leaf labels for the dendrogram.

    Opens a plot window via ``plt.show()``; nothing is returned.
    """
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    dendrogram(linkage_matrix, orientation="right", labels=titles)

    # Booleans instead of 'off'/'on': current Matplotlib treats any non-empty
    # string as true, so bottom='off' and top='off' left the ticks visible.
    plt.tick_params(
        axis='x',           # changes apply to the x-axis
        which='major',      # only major ticks are affected
        bottom=False,       # ticks along the bottom edge are off
        top=False,          # ticks along the top edge are off
        labelbottom=True)   # tick labels along the bottom stay visible

    plt.tight_layout()  # show plot with tight layout
    plt.show()
开发者ID:tuling56,项目名称:Python,代码行数:14,代码来源:mtextcluster_fun.py


示例7: _ward_cluster

def _ward_cluster(X):
    """Split the rows of ``X`` into at most two clusters with Ward linkage.

    Parameters
    ----------
    X
        2-D array-like, one observation per row (presumably z-scored
        upstream, per the original comment -- not checked here).

    Returns
    -------
    Flat cluster labels, one per row of ``X``, as produced by
    ``fcluster(..., 2, criterion="maxclust")``.
    """
    # condensed pairwise correlation distances (1 - corr) between the rows
    corr_dist = pdist(X, metric="correlation")

    # Ward linkage on those distances, then cut at the top branch split
    merge_tree = ward(corr_dist)
    return fcluster(merge_tree, 2, criterion="maxclust")
开发者ID:jknox13,项目名称:cortical_paper,代码行数:14,代码来源:clustering.py


示例8: hierachical_clustering

    def hierachical_clustering(self):
        """Show a Ward-linkage dendrogram built from ``self.__dist_matrix``.

        Opens a plot window via ``plt.show()``; nothing is returned.
        """
        # NOTE(review): ward() reads a square 2-D array as observation
        # vectors, not as pre-computed distances -- confirm the stored form.
        linkage_matrix = ward(self.__dist_matrix)

        fig, ax = plt.subplots(figsize=(15, 9))  # set size
        # NOTE(review): `titles` is neither a parameter nor an attribute
        # here; it must exist as a module-level name -- confirm.
        dendrogram(linkage_matrix, orientation="right", labels=titles)

        # Booleans instead of 'off': current Matplotlib treats any non-empty
        # string as true, so the 'off' strings left the x ticks visible.
        plt.tick_params(
            axis='x',            # changes apply to the x-axis
            which='both',        # both major and minor ticks are affected
            bottom=False,        # ticks along the bottom edge are off
            top=False,           # ticks along the top edge are off
            labelbottom=False)   # tick labels along the bottom are off

        fig.set_tight_layout(True)  # show plot with tight layout
        plt.show()
开发者ID:adisorn711,项目名称:comp6237cw2,代码行数:15,代码来源:AJTokenizer.py


示例9: test_cache_ntips

    def test_cache_ntips(self):
        """``_cache_ntips`` must store the tip count on every node.

        A 4-tip Ward tree is built from the values 0..3; the root is
        expected to report 4 leaves, its two children 2 each, and each
        grandchild 1.
        """
        dm = DistanceMatrix.from_iterable([0, 1, 2, 3],
                                          lambda x, y: np.abs(x-y))
        lm = ward(dm.condensed_form())
        # The builtin str replaces the np.str alias, which was removed from
        # NumPy.
        ids = np.arange(4).astype(str)
        t = mock.from_linkage_matrix(lm, ids)

        t._cache_ntips()

        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(t.leafcount, 4)
        self.assertEqual(t.children[0].leafcount, 2)
        self.assertEqual(t.children[1].leafcount, 2)
        self.assertEqual(t.children[0].children[0].leafcount, 1)
        self.assertEqual(t.children[0].children[1].leafcount, 1)
        self.assertEqual(t.children[1].children[0].leafcount, 1)
        self.assertEqual(t.children[1].children[1].leafcount, 1)
开发者ID:biocore,项目名称:gneiss,代码行数:16,代码来源:test_dendrogram.py


示例10: knn

def knn(df, axis=None, labels=None):
    """Draw a Ward-linkage dendrogram for the rows of ``df``.

    Parameters
    ----------
    df
        Object whose ``.values`` are passed to ``cosine_similarity``.
    axis
        Unused by this function.
    labels
        Leaf labels for the dendrogram.

    The plot is drawn on a new 15x20 figure; nothing is shown, saved or
    returned here.
    """
    dist = 1 - cosine_similarity(df.values)
    # NOTE(review): ward() reads a square 2-D array as observation vectors,
    # not as pre-computed distances -- confirm this is intended.
    linkage_matrix = ward(dist)

    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    dendrogram(linkage_matrix, orientation="right", labels=labels)

    # Booleans instead of 'off': current Matplotlib treats any non-empty
    # string as true, so the 'off' strings left the x ticks visible.
    plt.tick_params(
        axis='x',            # changes apply to the x-axis
        which='both',        # both major and minor ticks are affected
        bottom=False,        # ticks along the bottom edge are off
        top=False,           # ticks along the top edge are off
        labelbottom=False)   # tick labels along the bottom are off

    plt.tight_layout()
开发者ID:heggy231,项目名称:social-deprivation,代码行数:16,代码来源:dataAnalysis.py


示例11: create_hierarchy

    def create_hierarchy(self, sim_matrix):
        """Draw a Ward-linkage dendrogram and save it as ``ward_clusters.png``.

        Parameters
        ----------
        sim_matrix
            Input handed straight to ``ward``.
            NOTE(review): the name suggests a similarity matrix, while
            ward() expects observation vectors or condensed distances --
            confirm what callers pass.

        Leaf labels come from ``self.titles``. Returns ``None``.
        """
        linkage_matrix = ward(sim_matrix)
        fig, ax = plt.subplots(figsize=(15, 20))  # set size
        dendrogram(linkage_matrix, orientation="right", labels=self.titles)

        # Booleans instead of 'off': current Matplotlib treats any non-empty
        # string as true, so the 'off' strings left the x ticks visible.
        plt.tick_params(
            axis='x',            # changes apply to the x-axis
            which='both',        # both major and minor ticks are affected
            bottom=False,        # ticks along the bottom edge are off
            top=False,           # ticks along the top edge are off
            labelbottom=False)   # tick labels along the bottom are off

        plt.tight_layout()  # show plot with tight layout

        # save figure as ward_clusters.png in the current working directory
        plt.savefig('ward_clusters.png', dpi=200)
开发者ID:MoizRauf,项目名称:OQuant_Wiki_Clustering,代码行数:17,代码来源:ClusteringAlgo.py


示例12: lsa_dendrogram

def lsa_dendrogram(lessonpath):
    """Show a Ward-linkage dendrogram of the documents after LSA.

    Parameters
    ----------
    lessonpath
        Passed to ``dtm_matrix`` to obtain the document-term matrix and
        the document indices used as leaf labels.

    The dendrogram is handed to ``show``; nothing is returned.
    """
    # document-term matrix and document indices (lessonname is not used here)
    dtm, docindex, lessonname = dtm_matrix(lessonpath)

    # reconstructed dtm matrix using LSA and a reduced subspace of dimension 3
    dtm2 = LSA_dtm(dtm, 3)

    # Distance based on cosine similarity of the LSA reconstruction. The
    # previous code measured the raw dtm here, which left dtm2 unused and
    # made the LSA step a no-op for the plot.
    dist = 1 - cosine_similarity(dtm2)

    # round to 10 decimals to drop floating-point noise
    dist = np.round(dist, 10)

    # linkage matrix
    # NOTE(review): ward() reads a square 2-D array as observation vectors,
    # not as pre-computed distances -- confirm this is intended.
    linkage_matrix = ward(dist)

    # dendrogram
    show(dendrogram(linkage_matrix, orientation="right", labels=docindex))
开发者ID:dizcology,项目名称:cogitatio_2,代码行数:17,代码来源:LSA_code.py


示例13: find_clusters

 def find_clusters(self, features):
     ''' Returns the clusters and their centroids.

     features is a 2-D array with one sample per row. Half as many clusters
     as samples (rounded down) are requested from a Ward linkage.

     Returns a tuple (clusters, centroid): clusters holds the flat cluster
     label of every row, and centroid[i-1] is the mean of the rows labelled i.
     '''
     # 1. Cluster the data.
     # Explicit floor division keeps the Python 2 result of the original
     # `/` on integers when this runs on Python 3.
     totalClusters = int(features.shape[0] // 2)
     # NOTE(review): 1 - cosine distance is a similarity, and ward() reads
     # a square 2-D array as observation vectors -- confirm this is intended.
     distance = 1 - pairwise_distances(features, metric = "cosine")
     # Ward minimizes the sum of squared differences within all clusters.
     # It is a variance-minimizing approach, which is similar to the k-means objective function.
     linkage_matrix = ward(distance)
     clusters = fcluster(linkage_matrix, totalClusters, criterion = 'maxclust')
     # %-formatting prints the same text on Python 2 and 3; the bare print
     # statement is a syntax error on Python 3.
     print("Number of clusters: %d" % totalClusters)

     # 2. Find the centroid for each cluster.
     # NOTE(review): 'maxclust' may yield fewer clusters than requested; the
     # rows for missing labels would then be the mean of an empty selection.
     centroid = np.empty([totalClusters, features.shape[1]])
     for i in range(1, totalClusters + 1):
         nCluster = np.where(clusters == i)
         centroid[i-1,:] = np.mean(features[nCluster], axis = 0)
     return (clusters, centroid)
开发者ID:yxy-github,项目名称:Twitter,代码行数:17,代码来源:twitterAlgorithms.py


示例14: get_clusters

	def get_clusters(self, data, features=None, text_features=[], n_clusters=8, centroid_features=10, random_seeds=True, 
		weights=[]):

		"""
		Applies agglomerative hierarchical clustering using Ward's linkage.

		The encoded features are turned into ``1 - cosine_similarity`` and the
		resulting Ward linkage is stored on ``self.linkage_matrix``.

		NOTE(review): this method looks unfinished. ``n_clusters``,
		``centroid_features``, ``random_seeds`` and ``weights`` are never read
		in the body, and the return statement uses ``km`` and ``centroids``,
		which are not defined in this method.

		Parameters
		----------
		data : Pandas DataFrame
			Data on which to apply clustering
		features : list, optional, default : all columns used as features
			Subset of columns in the data frame to be used as features
		text_features : list, optional, default : empty list
			List of features that are of type text. These are then vectorized
			using TfidfVectorizer (presumably inside ``encode_features`` --
			confirm).
		n_clusters : int, optional, default: 8
			The number of clusters to form as well as the number of centroids to generate.
		centroid_features : int, optional, default: 10
			The number of most-important-features to return against each cluster centroid
		random_seeds : boolean, optional, default: True
			If False, uses clusters from kernel density estimation followed by thresholding
			as initial seeds. The number of clusters is also determined by results of kde and
			thus n_clusters parameter is ignored.
		weights : list, optional, default : empty list
			Not used by the visible code.

		Returns
		-------
		result : tuple (labels, centroid_features)
			labels : 
				cluster numbers against each row of the data passed
			centroids : dictionary
				map of most important features of each cluster 
		"""

		X = self.encode_features(data, features, text_features)

		# NOTE(review): looks like a leftover interactive debugging hook --
		# confirm and remove before this runs unattended.
		ipshell()

		dist = 1 - cosine_similarity(X)

		# NOTE(review): ward() reads a square 2-D array as observation
		# vectors, not as pre-computed distances -- confirm this is intended.
		self.linkage_matrix = ward(dist)

		# NOTE(review): `km` and `centroids` are not assigned anywhere in this
		# method; unless they are module-level names this raises NameError.
		return (km.labels_, centroids)
开发者ID:fahadsultan,项目名称:datalib,代码行数:42,代码来源:ward.py


示例15: setUp

    def setUp(self):
        """Create the heatmap fixtures from seeded random data.

        Sets ``self.table`` (5x5 random DataFrame), ``self.t`` (a
        ``SquareDendrogram`` of a 5-tip Ward tree), ``self.md`` (a two-group
        Series) and ``self.highlights`` (colours keyed by ``y8`` / ``y6``).
        """
        np.random.seed(0)
        self.table = pd.DataFrame(np.random.random((5, 5)),
                                  index=['0', '1', '2', '3', '4'],
                                  columns=['0', '1', '2', '3', '4'])

        num_otus = 5  # otus
        x = np.random.rand(num_otus)
        dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x-y))
        lm = ward(dm.condensed_form())
        # The builtin str replaces the np.str alias, which was removed from
        # NumPy.
        t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
        self.t = SquareDendrogram.from_tree(t)
        self.md = pd.Series(['a', 'a', 'a', 'b', 'b'],
                            index=['0', '1', '2', '3', '4'])
        # name the internal nodes y<i> by post-order position and give every
        # node a random length in [0, 3)
        for i, n in enumerate(t.postorder()):
            if not n.is_tip():
                n.name = "y%d" % i
            n.length = np.random.rand()*3

        self.highlights = pd.DataFrame({'y8': ['#FF0000', '#00FF00'],
                                        'y6': ['#0000FF', '#F0000F']}).T
开发者ID:biocore,项目名称:gneiss,代码行数:21,代码来源:test_heatmap.py


示例16: ward_tree

def ward_tree(X, connectivity=None, n_components=None, n_clusters=None,
              return_distance=False):
    """Ward clustering based on a Feature matrix.

    Recursively merges the pair of clusters that minimally increases
    within-cluster variance.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        feature matrix  representing n_samples samples to be clustered

    connectivity : sparse matrix (optional).
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_components : int (optional)
        Number of connected components. If None the number of connected
        components is estimated from the connectivity matrix.
        NOTE: This parameter is now directly determined directly
        from the connectivity matrix and will be removed in 0.18

    n_clusters : int (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    return_distance: bool (optional)
        If True, return the distance between the clusters.

    Returns
    -------
    children : 2D array, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    n_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree

    parents : 1D array, shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    distances : 1D array, shape (n_nodes-1, )
        Only returned if return_distance is set to True (for compatibility).
        The distances between the centers of the nodes. `distances[i]`
        corresponds to a weighted euclidean distance between
        the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to
        leaves of the tree, then `distances[i]` is their unweighted euclidean
        distance. Distances are updated in the following way
        (from scipy.hierarchy.linkage):

        The new entry :math:`d(u,v)` is computed as follows,

        .. math::

           d(u,v) = \\sqrt{\\frac{|v|+|s|}
                               {T}d(v,s)^2
                        + \\frac{|v|+|t|}
                               {T}d(v,t)^2
                        - \\frac{|v|}
                               {T}d(s,t)^2}

        where :math:`u` is the newly joined cluster consisting of
        clusters :math:`s` and :math:`t`, :math:`v` is an unused
        cluster in the forest, :math:`T=|v|+|s|+|t|`, and
        :math:`|*|` is the cardinality of its argument. This is also
        known as the incremental algorithm.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))
    n_samples, n_features = X.shape

    if connectivity is None:
        from scipy.cluster import hierarchy     # imports PIL

        if n_clusters is not None:
            warnings.warn('Partial build of the tree is implemented '
                          'only for structured clustering (i.e. with '
                          'explicit connectivity). The algorithm '
#.........这里部分代码省略.........
开发者ID:NUMBLP7890Fly,项目名称:scikit-learn,代码行数:101,代码来源:hierarchical.py


示例17: ward_tree

def ward_tree(X, connectivity=None, n_components=None, copy=True):
    """Ward clustering based on a Feature matrix.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account a some topological
    structure between samples.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        feature matrix  representing n_samples samples to be clustered

    connectivity : sparse matrix.
        connectivity matrix. Defines for each sample the neigbhoring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_components : int (optional)
        Number of connected components. If None the number of connected
        components is estimated from the connectivity matrix.

    copy : bool (optional)
        Make a copy of connectivity or work inplace. If connectivity
        is not of LIL type there will be a copy in any case.

    Returns
    -------
    children : list of pairs. Lenght of n_nodes
               list of the children of each nodes.
               Leaves of the tree have empty list of children.

    n_components : sparse matrix.
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree
    """
    X = np.asarray(X)
    n_samples, n_features = X.shape
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))

    if connectivity is None:
        out = hierarchy.ward(X)
        children_ = out[:, :2].astype(np.int)
        return children_, 1, n_samples

    # Compute the number of nodes
    if n_components is None:
        n_components, labels = cs_graph_components(connectivity)

    # Convert connectivity matrix to LIL with a copy if needed
    if sparse.isspmatrix_lil(connectivity) and copy:
        connectivity = connectivity.copy()
    else:
        connectivity = connectivity.tolil()

    if n_components > 1:
        warnings.warn("the number of connected components of the"
        " connectivity matrix is %d > 1. Completing it to avoid"
        " stopping the tree early."
        % n_components)
        connectivity = _fix_connectivity(X, connectivity,
                                            n_components, labels)
        n_components = 1

    n_nodes = 2 * n_samples - n_components

    if (connectivity.shape[0] != n_samples or
        connectivity.shape[1] != n_samples):
        raise ValueError('Wrong shape for connectivity matrix: %s '
                         'when X is %s' % (connectivity.shape, X.shape))

    # Remove diagonal from connectivity matrix
    connectivity.setdiag(np.zeros(connectivity.shape[0]))

    # create inertia matrix
    coord_row = []
    coord_col = []
    A = []
    for ind, row in enumerate(connectivity.rows):
        A.append(row)
        # We keep only the upper triangular for the moments
        # Generator expressions are faster than arrays on the following
        row = [i for i in row if i < ind]
        coord_row.extend(len(row) * [ind, ])
        coord_col.extend(row)

    coord_row = np.array(coord_row, dtype=np.int)
    coord_col = np.array(coord_col, dtype=np.int)

    # build moments as a list
    moments_1 = np.zeros(n_nodes)
    moments_1[:n_samples] = 1
    moments_2 = np.zeros((n_nodes, n_features))
    moments_2[:n_samples] = X
    inertia = np.empty(len(coord_row), dtype=np.float)
    _hierarchical.compute_ward_dist(moments_1, moments_2,
#.........这里部分代码省略.........
开发者ID:WeatherGod,项目名称:scikit-learn,代码行数:101,代码来源:hierarchical.py


示例18: get_country_vector

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
98. Clustering with Ward's method
Run hierarchical clustering with Ward's method on the word vectors from
problem 96, and visualise the clustering result as a dendrogram.
"""

from n90 import load_model
from n96 import get_country_vector
import numpy as np
from scipy.cluster.hierarchy import ward, dendrogram
import matplotlib.pyplot as plt
import sys


# The model path comes from the first command-line argument.
vector = get_country_vector(load_model(sys.argv[1]))

# Keys become the leaf labels; values are stacked into one 2-D array.
leaf_labels = list(vector.keys())
observations = np.array(list(vector.values()))

dendrogram(ward(observations), labels=leaf_labels)
plt.show()
开发者ID:chantera,项目名称:nlp100,代码行数:19,代码来源:n98.py


示例19: open

#samples = [2,3,4,5,6,7,8,9,10,11,13]
# Read the comma-separated values; the with-block closes the file handle,
# which the previous bare open(...).read() never did.
with open("FDC.csv", "r") as samples_file:
    samples = samples_file.read().split(",")
samples = [float(value) for value in samples]


"""
diff_samples = numpy.array(original_samples + [0])-numpy.array([0] + original_samples)
diff_samples = list(diff_samples)
difff_samples = numpy.array((diff_samples + [0])) - numpy.array([0] + diff_samples)
"""


# Run the hierarchical clustering
tsamples = numpy.array([samples]).transpose()  # one sample per row, one column
distance = distance_matrix(tsamples, tsamples)
# NOTE(review): ward() reads a square 2-D array as observation vectors, not
# as pre-computed distances -- confirm this is intended.
hc = ward(distance)

#print(hc)
dendrogram(hc)

def find_majority( array, index):
    """Find the most common value in a window of ``array`` around ``index``.

    The window runs from five positions before ``index`` (clamped at 0) up
    to and including five positions after it. An empty window raises
    ``ValueError`` from the unpacking below.

    NOTE(review): the visible code computes the majority value and its
    count but does not return them -- the function may continue beyond
    this excerpt.
    """
    window_start = max(0, index - 5)
    window = array[window_start:index + 6]

    tally = Counter(window)
    ((majority, count),) = tally.most_common(1)
开发者ID:daniellllllll,项目名称:random_sample,代码行数:31,代码来源:clustering.3.py


示例20: similarity

		return summs

	def similarity(data):
		"""Return ``cosine_similarity(data)`` (pairwise cosine similarities)."""
		# imported here, as in the original, so sklearn is only loaded on use
		from sklearn.metrics.pairwise import cosine_similarity
		return cosine_similarity(data)




# Stage 1: Ward linkage over the similarity matrix derived from bdata.
if True:
	reduced, v = reduce_data(bdata)
	simplified = summ_subs(reduced)
	similar = similarity(simplified)
	from scipy.cluster.hierarchy import ward
	# NOTE(review): ward() expects observation vectors or condensed
	# distances; a similarity matrix is passed here -- confirm intended.
	clusters = ward(similar)
	subnames = sorted(substance_count.keys())
	subcounts = [substance_count[key] for key in subnames]

# Stage 2: turn the linkage into a JSON tree and write it out.
if True:
	tree = jsontree(clusters,2*clusters.shape[0],subnames,subcounts,1,np.nan)
	#tree = jsontree(clusters,2*clusters.shape[0],subnames,subcounts,np.nan,1000)
	# Text mode ("w"): json.dump writes str, which a file opened with "wb"
	# rejects with TypeError on Python 3.
	with open(path+"gh-pages/tagtree.json","w") as j:
		import json
		json.dump(tree,j)

# Stage 3: same clustering on the transposed reduction of ldata.
if True:
	reduced, v = reduce_data(ldata)
	similar = similarity(reduced.T)
	from scipy.cluster.hierarchy import ward
	clusters = ward(similar)
开发者ID:kidaak,项目名称:ineffable,代码行数:31,代码来源:analyze.py



注:本文中的scipy.cluster.hierarchy.ward函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python vq.kmeans函数代码示例发布时间:2022-05-27
下一篇:
Python hierarchy.to_tree函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap