This article collects typical usage examples of the Python function scipy.cluster.hierarchy.ward. If you have been struggling with questions like: what exactly does the ward function do, and how do I call it? Then congratulations, the curated code examples here may be just the help you need.
Below, 20 code examples of the ward function are presented, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help our system recommend better Python code examples.
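Before the collected examples, here is a minimal sketch of the basic call pattern (the data is made up for illustration). Note that ward accepts either a raw observation matrix or a condensed distance matrix as produced by scipy.spatial.distance.pdist:

import numpy as np
from scipy.cluster.hierarchy import ward, fcluster

X = np.random.RandomState(0).rand(20, 2)        # 20 random 2-D observations
Z = ward(X)                                     # (n-1) x 4 linkage matrix
labels = fcluster(Z, 3, criterion="maxclust")   # cut into (at most) 3 flat clusters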
Example 1: demoFourGs

def demoFourGs():
    '''
    Demonstrate the performance of LCC
    on points drawn from four Gaussians.
    '''
    s = (640, 480)
    dat = genNormalClusters(N=100, size=s)
    cList = ['red', 'blue', 'green', 'yellow']
    img_truth = plotClusts(dat[0], dat[1], size=s,
                           colors=[cList[i] for i in dat[1]], window=None)
    # generate a standard hierarchical clustering from the raw euclidean data points
    print("Generating Hierarchical Clustering on Raw Data")
    Z2 = spc.ward(scipy.array(dat[0]))
    clusts2 = spc.fcluster(Z2, 4, criterion="maxclust")
    img_HC = plotClusts(dat[0], clusts2, size=s,
                        colors=[cList[i - 1] for i in clusts2], window=None)
    # generate the LCC clustering
    print("Generating LCC Clustering")
    (clusts, _, _, _) = pf.LatentConfigurationClustering(dat[0], pt_dist, 4, numtrees=27)
    img_LCC = plotClusts(dat[0], clusts, size=s,
                         colors=[cList[i - 1] for i in clusts], window=None)
    im = pv.ImageMontage([img_truth, img_LCC, img_HC], layout=(1, 3), gutter=3,
                         tileSize=(320, 240), labels=None)
    im.show(window="Truth vs. LCC vs. HC")

Author: Sciumo | Project: ProximityForest | Lines: 27 | Source: LatentConfigurationClustering_Demo.py
Example 2: test_scikit_vs_scipy

def test_scikit_vs_scipy():
    """Test scikit ward with full connectivity (i.e. unstructured) vs scipy
    """
    from scipy.sparse import lil_matrix
    n, p, k = 10, 5, 3
    rnd = np.random.RandomState(0)
    connectivity = lil_matrix(np.ones((n, n)))
    for i in range(5):
        X = 0.1 * rnd.normal(size=(n, p))
        X -= 4 * np.arange(n)[:, np.newaxis]
        X -= X.mean(axis=1)[:, np.newaxis]
        out = hierarchy.ward(X)
        children_ = out[:, :2].astype(int)  # np.int is deprecated; use the builtin
        children, _, n_leaves, _ = ward_tree(X, connectivity)
        cut = _hc_cut(k, children, n_leaves)
        cut_ = _hc_cut(k, children_, n_leaves)
        assess_same_labelling(cut, cut_)
    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)

Author: VirgileFritsch | Project: scikit-learn | Lines: 25 | Source: test_hierarchical.py
Example 3: make_tree

def make_tree(X, C, method='single'):
    # single/average linkage cluster the precomputed distances C,
    # while ward is computed directly from the observation matrix X
    if method == 'single':
        tree = to_tree(single(C))
    elif method == 'ward':
        tree = to_tree(ward(X))
    elif method == 'average':
        tree = to_tree(average(C))
    return Tree(root=construct_node(tree))

Author: sharadmv | Project: trees | Lines: 8 | Source: agglomerative.py
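A hypothetical call, assuming Tree and construct_node come from the same module and C is a condensed distance matrix from pdist (sample data made up):

import numpy as np
from scipy.spatial.distance import pdist

X = np.random.RandomState(0).rand(8, 3)
C = pdist(X)                        # condensed distances, used by 'single'/'average'
t = make_tree(X, C, method='ward')  # the 'ward' branch ignores C and uses X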
Example 4: plotHierarchichalClusterGraph

def plotHierarchichalClusterGraph(tf_idf_matrix, headlines_utf):
    dist = 1 - cosine_similarity(tf_idf_matrix)
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    dendrogram(linkage_matrix, orientation="right", labels=headlines_utf)
    # modern matplotlib requires booleans here; the 'off' strings are no longer accepted
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.tight_layout()
    plt.savefig('../plots/hierachichal_clusters.png', dpi=200)

Author: rubyagarwal | Project: NewsClustering | Lines: 9 | Source: clusterInfoProcessor.py
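A caveat worth noting about this idiom (it recurs in several examples below): scipy's ward interprets a square 2-D array as a matrix of observations, not of precomputed distances. To cluster on the cosine distances themselves, the square matrix must be condensed first; a minimal sketch:

import numpy as np
from scipy.cluster.hierarchy import ward
from scipy.spatial.distance import squareform
from sklearn.metrics.pairwise import cosine_similarity

X = np.random.RandomState(0).rand(5, 20)    # e.g. 5 documents, 20 terms
dist = 1 - cosine_similarity(X)
np.fill_diagonal(dist, 0.0)                 # remove floating-point residue
condensed = squareform(dist, checks=False)  # 1-D condensed form
linkage_matrix = ward(condensed)            # ward on the true distances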
Example 5: setUp

def setUp(self):
    np.random.seed(0)
    x = np.random.rand(10)
    dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y))
    lm = ward(dm.condensed_form())
    ids = np.arange(len(x)).astype(str)  # np.str is deprecated; use the builtin
    self.tree = TreeNode.from_linkage_matrix(lm, ids)
    # initialize tree with branch length and named internal nodes
    for i, n in enumerate(self.tree.postorder(include_self=True)):
        n.length = 1
        if not n.is_tip():
            n.name = "y%d" % i

Author: biocore | Project: gneiss | Lines: 13 | Source: test_dendrogram.py
Example 6: hierarchyCluster

def hierarchyCluster(dist, titles):
    # define the linkage_matrix using ward clustering on pre-computed distances
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    ax = dendrogram(linkage_matrix, orientation="right", labels=titles)
    plt.tick_params(
        axis='x',          # changes apply to the x-axis
        which='major',     # only major ticks are affected
        bottom=False,      # ticks along the bottom edge are off
        top=False,         # ticks along the top edge are off
        labelbottom=True)
    plt.tight_layout()     # show plot with tight layout
    plt.show()

Author: tuling56 | Project: Python | Lines: 14 | Source: mtextcluster_fun.py
Example 7: _ward_cluster

def _ward_cluster(X):
    """Clusters 1-corr using Ward distance

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        z-scored observations to cluster

    Returns
    -------
    labels : ndarray of int
        cluster assignment (1 or 2) from the top branch split
    """
    # pairwise (1 - corr) of zscores
    D = pdist(X, metric="correlation")
    # return top branch split using ward linkage
    return fcluster(ward(D), 2, criterion="maxclust")

Author: jknox13 | Project: cortical_paper | Lines: 14 | Source: clustering.py
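A minimal usage sketch of the snippet above (the random data is made up; pdist, ward, and fcluster are the scipy imports it relies on):

import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import ward, fcluster

X = np.random.RandomState(0).randn(30, 50)  # 30 samples, 50 features
labels = _ward_cluster(X)                   # array of 1s and 2s: the top split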
Example 8: hierachical_clustering

def hierachical_clustering(self):
    # define the linkage_matrix using ward clustering on pre-computed distances
    linkage_matrix = ward(self.__dist_matrix)
    fig, ax = plt.subplots(figsize=(15, 9))  # set size
    # note: `titles` is not defined in this excerpt; it is presumably a
    # module-level name in the original source
    ax = dendrogram(linkage_matrix, orientation="right", labels=titles)
    plt.tick_params(
        axis='x',         # changes apply to the x-axis
        which='both',     # both major and minor ticks are affected
        bottom=False,     # ticks along the bottom edge are off
        top=False,        # ticks along the top edge are off
        labelbottom=False)
    fig.set_tight_layout(True)  # show plot with tight layout
    plt.show()

Author: adisorn711 | Project: comp6237cw2 | Lines: 15 | Source: AJTokenizer.py
Example 9: test_cache_ntips

def test_cache_ntips(self):
    dm = DistanceMatrix.from_iterable([0, 1, 2, 3],
                                      lambda x, y: np.abs(x - y))
    lm = ward(dm.condensed_form())
    ids = np.arange(4).astype(str)  # np.str is deprecated; use the builtin
    t = mock.from_linkage_matrix(lm, ids)
    t._cache_ntips()
    self.assertEqual(t.leafcount, 4)  # assertEquals is a deprecated alias
    self.assertEqual(t.children[0].leafcount, 2)
    self.assertEqual(t.children[1].leafcount, 2)
    self.assertEqual(t.children[0].children[0].leafcount, 1)
    self.assertEqual(t.children[0].children[1].leafcount, 1)
    self.assertEqual(t.children[1].children[0].leafcount, 1)
    self.assertEqual(t.children[1].children[1].leafcount, 1)

Author: biocore | Project: gneiss | Lines: 16 | Source: test_dendrogram.py
Example 10: knn

def knn(df, axis=None, labels=None):
    # despite the name, this performs hierarchical (ward) clustering,
    # not k-nearest-neighbors
    dist = 1 - cosine_similarity(df.values)
    # define the linkage_matrix using ward clustering pre-computed distances
    linkage_matrix = ward(dist)
    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    ax = dendrogram(linkage_matrix, orientation="right", labels=labels)
    plt.tick_params(
        axis='x',         # changes apply to the x-axis
        which='both',     # both major and minor ticks are affected
        bottom=False,     # ticks along the bottom edge are off
        top=False,        # ticks along the top edge are off
        labelbottom=False)
    plt.tight_layout()

Author: heggy231 | Project: social-deprivation | Lines: 16 | Source: dataAnalysis.py
Example 11: create_hierarchy

def create_hierarchy(self, sim_matrix):
    linkage_matrix = ward(sim_matrix)
    fig, ax = plt.subplots(figsize=(15, 20))  # set size
    ax = dendrogram(linkage_matrix, orientation="right", labels=self.titles)
    plt.tick_params(
        axis='x',         # changes apply to the x-axis
        which='both',     # both major and minor ticks are affected
        bottom=False,     # ticks along the bottom edge are off
        top=False,        # ticks along the top edge are off
        labelbottom=False)
    plt.tight_layout()  # show plot with tight layout
    plt.savefig('ward_clusters.png', dpi=200)  # save figure as ward_clusters
    return

Author: MoizRauf | Project: OQuant_Wiki_Clustering | Lines: 17 | Source: ClusteringAlgo.py
Example 12: lsa_dendrogram

def lsa_dendrogram(lessonpath):
    # document-term matrix and document indices
    dtm, docindex, lessonname = dtm_matrix(lessonpath)
    # reconstructed dtm matrix using LSA and a reduced subspace of dimension 3
    dtm2 = LSA_dtm(dtm, 3)
    # distance metric based on cosine similarity
    # (note: the original computes distances on dtm, leaving dtm2 unused;
    # the LSA reconstruction was presumably meant here)
    dist = 1 - cosine_similarity(dtm)
    dist = np.round(dist, 10)
    # linkage matrix
    linkage_matrix = ward(dist)
    # dendrogram
    show(dendrogram(linkage_matrix, orientation="right", labels=docindex))

Author: dizcology | Project: cogitatio_2 | Lines: 17 | Source: LSA_code.py
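LSA_dtm is project-specific; as a rough stand-in (an assumption, not the project's actual implementation), a rank-k LSA reconstruction of a document-term matrix can be sketched with scikit-learn's TruncatedSVD:

import numpy as np
from sklearn.decomposition import TruncatedSVD

def lsa_reconstruct(dtm, k=3):
    # rank-k LSA approximation: project onto the top-k singular directions,
    # then map back into term space
    svd = TruncatedSVD(n_components=k)
    scores = svd.fit_transform(dtm)   # (n_docs, k) = U * Sigma
    return scores @ svd.components_   # (n_docs, n_terms) reconstruction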
Example 13: find_clusters

def find_clusters(self, features):
    '''Returns the clusters and their centroids.'''
    # 1. Cluster the data.
    totalClusters = int(round(features.shape[0] / 2))
    # note: 1 - cosine distance is a *similarity*; passing this square matrix
    # to ward() makes scipy treat it as an observation matrix
    distance = 1 - pairwise_distances(features, metric="cosine")
    # Ward minimizes the sum of squared differences within all clusters.
    # It is a variance-minimizing approach, similar to the k-means objective function.
    linkage_matrix = ward(distance)
    clusters = fcluster(linkage_matrix, totalClusters, criterion='maxclust')
    print("Number of clusters:", totalClusters)
    # 2. Find the centroid for each cluster.
    centroid = np.empty([totalClusters, features.shape[1]])
    for i in range(1, totalClusters + 1):
        nCluster = np.where(clusters == i)
        centroid[i - 1, :] = np.mean(features[nCluster], axis=0)
    return (clusters, centroid)

Author: yxy-github | Project: Twitter | Lines: 17 | Source: twitterAlgorithms.py
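The same two-step pattern (cut the ward linkage into flat clusters, then average each cluster's rows) in a self-contained sketch; the sample data is made up, and iterating over np.unique guards against fcluster returning fewer than the requested number of clusters:

import numpy as np
from scipy.cluster.hierarchy import ward, fcluster

features = np.random.RandomState(0).rand(10, 4)
Z = ward(features)
k = features.shape[0] // 2
clusters = fcluster(Z, k, criterion='maxclust')
centroids = np.vstack([features[clusters == c].mean(axis=0)
                       for c in np.unique(clusters)])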
Example 14: get_clusters

def get_clusters(self, data, features=None, text_features=[], n_clusters=8,
                 centroid_features=10, random_seeds=True, weights=[]):
    """
    Applies agglomerative hierarchical clustering using Ward's linkage

    Parameters
    ----------
    data : Pandas DataFrame
        Data on which to apply clustering
    features : list, optional, default : all columns used as features
        Subset of columns in the data frame to be used as features
    text_features : list, optional, default : []
        List of features that are of type text. These are vectorized using
        TfidfVectorizer.
    n_clusters : int, optional, default: 8
        The number of clusters to form as well as the number of centroids to generate.
    centroid_features : int, optional, default: 10
        The number of most-important-features to return for each cluster centroid
    random_seeds : boolean, optional, default: True
        If False, uses clusters from kernel density estimation followed by
        thresholding as initial seeds. The number of clusters is then also
        determined by the kde results and the n_clusters parameter is ignored.

    Returns
    -------
    result : tuple (labels, centroids)
        labels :
            cluster numbers against each row of the data passed
        centroids : dictionary
            map of most important features of each cluster
    """
    X = self.encode_features(data, features, text_features)
    dist = 1 - cosine_similarity(X)
    self.linkage_matrix = ward(dist)
    # the original excerpt left a debugging ipshell() call here and returned
    # `km.labels_` from an undefined k-means object; deriving flat labels from
    # the ward linkage instead (assumes scipy.cluster.hierarchy.fcluster is
    # imported; centroid extraction is elided in this excerpt)
    labels = fcluster(self.linkage_matrix, n_clusters, criterion='maxclust')
    centroids = {}
    return (labels, centroids)

Author: fahadsultan | Project: datalib | Lines: 42 | Source: ward.py
Example 15: setUp

def setUp(self):
    np.random.seed(0)
    self.table = pd.DataFrame(np.random.random((5, 5)),
                              index=['0', '1', '2', '3', '4'],
                              columns=['0', '1', '2', '3', '4'])
    num_otus = 5  # otus
    x = np.random.rand(num_otus)
    dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y))
    lm = ward(dm.condensed_form())
    t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
    self.t = SquareDendrogram.from_tree(t)
    self.md = pd.Series(['a', 'a', 'a', 'b', 'b'],
                        index=['0', '1', '2', '3', '4'])
    for i, n in enumerate(t.postorder()):
        if not n.is_tip():
            n.name = "y%d" % i
        n.length = np.random.rand() * 3
    self.highlights = pd.DataFrame({'y8': ['#FF0000', '#00FF00'],
                                    'y6': ['#0000FF', '#F0000F']}).T

Author: biocore | Project: gneiss | Lines: 21 | Source: test_heatmap.py
Example 16: ward_tree

def ward_tree(X, connectivity=None, n_components=None, n_clusters=None,
              return_distance=False):
    """Ward clustering based on a Feature matrix.

    Recursively merges the pair of clusters that minimally increases
    within-cluster variance.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Read more in the :ref:`User Guide <hierarchical_clustering>`.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        feature matrix representing n_samples samples to be clustered

    connectivity : sparse matrix (optional).
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_components : int (optional)
        Number of connected components. If None the number of connected
        components is estimated from the connectivity matrix.
        NOTE: This parameter is now directly determined from the
        connectivity matrix and will be removed in 0.18

    n_clusters : int (optional)
        Stop early the construction of the tree at n_clusters. This is
        useful to decrease computation time if the number of clusters is
        not small compared to the number of samples. In this case, the
        complete tree is not computed, thus the 'children' output is of
        limited use, and the 'parents' output should rather be used.
        This option is valid only when specifying a connectivity matrix.

    return_distance : bool (optional)
        If True, return the distance between the clusters.

    Returns
    -------
    children : 2D array, shape (n_nodes-1, 2)
        The children of each non-leaf node. Values less than `n_samples`
        correspond to leaves of the tree which are the original samples.
        A node `i` greater than or equal to `n_samples` is a non-leaf
        node and has children `children_[i - n_samples]`. Alternatively
        at the i-th iteration, children[i][0] and children[i][1]
        are merged to form node `n_samples + i`

    n_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree

    parents : 1D array, shape (n_nodes, ) or None
        The parent of each node. Only returned when a connectivity matrix
        is specified, elsewhere 'None' is returned.

    distances : 1D array, shape (n_nodes-1, )
        Only returned if return_distance is set to True (for compatibility).
        The distances between the centers of the nodes. `distances[i]`
        corresponds to a weighted euclidean distance between
        the nodes `children[i, 1]` and `children[i, 2]`. If the nodes refer to
        leaves of the tree, then `distances[i]` is their unweighted euclidean
        distance. Distances are updated in the following way
        (from scipy.hierarchy.linkage):

        The new entry :math:`d(u,v)` is computed as follows,

        .. math::

            d(u,v) = \\sqrt{\\frac{|v|+|s|}{T} d(v,s)^2
                          + \\frac{|v|+|t|}{T} d(v,t)^2
                          - \\frac{|v|}{T} d(s,t)^2}

        where :math:`u` is the newly joined cluster consisting of
        clusters :math:`s` and :math:`t`, :math:`v` is an unused
        cluster in the forest, :math:`T=|v|+|s|+|t|`, and
        :math:`|*|` is the cardinality of its argument. This is also
        known as the incremental algorithm.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))
    n_samples, n_features = X.shape

    if connectivity is None:
        from scipy.cluster import hierarchy  # imports PIL

        if n_clusters is not None:
            warnings.warn('Partial build of the tree is implemented '
                          'only for structured clustering (i.e. with '
                          'explicit connectivity). The algorithm '
#......... remainder of the code omitted .........

Author: NUMBLP7890Fly | Project: scikit-learn | Lines: 101 | Source: hierarchical.py
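As a quick numerical check of the update rule quoted above (a sketch against scipy's own ward, with made-up data): the very first merge always joins two singleton leaves, so its linkage height must equal their plain euclidean distance.

import numpy as np
from scipy.cluster.hierarchy import ward

X = np.random.RandomState(0).rand(6, 2)
Z = ward(X)                       # each row: [child_a, child_b, height, size]
a, b = int(Z[0, 0]), int(Z[0, 1])
assert np.isclose(Z[0, 2], np.linalg.norm(X[a] - X[b]))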
Example 17: ward_tree

def ward_tree(X, connectivity=None, n_components=None, copy=True):
    """Ward clustering based on a Feature matrix.

    The inertia matrix uses a Heapq-based representation.

    This is the structured version, that takes into account some topological
    structure between samples.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        feature matrix representing n_samples samples to be clustered

    connectivity : sparse matrix.
        connectivity matrix. Defines for each sample the neighboring samples
        following a given structure of the data. The matrix is assumed to
        be symmetric and only the upper triangular half is used.
        Default is None, i.e, the Ward algorithm is unstructured.

    n_components : int (optional)
        Number of connected components. If None the number of connected
        components is estimated from the connectivity matrix.

    copy : bool (optional)
        Make a copy of connectivity or work inplace. If connectivity
        is not of LIL type there will be a copy in any case.

    Returns
    -------
    children : list of pairs. Length of n_nodes
        list of the children of each node.
        Leaves of the tree have an empty list of children.

    n_components : int
        The number of connected components in the graph.

    n_leaves : int
        The number of leaves in the tree
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = np.reshape(X, (-1, 1))
    n_samples, n_features = X.shape  # unpack after the reshape so 1-D input works

    if connectivity is None:
        out = hierarchy.ward(X)
        children_ = out[:, :2].astype(int)  # np.int is deprecated
        return children_, 1, n_samples

    # Compute the number of nodes
    if n_components is None:
        n_components, labels = cs_graph_components(connectivity)

    # Convert connectivity matrix to LIL with a copy if needed
    if sparse.isspmatrix_lil(connectivity) and copy:
        connectivity = connectivity.copy()
    else:
        connectivity = connectivity.tolil()

    if n_components > 1:
        warnings.warn("the number of connected components of the"
                      " connectivity matrix is %d > 1. Completing it to avoid"
                      " stopping the tree early."
                      % n_components)
        connectivity = _fix_connectivity(X, connectivity,
                                         n_components, labels)
        n_components = 1

    n_nodes = 2 * n_samples - n_components

    if (connectivity.shape[0] != n_samples or
            connectivity.shape[1] != n_samples):
        raise ValueError('Wrong shape for connectivity matrix: %s '
                         'when X is %s' % (connectivity.shape, X.shape))

    # Remove diagonal from connectivity matrix
    connectivity.setdiag(np.zeros(connectivity.shape[0]))

    # create inertia matrix
    coord_row = []
    coord_col = []
    A = []
    for ind, row in enumerate(connectivity.rows):
        A.append(row)
        # We keep only the upper triangular for the moments
        # Generator expressions are faster than arrays on the following
        row = [i for i in row if i < ind]
        coord_row.extend(len(row) * [ind, ])
        coord_col.extend(row)

    coord_row = np.array(coord_row, dtype=int)  # np.int is deprecated
    coord_col = np.array(coord_col, dtype=int)

    # build moments as a list
    moments_1 = np.zeros(n_nodes)
    moments_1[:n_samples] = 1
    moments_2 = np.zeros((n_nodes, n_features))
    moments_2[:n_samples] = X
    inertia = np.empty(len(coord_row), dtype=float)  # np.float is deprecated
    _hierarchical.compute_ward_dist(moments_1, moments_2,
#......... remainder of the code omitted .........

Author: WeatherGod | Project: scikit-learn | Lines: 101 | Source: hierarchical.py
Example 18: get_country_vector

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
98. Clustering with Ward's method

Run hierarchical clustering with Ward's method on the word vectors from
exercise 96, and visualize the clustering result as a dendrogram.
"""

from n90 import load_model
from n96 import get_country_vector

import numpy as np
from scipy.cluster.hierarchy import ward, dendrogram
import matplotlib.pyplot as plt

import sys

vector = get_country_vector(load_model(sys.argv[1]))
dendrogram(ward(np.array(list(vector.values()))), labels=list(vector.keys()))
plt.show()

Author: chantera | Project: nlp100 | Lines: 19 | Source: n98.py
Example 19: open

# imports this excerpt relies on
import numpy
from collections import Counter
from scipy.cluster.hierarchy import ward, dendrogram
from scipy.spatial import distance_matrix

#samples = [2,3,4,5,6,7,8,9,10,11,13]
samples = open("FDC.csv", "r").read().split(",")
samples = [float(value) for value in samples]

"""
diff_samples = numpy.array(original_samples + [0]) - numpy.array([0] + original_samples)
diff_samples = list(diff_samples)
difff_samples = numpy.array(diff_samples + [0]) - numpy.array([0] + diff_samples)
"""

# run the hierarchical clustering
tsamples = numpy.array([samples]).transpose()
distance = distance_matrix(tsamples, tsamples)
hc = ward(distance)
#print(hc)
dendrogram(hc)

def find_majority(array, index):
    # take a window of up to 11 values centered on `index`
    if index < 5:
        start = 0
    else:
        start = index - 5
    end = index + 6
    datas = array[start:end]
    counter = Counter(datas)
    [(majority, count)] = counter.most_common(1)
    # (excerpt ends here; the original presumably returns `majority`)

Author: daniellllllll | Project: random_sample | Lines: 31 | Source: clustering.3.py
Example 20: similarity

    return summs

def similarity(data):
    from sklearn.metrics.pairwise import cosine_similarity
    sims = cosine_similarity(data)
    return sims

if True:
    reduced, v = reduce_data(bdata)
    simplified = summ_subs(reduced)
    similar = similarity(simplified)
    from scipy.cluster.hierarchy import ward
    clusters = ward(similar)
    subnames = sorted(substance_count.keys())
    subcounts = [substance_count[key] for key in subnames]

if True:
    tree = jsontree(clusters, 2 * clusters.shape[0], subnames, subcounts, 1, np.nan)
    #tree = jsontree(clusters, 2*clusters.shape[0], subnames, subcounts, np.nan, 1000)
    with open(path + "gh-pages/tagtree.json", "w") as j:  # text mode: json.dump writes str on Python 3
        import json
        json.dump(tree, j)

if True:
    reduced, v = reduce_data(ldata)
    similar = similarity(reduced.T)
    from scipy.cluster.hierarchy import ward
    clusters = ward(similar)

Author: kidaak | Project: ineffable | Lines: 31 | Source: analyze.py
Note: the scipy.cluster.hierarchy.ward examples in this article were compiled by 纯净天空 from source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.