本文整理汇总了Python中sklearn.cluster.Ward类的典型用法代码示例。如果您正苦于以下问题:Python Ward类的具体用法?Python Ward怎么用?Python Ward使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Ward类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: __hieclu
def __hieclu(self):
    """Cluster ``self.data_matrix`` into ``self.k`` groups with Ward linkage.

    Returns whatever ``Ward.fit_predict`` yields for ``self.data_matrix``
    (presumably one cluster label per row -- confirm against the
    scikit-learn version in use).
    """
    # use Hierarchical clustering
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=self.k)
    # fit_predict() fits the estimator itself, so the separate fit() call
    # that used to precede it only built the same tree a second time.
    return ac.fit_predict(self.data_matrix)
开发者ID:chenzheng128,项目名称:evaluate_cluster,代码行数:7,代码来源:evaluate_class.py
示例2: constraint
def constraint(self, nodes, edges, lables):
    """Run Ward clustering on ``self.A`` constrained by one hard-coded cluster.

    ``nodes`` and ``lables`` are parallel sequences (node, cluster label).
    Nodes are grouped by label, the members of cluster 36 are pairwise
    connected in an N x N guidance matrix, and that matrix is handed to
    ``Ward`` as its connectivity.  The group sizes, the matrix and the
    predicted labels are printed; nothing is returned.

    ``edges`` is accepted but not used.  A ``KeyError`` is raised when no
    node carries the label 36.
    """
    node_count = len(nodes)
    if node_count != len(lables):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(lables)))

    # label -> list of nodes carrying that label
    circles = {}
    for idx in range(node_count):
        circles.setdefault(lables[idx], []).append(nodes[idx])

    for label in circles:
        print(label, len(circles[label]))

    guidance_matrix = sp.zeros([node_count, node_count])
    # Only this one cluster contributes connectivity entries.
    c = 36
    members = circles[c]
    for ni in members:
        row = nodes.index(ni)
        for nj in members:
            guidance_matrix[row, nodes.index(nj)] = 1.0

    guidance_matrix = sparse.lil_matrix(guidance_matrix)
    print(guidance_matrix)

    ward = Ward(n_clusters=6, n_components=2, connectivity=guidance_matrix)
    predicts = ward.fit_predict(self.A)
    print(predicts)
开发者ID:gnavvy,项目名称:ParaIF,代码行数:33,代码来源:Control.py
示例3: agglomerate
def agglomerate(self, nodes, edges, clusters):
    """Merge existing clusters into 6 groups with Ward linkage.

    ``nodes`` and ``clusters`` are parallel sequences (node, cluster id);
    ``edges`` is an iterable of pairs whose first item is the source node
    and second item the target node.

    For every cluster, the clusters of its members' outgoing neighbours
    are counted and written into an N x N affinity matrix (N = number of
    distinct cluster ids), which is then clustered with ``Ward``.

    Returns a list with one merged-cluster label per entry of ``nodes``.

    NOTE(review): cluster ids are used directly as matrix indices, so they
    are assumed to be the integers 0..N-1 -- confirm with the caller.
    Every edge target must also appear in ``nodes``, otherwise the cluster
    lookup raises ``KeyError``.
    """
    if len(nodes) != len(clusters):
        print("#nodes(%d) != #clusters(%d)" % (len(nodes), len(clusters)))

    # Adjacency list keyed by edge source: edge[0] -> [edge[1], ...]
    neighbors = {}
    for edge in edges:
        neighbors.setdefault(edge[0], []).append(edge[1])

    node_clusters = {}  # node: its cluster id
    communities = {}  # cluster id: all neighbors for its members
    for i, node in enumerate(nodes):
        cluster = clusters[i]
        # Each community gets its own list: storing neighbors[node] itself
        # made later extend() calls grow that node's adjacency list too.
        # .get() lets nodes without outgoing edges contribute nothing
        # instead of raising KeyError.
        communities.setdefault(cluster, []).extend(neighbors.get(node, ()))
        node_clusters[node] = cluster

    N = len(communities)
    affinity_matrix = sp.zeros([N, N])
    for comm in communities:
        members = [node_clusters[node] for node in communities[comm]]
        for key, count in Counter(members).items():
            affinity_matrix[comm, key] = count

    ward = Ward(n_clusters=6)
    predicts = ward.fit_predict(affinity_matrix)

    return [predicts[node_clusters[node]] for node in nodes]
开发者ID:gnavvy,项目名称:ParaIF,代码行数:32,代码来源:Control.py
示例4: test_connectivity_popagation
def test_connectivity_popagation():
    """
    Regression test: connectivity in the ward tree must be propagated
    correctly while clusters are merged.
    """
    from sklearn.neighbors import NearestNeighbors

    points = [
        (0.014, 0.120),
        (0.014, 0.099),
        (0.014, 0.097),
        (0.017, 0.153),
        (0.017, 0.153),
        (0.018, 0.153),
        (0.018, 0.153),
        (0.018, 0.153),
        (0.018, 0.153),
        (0.018, 0.153),
        (0.018, 0.153),
        (0.018, 0.153),
        (0.018, 0.152),
        (0.018, 0.149),
        (0.018, 0.144),
    ]
    X = np.array(points)

    knn = NearestNeighbors(n_neighbors=10)
    knn.fit(X)
    connectivity = knn.kneighbors_graph(X)

    # If changes are not propagated correctly, fit crashes with an
    # IndexError
    Ward(n_clusters=4, connectivity=connectivity).fit(X)
开发者ID:VirgileFritsch,项目名称:scikit-learn,代码行数:32,代码来源:test_hierarchical.py
示例5: hierarchicalClustering
def hierarchicalClustering(x,k):
    """Cluster the rows of ``x`` into ``k`` groups and compute centroids.

    Returns ``(centroids, labels)``: ``labels`` is the output of
    ``Ward.fit_predict`` and ``centroids`` is a list of ``k`` lists, each
    holding the per-dimension mean of the rows assigned to that cluster.
    ``x`` must be non-empty and indexable as ``x[row][dim]``.
    """
    labels = Ward(n_clusters=k).fit_predict(np.asarray(x))

    n_dims = len(x[0])
    # One running-sum vector per cluster (a list of lists)
    centroids = [[0] * n_dims for _ in range(k)]
    # Number of examples seen per cluster
    ctrs = np.zeros(k)

    # Accumulate every sample into the sum vector of its cluster
    for row in range(len(x)):
        cluster = labels[row]
        sums = centroids[cluster]
        for dim in range(len(sums)):
            sums[dim] += x[row][dim]
        ctrs[cluster] += 1

    # Turn the sums into means
    for cluster, sums in enumerate(centroids):
        sums[:] = [total / ctrs[cluster] for total in sums]

    return (centroids, labels)
开发者ID:lbenning,项目名称:Load-Forecasting,代码行数:28,代码来源:clustering.py
示例6: hieclu
def hieclu(data_matrix, k):
    """Cluster ``data_matrix`` into ``k`` groups with Ward linkage.

    Returns whatever ``Ward.fit_predict`` yields for ``data_matrix``
    (presumably one cluster label per row -- confirm against the
    scikit-learn version in use).
    """
    # use Hierarchical clustering
    print('using hierarchical clustering......')
    ac = Ward(n_clusters=k)
    # fit_predict() fits the estimator itself, so the separate fit() call
    # that used to precede it only built the same tree a second time.
    return ac.fit_predict(data_matrix)
开发者ID:chenzheng128,项目名称:evaluate_cluster,代码行数:7,代码来源:evaluate.py
示例7: test_ward_clustering
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    rnd = np.random.RandomState(0)
    # ``np.bool`` was only an alias of the builtin ``bool`` and has been
    # removed from recent NumPy releases; the builtin works everywhere.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(100, 50)
    # The mask is only used for its shape: a 10 x 10 grid, i.e. one graph
    # node per row of X.
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
开发者ID:AlexLerman,项目名称:scikit-learn,代码行数:11,代码来源:test_hierarchical.py
示例8: test_connectivity_fixing_non_lil
def test_connectivity_fixing_non_lil():
    """
    Non-regression test for a bug triggered when the connectivity matrix
    does not support item assignment and has more than one component.
    """
    # dummy data: two samples
    samples = np.array([[0, 0], [1, 1]])
    # a mask with several components forces the connectivity fixing
    mask = np.array([[True, False], [False, True]])
    graph = grid_to_graph(n_x=2, n_y=2, mask=mask)
    Ward(connectivity=graph).fit(samples)
开发者ID:bbabenko,项目名称:scikit-learn,代码行数:12,代码来源:test_hierarchical.py
示例9: cluster_ward
def cluster_ward(classif_data, vect_data):
    """Cluster the training vectors into 10 groups with Ward linkage.

    ``vect_data["train_vect"]`` is converted to an array and clustered.
    The euclidean silhouette score of the result is printed and the
    labels are returned.

    ``classif_data`` is kept for interface compatibility; it is not used.
    The earlier array conversions of ``classif_data["topics"]`` and
    ``vect_data["test_vect"]`` were never read and have been removed.
    """
    train_vectors = np.array(vect_data["train_vect"])
    labels = Ward(n_clusters=10).fit_predict(train_vectors)
    print("Ward")
    sil_score = metrics.silhouette_score(train_vectors, labels, metric='euclidean')
    print(sil_score)
    return labels
开发者ID:tangohead,项目名称:CS909-Project,代码行数:13,代码来源:helper.py
示例10: get_km_segments
def get_km_segments(x, image, sps, n_segments=25):
    """Group superpixels into ``n_segments`` segments with constrained Ward.

    ``x`` is either ``(feats, edges)`` or a 3-tuple whose third item is
    ignored.  ``edges`` is turned into a symmetric sparse connectivity
    graph over the ``len(feats)`` superpixels; the clustering features
    are the output of ``get_colors(image, sps)`` stacked with half the
    output of ``get_centers(sps)``.

    Returns the result of ``Ward.fit_predict`` on those features.
    """
    if len(x) != 2:
        feats, edges, _ = x
    else:
        feats, edges = x

    colors_ = get_colors(image, sps)
    centers = get_centers(sps)

    n_spixel = len(feats)
    weights = np.ones(edges.shape[0])
    graph = sparse.coo_matrix((weights, edges.T), shape=(n_spixel, n_spixel))
    # Adding the transpose makes the connectivity symmetric.
    symmetric_graph = graph + graph.T

    color_feats = np.hstack([colors_, centers * 0.5])
    model = Ward(n_clusters=n_segments, connectivity=symmetric_graph)
    return model.fit_predict(color_feats)
开发者ID:kod3r,项目名称:segmentation,代码行数:14,代码来源:hierarchical_segmentation.py
示例11: spectral_cluster
def spectral_cluster(data, n_clusters, method='sl'):
    """Spectral clustering of ``data`` followed by Ward clustering.

    ``method`` selects how the Laplacian matrix is built:

    * ``'NJW'``         -- ``get_lap_matrix_njw(data, 0.1)``; eigenvalues
      are sorted in descending order.
    * ``'self-tuning'`` -- ``get_lap_matrix_self_tuning(data)``;
      eigenvalues are sorted in descending order.
    * anything else     -- ``get_lap_matrix_sl(data, 0.1)``; eigenvalues
      are sorted in ascending order.

    The first ``n_clusters`` eigenvectors in that order are normalised
    with ``normal_eigen`` and clustered with ``Ward``; the labels are
    passed to ``mat_plot_cluster_sample``.  Nothing is returned.
    """
    # Build the Laplacian matrix
    if method == 'NJW':
        lap_matrix = get_lap_matrix_njw(data, 0.1)
        descending = True
    elif method == 'self-tuning':
        lap_matrix = get_lap_matrix_self_tuning(data)
        descending = True
    else:
        lap_matrix = get_lap_matrix_sl(data, 0.1)
        descending = False

    # The decomposition and sort are the same for every method; only the
    # sort direction differs.
    eigenvalues, eigenvectors = np.linalg.eig(lap_matrix)
    idx = eigenvalues.argsort()
    if descending:
        idx = idx[::-1]
    eigenvectors = eigenvectors[:, idx]

    # Take the first n_clusters eigenvectors
    x_matrix = eigenvectors[:, 0:n_clusters]
    # Normalise the eigenvector matrix
    y_matrix = normal_eigen(x_matrix)

    # Earlier experiments clustered y_matrix with the project's own k_means
    # and bi_k_means helpers and with sklearn's KMeans; that disabled code
    # (kept in bare string literals) has been removed.
    # Cluster with sklearn's hierarchical (Ward) method
    hie_cluster = Ward(n_clusters=n_clusters)
    hie_cluster.fit(y_matrix)
    mat_plot_cluster_sample(data, hie_cluster.labels_, method)
开发者ID:Yayong-guan,项目名称:mlcode,代码行数:49,代码来源:spectral_cluster.py
示例12: ward
def ward(self, X, n_clusters, plot=True):
    """Cluster ``X`` with Ward linkage and optionally plot a 2-D projection.

    The estimator is built with ``copy=False``, ``compute_full_tree=True``
    and ``memory="cache"``.

    When ``plot`` is true, all open figures are closed, ``X`` is projected
    to two components with ``RandomizedPCA`` and every sample is drawn in
    the colour of its cluster before the figure is shown.  When ``plot``
    is false, pylab is left untouched (previously figures were closed and
    a new one was created even though nothing was drawn).

    Returns the fitted estimator's ``labels_``.
    """
    model = Ward(n_clusters=n_clusters, copy=False, compute_full_tree=True, memory="cache")
    model.fit(X)
    labels = model.labels_

    if plot:
        pl.close('all')
        pl.figure(1)
        pl.clf()

        # Colour per cluster label; the pattern is repeated so that labels
        # up to 229 still index into the string.
        colors = "rbgcmybgrcmybgrcmybgrcm" * 10
        X2d = RandomizedPCA(n_components=2).fit_transform(X)
        for point, label in zip(X2d, labels):
            color = colors[label]
            pl.plot(point[0], point[1], "o", markerfacecolor=color, markeredgecolor=color, alpha=0.035)
        pl.show()

    return labels
开发者ID:zaycev,项目名称:n7,代码行数:18,代码来源:cluster.py
示例13: cluster_ward
def cluster_ward(self, calpha=True):
    '''
    Cluster the positively predicted residues using the Ward method.

    The coordinates of ``self.positive_surface_residues.ca`` are clustered
    into ``self.WARD_N_CLUSTERS`` groups, constrained by a 5-nearest-
    neighbour connectivity graph.

    Returns a dict mapping a rank (0 = largest cluster) to the array of
    residue numbers in that cluster, or an empty dict when fewer than 4
    atoms are available.

    Raises NotImplementedError when ``calpha`` is false: the side-chain
    selection was never enabled, so that path used to fail with an
    UnboundLocalError on ``data_atoms``.
    '''
    if not calpha:
        raise NotImplementedError("cluster_ward only supports calpha=True")

    data_atoms = self.positive_surface_residues.ca
    coords = data_atoms.getCoords()
    if coords.shape[0] < 4:
        # Same output as the former two-argument print statement.
        print('%s %s' % (self.pdbid, coords.shape))
        return {}

    # NOTE(review): the guard above allows 4 atoms while 5 neighbours are
    # requested here -- confirm kneighbors_graph accepts that.
    connectivity = kneighbors_graph(coords, 5)
    ward = Ward(n_clusters=self.WARD_N_CLUSTERS, connectivity=connectivity)
    ward.fit(coords)

    resnums = data_atoms.getResnums()
    reslabels = ward.labels_
    # Largest cluster first
    clusters = sorted([resnums[reslabels == i] for i in set(reslabels)], key=len, reverse=True)
    return dict(enumerate(clusters))
开发者ID:asaladin,项目名称:peptalk,代码行数:20,代码来源:peptalk.py
示例14: compute_clusters
def compute_clusters(dataset,features_vector):
    """
    Fit the clustering method selected by ``cmd_options.clust_method``.

    The number of clusters is the number of distinct values in
    ``dataset.target``.  ``"hclust"`` fits ``Ward`` on the dense form of
    ``features_vector``; ``"kmeans"`` fits ``KMeans`` on it directly.
    Returns the fitted estimator, or None for any other method name.
    """
    true_k = np.unique(dataset.target).shape[0]
    method = cmd_options.clust_method

    # Announce the method, followed by an empty line.  The explicit " "
    # reproduces the separator the two-argument print statement inserted.
    banner = "Performing clustering with method "
    print(banner + " " + method.upper())
    print("")

    if method == "hclust":
        model = Ward(n_clusters=true_k)
        model.fit(features_vector.toarray())
        return model

    if method == "kmeans":
        model = KMeans(n_clusters=true_k, init='k-means++', max_iter=1000, verbose=1)
        model.fit(features_vector)
        return model
开发者ID:arianpasquali,项目名称:textmining,代码行数:24,代码来源:text_clustering.py
示例15: ward
def ward(X, n_clust):
    """Fit a ``Ward`` estimator with ``n_clust`` clusters on ``X`` and return it."""
    model = Ward(n_clusters=n_clust)
    model.fit(X)
    return model
开发者ID:lixiangchun,项目名称:MCP05,代码行数:6,代码来源:utils.py
示例16: Ward
"""
Benchmark scikit-learn's Ward implement compared to SciPy's
"""
import time
import numpy as np
from scipy.cluster import hierarchy
import pylab as pl
from sklearn.cluster import Ward
ward = Ward(n_clusters=3)

# Log-spaced problem sizes; np.logspace returns floats.
n_samples = np.logspace(.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples,
                                    n_features)
# Timing grids indexed as [feature index, sample index]
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)

for i, n in enumerate(n_samples):
    for j, p in enumerate(n_features):
        # Array dimensions must be integers; passing the float values
        # straight through is rejected by current NumPy.
        X = np.random.normal(size=(int(n), int(p)))
        # Time scikit-learn's Ward
        t0 = time.time()
        ward.fit(X)
        scikits_time[j, i] = time.time() - t0
        # Time SciPy's ward on the same data
        t0 = time.time()
        hierarchy.ward(X)
        scipy_time[j, i] = time.time() - t0
开发者ID:2011200799,项目名称:scikit-learn,代码行数:30,代码来源:bench_plot_ward.py
示例17: print
# Scores of the k-means clustering fitted earlier in the script
km_labels = km.labels_
print("Homogeneity k-means: %0.3f" % metrics.homogeneity_score(labels, km_labels))
print("Completeness k-means: %0.3f" % metrics.completeness_score(labels, km_labels))
print("V-measure k-means: %0.3f" % metrics.v_measure_score(labels, km_labels))
print("Silhouette Coefficient k-means: %0.3f" % metrics.silhouette_score(clustering, km_labels, sample_size=8000))

# DBSCAN
db = DBSCAN()
db.fit(clustering)
print('DBSCAN clusters created..')
db_labels = db.labels_
print("Homogeneity DBSCAN: %0.3f" % metrics.homogeneity_score(labels, db_labels))
print("Completeness DBSCAN: %0.3f" % metrics.completeness_score(labels, db_labels))
print("V-measure DBSCAN: %0.3f" % metrics.v_measure_score(labels, db_labels))
print("Silhouette Coefficient DBSCAN: %0.3f" % metrics.silhouette_score(clustering, db_labels, sample_size=5000))

# Structured hierarchical clustering
ward = Ward(n_clusters=9)
ward.fit(clustering)
print('Hierarchical clusters created..')
ward_labels = ward.labels_
print("Homogeneity hierarchical: %0.3f" % metrics.homogeneity_score(labels, ward_labels))
print("Completeness hierarchical: %0.3f" % metrics.completeness_score(labels, ward_labels))
print("V-measure hierarchical: %0.3f" % metrics.v_measure_score(labels, ward_labels))
print("Silhouette Coefficient hierarchical: %0.3f" % metrics.silhouette_score(clustering, ward_labels, sample_size=5000))
开发者ID:colibri17,项目名称:TextCategorization,代码行数:25,代码来源:main.py
示例18: hierarchical
def hierarchical(self, n_clusters):
    """Cluster ``self.A`` into ``n_clusters`` groups with Ward linkage.

    ``self.A`` is converted with ``sp.array`` first; the result of
    ``Ward.fit_predict`` on that array is returned.
    """
    data = sp.array(self.A)
    model = Ward(n_clusters=n_clusters)
    return model.fit_predict(data)
开发者ID:gnavvy,项目名称:ParaIF,代码行数:3,代码来源:Control.py
示例19: encode
def encode(self, interm_rep, neighborhood_size = 26,
clust_ratio=10,
encoding='geometrical',
similarity_measure='pearson',
threshold=0.3, n_jobs=1, **kwds):
"""
Parameters
----------
interm_rep: IntermRep
IntermRep object containing the arr_xyz and arr_voxel matrixes.
neighborhood_size: int
Number of neighbors each voxel will be connected to.
clust_ratio: int
The number of clusters will be equal to n/clust_ratio, where n is
the number of voxels.
encoding: string
Type of encoding. 'geometrical' and 'functional' are allowed.
similarity_measure: string
Similarity measure used to compare the representative value of each
parcel (cluster). 'pearson' or the measures available in scikit-learn
are allowed.
threshold: float
Threshold applied to the similarity values in order to define the
edges in the graph.
Returns
-------
g: Graph
Networkx graph representing the graph encoding of the data.
"""
#computing the connectivity matrix, each voxel is connected to
#"neighborhood_size" neighbors.
#
conn = kneighbors_graph(interm_rep.arr_xyz, n_neighbors=neighborhood_size)
# conn_n = kneighbors_graph(interm_rep.arr_xyz, n_neighbors=neighborhood_size)
# conn_r = radius_neighbors_graph(interm_rep.arr_xyz, radius=10)
# conn = conn_n * conn_r
#Hierarchical clustering algorithm. The number of clusters is defined
#accoring to the parameter "clust_ratio".
ward = Ward(n_clusters=len(interm_rep.arr_xyz)/clust_ratio, connectivity=conn)
#ward = Ward(n_clusters=60, connectivity=conn)
#Type of encoding: geometrical (only xyz data is used) or
# functional (voxel time series is used).
if encoding=='geometrical':
ward.fit(interm_rep.arr_xyz)
elif encoding=='functional':
ward.fit(interm_rep.arr_voxels)
labels = ward.labels_
#Plotting the voxels with the cluster labels.
#pp.plot_clustering_intermediate_representation(interm_rep, labels*10)
#Computing the unique cluster indentifiers
l_unique = np.unique(labels)
mean_voxels = np.zeros((len(l_unique), interm_rep.arr_voxels.shape[1]))
mean_xyz = np.zeros((len(l_unique), interm_rep.arr_xyz.shape[1]))
cont = 0
for i in l_unique:
#Taking the possitions corresponding to the same cluster.
pos = np.where(labels == i)[0]
#Taking data from these possitions and computing the mean time serie
m_voxel = interm_rep.arr_voxels[pos].mean(0)
#Taking the xyz from these positions and computing the mean value
m_xyz = interm_rep.arr_xyz[pos].mean(0)
mean_voxels[cont] = m_voxel
mean_xyz[cont] = m_xyz
cont += 1
#plotting the voxels time series for each cluster
#pp.plot_interm_representation_time_series(ir.IntermRep(mean_voxels, mean_xyz))
#The new intermediate representation is given by mean_voxels and
# mean_xyz.
#Computing similarity matrix and applying the threshold
adj_mat = np.zeros((len(mean_voxels), len(mean_voxels)),
dtype = np.byte)
for j in range(len(mean_voxels) - 1):
for k in range(j + 1, len(mean_voxels)):
if similarity_measure == 'pearson':
aux = st.pearsonr(mean_voxels[j], mean_voxels[k])[0]
else:
aux = skpw.pairwise_kernel(mean_voxels[j], mean_voxels[k],
metric = similarity_measure,
n_jobs = n_jobs)
if aux >= threshold:
adj_mat[j,k] = 1
adj_mat[k,j] = 1
#.........这里部分代码省略.........
开发者ID:svegapons,项目名称:PyBDGK,代码行数:101,代码来源:GE_NeighConst_HCA.py
示例20: test_ward_clustering
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    rnd = np.random.RandomState(0)
    # ``np.bool`` was only an alias of the builtin ``bool`` and has been
    # removed from recent NumPy releases; the builtin works everywhere.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    # test caching
    # NOTE: the temporary cache directory is not removed afterwards.
    clustering = Ward(n_clusters=10, connectivity=connectivity,
                      memory=mkdtemp())
    clustering.fit(X)
    labels = clustering.labels_
    assert_true(np.size(np.unique(labels)) == 10)
    # Turn caching off now
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    # Check that we obtain the same solution with early-stopping of the
    # tree building
    clustering.compute_full_tree = False
    clustering.fit(X)
    np.testing.assert_array_equal(clustering.labels_, labels)
    # Unstructured clustering must still give the requested cluster count
    clustering.connectivity = None
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
    # Check that we raise a TypeError on dense matrices
    clustering = Ward(n_clusters=10,
                      connectivity=connectivity.todense())
    assert_raises(TypeError, clustering.fit, X)
    # A 10 x 10 connectivity does not match the 100 samples: ValueError
    clustering = Ward(n_clusters=10,
                      connectivity=sparse.lil_matrix(
                          connectivity.todense()[:10, :10]))
    assert_raises(ValueError, clustering.fit, X)
开发者ID:2011200799,项目名称:scikit-learn,代码行数:34,代码来源:test_hierarchical.py
注:本文中的sklearn.cluster.Ward类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论