本文整理汇总了Python中sklearn.cluster.spectral_clustering函数的典型用法代码示例。如果您正苦于以下问题：Python spectral_clustering函数的具体用法？Python spectral_clustering怎么用？Python spectral_clustering使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了spectral_clustering函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: ClusterBalance
def ClusterBalance(self, indexesToPick, stopCount, kmeansFlag=True):
    """Pick a cluster-balanced subset of ``indexesToPick``.

    The observations selected by ``indexesToPick`` are normalised and
    clustered; samples are then drawn round-robin, at most one per cluster
    per pass, until ``max(2, stopCount)`` samples were picked or no sample
    is left to pick.

    Parameters
    ----------
    indexesToPick : sequence of int
        Candidate indexes into ``self.observations``.
    stopCount : int
        Requested number of samples (at least two are always requested).
    kmeansFlag : bool, optional
        Cluster with KMeans when true (default), otherwise with spectral
        clustering.

    Returns
    -------
    list
        The picked entries of ``indexesToPick`` in picking order; empty
        when ``indexesToPick`` is empty.
    """
    print("ClusterBalancing...")
    indexesPicked = []
    # Guard first: the original indexed and normalised the data *before*
    # checking for an empty selection, so the check could never help.
    if len(indexesToPick) == 0:
        return indexesPicked

    obs = normalize(self.observations[indexesToPick], axis=0)

    # Never request more clusters than there are samples.
    if len(indexesToPick) < self.numClusters:
        n_clusters = len(obs)
    else:
        n_clusters = self.numClusters

    if kmeansFlag:
        cluster = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    else:
        # spectral_clustering() is a plain function that needs an affinity
        # matrix and returns labels; the estimator with fit()/labels_ is
        # SpectralClustering.
        from sklearn.cluster import SpectralClustering
        cluster = SpectralClustering(n_clusters=n_clusters, n_init=10)
    cluster.fit(obs)
    labels = cluster.labels_

    whenToStop = max(2, stopCount)
    count = 0
    while count != whenToStop:
        # list(...) because a Python 3 range object has no remove().
        cluster_list = list(range(self.numClusters))
        picked_this_pass = False
        for index, label in enumerate(labels):
            if label not in cluster_list:
                continue
            indexesPicked.append(indexesToPick[index])
            cluster_list.remove(label)
            count += 1
            picked_this_pass = True
            if count == whenToStop:
                break
            # Mark the sample as consumed so later passes skip it.
            labels[index] = -1
            if len(cluster_list) == 0:
                break
        # Every sample is already consumed: without this exit the loop
        # would spin forever when fewer samples exist than requested.
        if not picked_this_pass:
            break
    return indexesPicked
开发者ID:koshkabb,项目名称:MitosisDetection,代码行数:35,代码来源:Balancer.py
示例2: test_spectral_clustering_with_arpack_amg_solvers
def test_spectral_clustering_with_arpack_amg_solvers():
    """Check that the arpack and amg eigen-solvers give the same clustering.

    The fixture is the small two-coin image taken from
    plot_segmentation_toy.py.  When amg is not available, requesting it
    must raise ValueError.
    """
    rows, cols = np.indices((40, 40))
    (cx1, cy1), (cx2, cy2) = (14, 12), (20, 25)
    r1, r2 = 8, 7

    coin1 = (rows - cx1) ** 2 + (cols - cy1) ** 2 < r1 ** 2
    coin2 = (rows - cx2) ** 2 + (cols - cy2) ** 2 < r2 ** 2
    coins = coin1 | coin2

    graph = img_to_graph(coins.astype(float), mask=coins.copy())
    graph.data = np.exp(-graph.data / graph.data.std())

    common = dict(n_clusters=2, random_state=0)
    labels_arpack = spectral_clustering(
        graph, eigen_solver='arpack', **common)
    assert len(np.unique(labels_arpack)) == 2

    if not amg_loaded:
        assert_raises(
            ValueError, spectral_clustering,
            graph, eigen_solver='amg', **common)
        return

    labels_amg = spectral_clustering(graph, eigen_solver='amg', **common)
    assert adjusted_rand_score(labels_arpack, labels_amg) == 1
开发者ID:manhhomienbienthuy,项目名称:scikit-learn,代码行数:33,代码来源:test_spectral.py
示例3: __init__
def __init__(self, laplacian, ncluster, classesnames):
    """Cluster ``laplacian`` and group the class names by cluster label.

    Parameters
    ----------
    laplacian : 2-D matrix
        Passed unchanged to ``spectral_clustering`` as the affinity input.
    ncluster : int
        Number of clusters to request.
    classesnames : iterable
        Names; the i-th name belongs to the i-th row of ``laplacian``.

    Sets ``self.cluster`` to ``(count, groups)`` where ``groups`` lists,
    per cluster, the member names; clusters with fewer than two names are
    dropped.
    """
    self.laplacian = laplacian
    self.ncluster = ncluster
    n_rows, n_cols = laplacian.shape
    # Same text the Python 2 statement `print 'size ...: ', m, n` produced.
    print('size Laplacian_matrix:  %s %s' % (n_rows, n_cols))
    labels = spectral_clustering(laplacian, n_clusters=ncluster)

    # Materialise both sequences: they are traversed once per cluster, and
    # a Python 3 zip() iterator would be exhausted after the first pass.
    names = list(classesnames)
    label_index_pairs = list(zip(labels, range(n_cols + 1)))
    print("labels %s" % (label_index_pairs,))

    allwordsclustered = []
    for cluster_id in range(ncluster):
        members = [idx for label, idx in label_index_pairs
                   if label == cluster_id]
        # Direct indexing replaces the original scan over all (index, name)
        # pairs; indexes without a name are skipped, as before.
        wordsclustered = [names[idx] for idx in members if idx < len(names)]
        if len(wordsclustered) > 1:
            allwordsclustered.append(wordsclustered)
    print('clusteredwords')
    print(allwordsclustered)
    self.cluster = len(allwordsclustered), allwordsclustered
开发者ID:omedranoc,项目名称:ThesisPreprocessing,代码行数:28,代码来源:cluster.py
示例4: test_spectral_amg_mode
def test_spectral_amg_mode():
    """Exercise the amg mode of spectral clustering on three blobs.

    With pyamg importable the clustering only has to clear a low accuracy
    bar; without it, requesting the amg mode must raise ValueError.
    """
    centers = np.array([[0., 0., 0.], [10., 10., 10.], [20., 20., 20.]])
    n_centers = len(centers)
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    distances = pairwise_distances(X)
    # Turn distances into similarities, stored sparsely.
    similarity = sparse.coo_matrix(np.max(distances) - distances)

    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        amg_loaded = False
    else:
        amg_loaded = True

    if not amg_loaded:
        assert_raises(ValueError, spectral_embedding, similarity,
                      n_components=n_centers, random_state=0, mode="amg")
        return

    labels = spectral_clustering(similarity, n_clusters=n_centers,
                                 random_state=0, mode="amg")
    # Quality is not the point here, only that it ran; still require a
    # minimal level of agreement with the true labels.
    assert_greater(np.mean(labels == true_labels), .3)
开发者ID:osdf,项目名称:scikit-learn,代码行数:26,代码来源:test_spectral.py
示例5: spectral
def spectral(tweetfile, npmifile, dictfile, k, noc):
    """Spectrally cluster the words of the largest connected component.

    Builds a sparse matrix from the triples read from ``npmifile``, reduces
    it with ``knnmatrix(..., k)``, keeps the most populated connected
    component, clusters it into ``noc`` groups and writes one
    ``word cluster score`` line per word to ``cl.<tweetfile>-<noc>``.

    Parameters
    ----------
    tweetfile : str
        Only used to build the output file name.
    npmifile, dictfile : str
        Input files parsed with ``textscan``.
    k : int
        Forwarded to ``knnmatrix``.
    noc : int
        Number of clusters.
    """
    Ptmp = textscan(npmifile, '([^ ]*) ([^ ]*) ([^ ]*)')
    PP = textscan(dictfile, '(.*) (.*)', (int, str))
    PP[0] -= 1
    n_words = PP[0].shape[0]
    PMI = ssp.coo_matrix(
        (Ptmp[2], (Ptmp[0] - 1, Ptmp[1] - 1)),
        (n_words, n_words),
    ).tocsr()
    W = knnmatrix(PMI, k)
    # Symmetrise by element-wise minimum with the transpose.  The original
    # author flagged this step as hideous and in need of a fix: it
    # densifies both matrices.
    W = ssp.csr_matrix(minimum(W.todense(), W.T.todense()))
    _, comp = ssp.csgraph.connected_components(W, directed=False)
    # The most frequent component label identifies the component to keep.
    comp_mode = mstats.mode(comp)[0]
    in_component = comp == comp_mode
    inds = [x for x in range(W.shape[0]) if in_component[x]]
    X = W[inds, :][:, inds]
    ids = PP[1][inds]
    c = spectral_clustering(X, n_clusters=noc, eigen_solver='arpack')
    # open() in a with-block: file() does not exist on Python 3 and the
    # original never closed (flushed) the output file.
    out_name = "".join(['cl.', tweetfile, '-', str(noc)])
    with open(out_name, 'w') as fid:
        for i in range(max(c) + 1):
            cl = [x for x in range(len(c)) if c[x] == i]
            b, wordsix = centralityn(cl, X, ids)
            for j in range(len(b)):
                fid.write('%s %d %.5f\n' % (wordsix[j], i, b[j]))
开发者ID:danielpreotiuc,项目名称:trendminer-clustering,代码行数:31,代码来源:spectral.py
示例6: getPairwiseDistanceMatrix
def getPairwiseDistanceMatrix(self, n_neighbors=5, n_clusters=20):
    """Build a sparse nearest-neighbour affinity matrix and cluster it.

    Distances are computed one point at a time, which is slightly slower
    but memory efficient; a full dense distance matrix is not tractable at
    this scale.

    Parameters
    ----------
    n_neighbors : int, optional
        Number of closest points kept per point (default 5, the value the
        original hard-coded).
    n_clusters : int, optional
        Number of spectral clusters (default 20, as before).

    Side effects: sets ``self.clusters`` and ``self.PDistMat`` and pickles
    the affinity matrix to ``pdist.bnbb`` and the labels to ``labs.bnbb``.
    """
    self.clusters = []
    dataSize = self.data_points.shape
    self.PDistMat = sp.sparse.csr_matrix((dataSize[0], dataSize[0]))
    # With fewer points than neighbours the original kept re-selecting
    # index 0 with a made-up distance; cap the count instead.
    n_keep = min(n_neighbors, dataSize[0])
    for k in range(dataSize[0]):
        current = np.reshape(self.data_points[k, :], (1, dataSize[1]))
        dist = sp.spatial.distance.cdist(
            current, self.data_points, 'euclidean')
        masked = np.max(dist) + 1
        for _ in range(n_keep):
            nearest = np.argmin(dist)
            # NOTE(review): item assignment on a CSR matrix is slow; kept
            # because another format may store zero distances differently
            # -- confirm before changing.
            self.PDistMat[k, nearest] = dist[0, nearest]
            self.PDistMat[nearest, k] = dist[0, nearest]
            # Mask the chosen point so the next argmin finds the next one.
            dist[0, nearest] = masked
    SM = self.PDistMat.data.mean()
    # Work on the stored values directly: affinity = exp(-distance / mean).
    self.PDistMat.data[:] = np.exp(-self.PDistMat.data / SM)
    with open('pdist.bnbb', 'wb') as handle:
        pickle.dump(self.PDistMat, handle)
    labs = spectral_clustering(self.PDistMat, n_clusters=n_clusters)
    with open('labs.bnbb', 'wb') as handle:
        pickle.dump(labs, handle)
开发者ID:caomw,项目名称:RecipeWatch,代码行数:31,代码来源:coCluster.py
示例7: classifySpeCluLsa
def classifySpeCluLsa(self, class_num):
    """Group titles into ``class_num`` spectral clusters of LSA similarity.

    Loads the stored LSA model and similarity index, fills a similarity
    matrix for the tf-idf vectors, clusters it, stores the title ids per
    cluster in ``self.vector_table``, then writes and draws the result.
    """
    from draw_data import draw_data
    draw_title = draw_data()
    lsa = models.LsiModel.load('model.lsa', mmap='r')
    logging.info("load lsa model!!")
    index = similarities.MatrixSimilarity.load('model_lsa.index')
    self.get_data(num=3000)
    (tfidf, dictionary) = self.get_tfidf(True, num=3000)

    # title id -> position used for that title in similar_matrix
    hash_id2list = {title: pos for pos, title in enumerate(self.title_id)}

    logging.info('开始创建相似矩阵...')
    n_docs = len(tfidf)
    similar_matrix = np.zeros((n_docs, n_docs))  # holds the similarities
    for row in range(n_docs):
        for col, score in enumerate(index[lsa[tfidf[row]]]):
            # Written to both triangles; later rows overwrite earlier ones.
            similar_matrix[row, col] = score
            similar_matrix[col, row] = score
    logging.info('done,相似矩阵建立完成,使用普聚类进行分类...')

    labels = spectral_clustering(similar_matrix, n_clusters=class_num,
                                 eigen_solver='arpack')
    self.vector_table = [[] for _ in range(class_num)]
    for pos, label in enumerate(labels):
        self.vector_table[label].append(self.title_id[pos])
    logging.info("print set... "+str(len(self.vector_table)))
    self.printTitleTOfile(hash_id2list)
    draw_title.draw_topic(self.vector_table, 30, '2015-09-25', '2015-12-25')
开发者ID:yxzero,项目名称:qq_comment,代码行数:28,代码来源:classify_sc.py
示例8: community_clustering
def community_clustering():
    """Merge per-year communities into 200 spectral clusters.

    Every file under ``settings.COMMUNITY_PATH`` is named after a year and
    holds ``author community_id`` lines.  Communities are compared pairwise
    with ``Community.intersect``, clustered on that affinity, and the
    members of each cluster are written per year (comma terminated) below
    ``settings.DATA_PATH``.
    """
    n_clusters = 200
    path = settings.COMMUNITY_PATH
    index = 0
    communities = []
    merged_communities = {}
    for root, dirs, files in os.walk(path):
        for year in files:
            merged_communities[int(year)] = [[] for _ in range(n_clusters)]
            comm_dict = {}
            # Join with the walked directory (identical to `path` for
            # top-level files) and close the file when done.
            with open(os.path.join(root, year)) as community_file:
                for line in community_file:
                    fields = line.strip().split(' ')
                    author = int(fields[0])
                    comm_id = int(fields[1])
                    if comm_id not in comm_dict:
                        comm_dict[comm_id] = Community(
                            int(year), comm_id, index)
                        index += 1
                    comm_dict[comm_id].append_member(author)
            communities.extend(comm_dict.values())
    verbose.debug("num of communities: "+str(len(communities)))

    n_communities = len(communities)
    # zeros, not np.ndarray(...): the diagonal is never assigned below and
    # would otherwise hold uninitialised memory.
    adjacency = np.zeros((n_communities, n_communities), dtype=int)
    for i in range(n_communities):
        for j in range(i + 1, n_communities):
            affinity = communities[i].intersect(communities[j])
            adjacency[i, j] = affinity
            adjacency[j, i] = affinity
    labels = spectral_clustering(adjacency, n_clusters=n_clusters)
    verbose.debug("clustering finished")

    for community, label in zip(communities, labels):
        merged_communities[community.year][label].extend(community.members)
    for year in merged_communities:
        out_path = settings.DATA_PATH+"\\clusters\\"+str(year)
        with open(out_path, 'w') as cluster_file:
            for members in merged_communities[year]:
                cluster_file.write(
                    ''.join(str(member)+',' for member in members))
开发者ID:neozhangthe1,项目名称:ringnet,代码行数:35,代码来源:cluster.py
示例9: speclu
def speclu(data_matrix, k):
    """Return spectral-cluster labels for the rows of ``data_matrix``.

    The matrix produced by ``getEMatrix(data_matrix)`` is clustered into
    ``k`` groups; only the first ``len(data_matrix)`` labels are returned.
    """
    # Function-call form: the bare print statement is a SyntaxError on
    # Python 3 and this prints the same text on Python 2.
    print('using spectral clustering......')
    E_matrix = getEMatrix(data_matrix)
    result_total = spectral_clustering(E_matrix, n_clusters=k)
    return result_total[:len(data_matrix)]
开发者ID:chenzheng128,项目名称:evaluate_cluster,代码行数:7,代码来源:evaluate.py
示例10: compute
def compute(n):
    """Split the graph built for ``n`` into circles and keep the dense ones.

    Returns ``(ego, circles)`` where ``circles`` are the spectral clusters
    whose ``compute_density`` value is at least 0.25 (within 1e-9).
    """
    G, nodes, ego = build_graph(n)
    A = nx.to_numpy_matrix(G)
    C = connectedness(A)
    row, col = A.shape
    # Larger graphs get more clusters.
    clus = 10 if row >= 350 else 6
    L = spectral_clustering(C, n_clusters=clus)

    circles = [[] for _ in range(clus)]
    for position, node in enumerate(nodes):
        circles[L[position]].append(node)

    final_circle = []
    for circle in circles:
        # NOTE(review): this tests the number of circles (always 6 or 10),
        # so the density check is never skipped; `len(circle) == 1` may
        # have been intended -- confirm with the author.
        if len(circles) != 1:
            den = compute_density(circle, nodes, A)
            if den + 1e-9 < .250:
                continue
        final_circle.append(circle)
    return ego, final_circle
开发者ID:ark1790,项目名称:circles,代码行数:31,代码来源:preprocess.py
示例11: image_features_labels
def image_features_labels(img, n_clusters, maxPixel):
    """Return the flattened spectral-segmentation label image of ``img``.

    Parameters
    ----------
    img : array
        Input image; it is resized to ``maxPixel`` x ``maxPixel``.
    n_clusters : int
        Number of segments requested from spectral clustering.
    maxPixel : int
        Side length of the resized image.

    Returns
    -------
    ndarray of float, length ``maxPixel * maxPixel``
        Cluster label per pixel, with -1 for pixels outside the mask
        (pixels whose resized value is zero).
    """
    imageSize = maxPixel * maxPixel
    img = resize(img, (maxPixel, maxPixel))
    mask = img.astype(bool)
    # Graph over the masked pixels with the gradient value on the edges.
    graph = s_im.img_to_graph(img, mask=mask)
    # Decreasing function of the gradient; only weakly gradient dependent,
    # so the segmentation is close to a Voronoi partition.
    graph.data = np.exp(-graph.data / graph.data.std())
    # arpack is forced because amg is numerically unstable on this example.
    # n_clusters goes by keyword: it is keyword-only in current
    # scikit-learn releases.
    labels = spectral_clustering(graph, n_clusters=n_clusters,
                                 eigen_solver='arpack')
    label_im = -np.ones(mask.shape)
    label_im[mask] = labels
    # One flat feature vector per image.
    return np.reshape(label_im, imageSize).astype(float)
开发者ID:kailex,项目名称:Bowl,代码行数:25,代码来源:Prepare_Features.py
示例12: cluster_nodes
def cluster_nodes(dist_laplacian, clusters=3, show=False):
    """Spectrally cluster the nodes described by ``dist_laplacian``.

    The matrix is normalised with ``Lapl_normalize``, its diagonal is
    cleared and its sign flipped before it is handed to
    ``spectral_clustering``.  With ``show`` set, the resulting matrix is
    displayed first.  Returns the labels as a column vector with one row
    per node.
    """
    affinity = Lapl_normalize(dist_laplacian)
    affinity.setdiag(0)
    affinity = -affinity

    if show:
        plt.imshow(affinity.toarray(), cmap='jet', interpolation="nearest")
        plt.colorbar()
        plt.show()

    labels = spectral_clustering(
        affinity, n_clusters=clusters, eigen_solver='arpack')
    n_nodes = dist_laplacian.shape[0]
    return np.reshape(labels, (n_nodes, 1))
开发者ID:chiffa,项目名称:chiffatools,代码行数:10,代码来源:Linalg_routines.py
示例13: cluster_spatial_data
def cluster_spatial_data(X, n_parcels, xyz=None, shape=None, mask=None,
                         method='ward', verbose=False):
    """Cluster spatially constrained data (Ward, spectral or k-means)

    Parameters
    ==========
    X: array of shape(n_voxels, n_subjects)
       the functional data, across subjects
    n_parcels: int, the desired number of parcels
    xyz: array of shape (n_voxels, 3), optional
         positions of the voxels in grid coordinates
    shape: tuple: the domain shape (assuming a grid structure), optional
           alternative specification of positions
    mask: arbitrary array of arbitrary dimension,optional
          alternative specification of positions; when `shape` is not
          given, the grid shape is taken from the mask itself
    method: string, one of ['ward', 'spectral', 'kmeans'], optional
            clustering method
    verbose: bool, optional
             currently unused

    Returns
    =======
    label: array of shape(n_voxels): the resulting cluster assignment

    Raises
    ======
    ValueError: if none of mask, shape or xyz is given, or if `method`
                is unknown

    Note
    ====
    One of xyz, shape or mask needs to be provided
    """
    from sklearn.cluster import spectral_clustering, k_means
    if mask is not None:
        # The original unpacked `shape` unconditionally and crashed with a
        # TypeError when only `mask` was supplied.
        grid_shape = shape if shape is not None else np.shape(mask)
        connectivity = grid_to_graph(*grid_shape, mask=mask)
    elif shape is not None:
        connectivity = grid_to_graph(*shape)
    elif xyz is not None:
        from sklearn.neighbors import kneighbors_graph
        n_neighbors = 2 * xyz.shape[1]
        connectivity = kneighbors_graph(xyz, n_neighbors=n_neighbors)
    else:
        raise ValueError('One of mask, shape or xyz has to be provided')

    if n_parcels == 1:
        return np.zeros(X.shape[0])

    if method == 'ward':
        connectivity = connectivity.tocsr()
        ward = Ward(n_clusters=n_parcels, connectivity=connectivity).fit(X)
        label = ward.labels_
    elif method == 'spectral':
        i, j = connectivity.nonzero()
        # Squared feature distance along every edge, computed once.
        sq_dist = np.sum((X[i] - X[j]) ** 2, 1)
        sigma = sq_dist.mean()
        connectivity.data = np.exp(- sq_dist / (2 * sigma))
        label = spectral_clustering(connectivity, n_clusters=n_parcels)
    elif method == 'kmeans':
        _, label, _ = k_means(X, n_parcels)
    else:
        raise ValueError('Unknown method for parcellation')
    return label
开发者ID:HaxbyLab,项目名称:frontiers_2014,代码行数:55,代码来源:mixed_effects_parcel.py
示例14: spectralClusteringTest01
def spectralClusteringTest01():
    """Segment four overlapping noisy circles with spectral clustering.

    Draws the circles on a 100 x 100 image, builds the gradient graph of
    the pixels inside the circles, clusters it into four groups and shows
    the input image next to the label image.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.feature_extraction import image
    from sklearn.cluster import spectral_clustering

    l = 100
    # x and y are 2-D arrays holding the two coordinates of every pixel.
    x, y = np.indices((l, l))
    center1 = (28, 24)
    center2 = (40, 50)
    center3 = (67, 58)
    center4 = (24, 70)
    radius1, radius2, radius3, radius4 = 16, 14, 15, 14
    circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
    circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2
    circle3 = (x - center3[0]) ** 2 + (y - center3[1]) ** 2 < radius3 ** 2
    circle4 = (x - center4[0]) ** 2 + (y - center4[1]) ** 2 < radius4 ** 2
    img = circle1 + circle2 + circle3 + circle4
    mask = img.astype(bool)    # boolean 100 x 100 template
    img = img.astype(float)    # the 100 x 100 picture itself
    img += 1 + 0.2 * np.random.randn(*img.shape)

    # Convert the image into a graph with the value of the gradient on the
    # edges.  The graph is sparse and smaller than 10000 x 10000 --
    # presumably one node per pixel inside the mask (the author observed
    # 2678 x 2678); the gradient is probably computed in this step.
    graph = image.img_to_graph(img, mask=mask)
    # print() call: the bare print statement is a SyntaxError on Python 3.
    print(graph.shape)
    graph.data = np.exp(-graph.data / graph.data.std())

    # The number of clusters is fixed here, and only the points inside the
    # mask are clustered.
    labels = spectral_clustering(graph, n_clusters=4, eigen_solver="arpack")
    print(labels)
    label_im = -np.ones(mask.shape)
    label_im[mask] = labels
    plt.matshow(img)
    plt.matshow(label_im)
    plt.show()
开发者ID:hyliu0302,项目名称:scikit-learn-notes,代码行数:55,代码来源:myScikitLearnFcns.py
示例15: cluster_and_rank_demos
def cluster_and_rank_demos(sm, n_clusters, eigen_solver='arpack', assign_labels='discretize'):
    """
    Clusters demos based on similarity matrix.

    Demo indexes are grouped by their spectral-clustering label (every
    label in ``range(n_clusters)`` gets a list, possibly empty) and the
    grouping is passed to ``rank_demos_in_cluster`` together with ``sm``;
    its result is returned.
    """
    labels = spectral_clustering(sm, n_clusters=n_clusters,
                                 eigen_solver=eigen_solver,
                                 assign_labels=assign_labels)
    # range instead of xrange: xrange does not exist on Python 3.
    clusters = {i: [] for i in range(n_clusters)}
    for i, l in enumerate(labels):
        clusters[l].append(i)
    # Maybe re-cluster large demos
    return rank_demos_in_cluster(clusters, sm)
开发者ID:rishabh-battulwar,项目名称:human_demos,代码行数:11,代码来源:cluster_demos.py
示例16: spectralcluster
def spectralcluster(correlations, n_clusters, names):
    """Cluster ``correlations`` spectrally and report the names per cluster.

    Parameters
    ----------
    correlations : 2-D array
        Affinity matrix handed to ``cluster.spectral_clustering``.
    n_clusters : int
        Number of clusters.
    names : array
        Names aligned with the rows of ``correlations``; it is indexed
        with a boolean mask, so it has to support that.

    Returns
    -------
    list
        One array of names per cluster label, in label order.
    """
    # The former ``k=None`` / ``mode=None`` keywords are dropped: they were
    # passed at their defaults and current scikit-learn rejects them as
    # unknown keyword arguments.
    labels = cluster.spectral_clustering(
        correlations, n_clusters=n_clusters, eigen_solver=None,
        random_state=0, n_init=10, eigen_tol=0.0, assign_labels='kmeans')
    clusdict = []
    print("")
    print("Spectral Clustering - shape: " + str(correlations.shape))
    for i in range(labels.max() + 1):
        members = names[labels == i]
        print('Cluster %i: %s' % ((i + 1), ', '.join(members)))
        clusdict.append(members)
    return clusdict
开发者ID:winteram,项目名称:FashionPins,代码行数:12,代码来源:spectralcluster.py
示例17: test_spectral_lobpcg_mode
def test_spectral_lobpcg_mode():
    """Exercise the lobpcg eigen-solver of spectral clustering.

    A fairly large data matrix is used because lobpcg does not work with
    small ones.
    """
    blob_centers = np.array([[0.0, 0.0], [10.0, 10.0]])
    X, true_labels = make_blobs(
        n_samples=100, centers=blob_centers, cluster_std=0.1,
        random_state=42)
    distances = pairwise_distances(X)
    similarity = np.max(distances) - distances
    labels = spectral_clustering(
        similarity, n_clusters=len(blob_centers), random_state=0,
        eigen_solver="lobpcg")
    # Quality is secondary here, the call only has to work; still require
    # a minimal level of agreement with the true labels.
    agreement = np.mean(labels == true_labels)
    assert_greater(agreement, 0.3)
开发者ID:smorfopoulou,项目名称:viral_denovo_pipeline,代码行数:12,代码来源:test_spectral.py
示例18: affin_sclustering
def affin_sclustering(X, n_clust, distance='euclid', gamma=0.1, std=1):
    """Spectral clustering of ``X`` on a precomputed affinity matrix.

    Parameters
    ----------
    X : array-like
        Samples, one per row.
    n_clust : int
        Number of clusters.
    distance : {'euclid', 'cosine'}, optional
        'cosine' uses ``cos(X)`` directly as the similarity; 'euclid'
        uses ``exp(-gamma * d)`` of the Euclidean distances ``d``.
    gamma : float, optional
        Decay rate of the Euclidean kernel.
    std : optional
        When truthy, distances are divided by their standard deviation
        before the kernel is applied.

    Returns
    -------
    array
        Cluster label per sample.

    Raises
    ------
    ValueError
        If ``distance`` is not one of the supported values (this used to
        surface as an UnboundLocalError on ``similarity``).
    """
    print('Basic spectral clustering using affinity matrix')
    if distance == 'cosine':
        similarity = cos(X)
    elif distance == 'euclid':
        dist = euclidean_distances(X)
        if std:
            similarity = np.exp(-gamma * dist / dist.std())
        else:
            similarity = np.exp(-gamma * dist)
    else:
        raise ValueError(
            "Unknown distance %r; expected 'cosine' or 'euclid'" % (distance,))
    labels = cluster.spectral_clustering(
        similarity, n_clusters=n_clust, eigen_solver='arpack')
    return labels
开发者ID:ivri,项目名称:DiffVec,代码行数:12,代码来源:cluster.py
示例19: __speclu
def __speclu(self):
    """Return spectral-cluster labels for the rows of ``self.data_matrix``.

    A square ``data_matrix`` is clustered as is; otherwise the matrix from
    ``self.__getEMatrix()`` is used.  Only the first ``len(data_matrix)``
    labels are returned.
    """
    # print() calls: the bare print statements are SyntaxErrors on
    # Python 3 and these print the same text on Python 2.
    print('using spectral clustering......')
    data_matrix = self.data_matrix
    if len(data_matrix) == len(data_matrix[0]):
        print("Donot need to use E_matrix")
        E_matrix = data_matrix
    else:
        E_matrix = self.__getEMatrix()
    result_total = spectral_clustering(E_matrix, n_clusters=self.k)
    return result_total[:len(data_matrix)]
开发者ID:chenzheng128,项目名称:evaluate_cluster,代码行数:12,代码来源:evaluate_class.py
示例20: test_spectral_clustering
def test_spectral_clustering(self):
    """ModelFrame.cluster.spectral_clustering must match scikit-learn.

    A random symmetric matrix is clustered through the ModelFrame accessor
    and through ``cluster.spectral_clustering`` directly; the accessor has
    to return a ModelSeries with the frame's index and the same labels.
    """
    N = 50
    # randint's upper bound is exclusive, so 201 keeps the closed range
    # [1, 200] of the deprecated np.random.random_integers(1, 200).
    m = np.random.randint(1, 201, size=(N, N))
    m = (m + m.T) / 2

    df = pdml.ModelFrame(m)
    result = df.cluster.spectral_clustering(random_state=self.random_state)
    expected = cluster.spectral_clustering(m, random_state=self.random_state)

    self.assertTrue(isinstance(result, pdml.ModelSeries))
    self.assert_index_equal(result.index, df.index)
    self.assert_numpy_array_equal(result.values, expected)
开发者ID:Sandy4321,项目名称:pandas-ml,代码行数:12,代码来源:test_cluster.py
注：本文中的sklearn.cluster.spectral_clustering函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论