本文整理汇总了Python中sklearn.cluster.k_means函数的典型用法代码示例。如果您正苦于以下问题：Python k_means函数的具体用法？Python k_means怎么用？Python k_means使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了k_means函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_k_means_function
def test_k_means_function():
    """Exercise the functional ``k_means`` API end to end.

    Checks the shape of the returned centers, that every cluster is used,
    that the labelling matches ``true_labels`` up to a permutation, that
    exactly one warning is emitted when explicit centers are passed as
    ``init``, and that requesting more clusters than samples raises
    ``ValueError``.
    """
    # verbose=True prints progress; swallow it so the test run stays quiet.
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                           verbose=True)
    finally:
        sys.stdout = old_stdout

    assert_equal(centers.shape, (n_clusters, n_features))
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # check that the label assignments are perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # check warning when centers are passed
    with warnings.catch_warnings(record=True) as w:
        # Force every warning to be reported; otherwise one that was already
        # raised from the same location earlier in the session can be
        # filtered out and the count below depends on test ordering.
        warnings.simplefilter("always")
        k_means(X, n_clusters=n_clusters, init=centers)
    assert_equal(len(w), 1)

    # too many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
开发者ID:Big-Data,项目名称:scikit-learn,代码行数:27,代码来源:test_k_means.py
示例2: kmeans_analysis
def _plot_embedding_with_boundary(position, points, centers, colors):
    """Scatter *points* in subplot *position* and draw the 2-means split line."""
    ax = plt.subplot(position)
    plt.scatter(points[:, 0], points[:, 1], c=colors, marker='x')
    ax.set_aspect('equal', 'datalim')
    # Remember the limits so the dashed line does not rescale the view.
    lim = plt.axis()
    diff = centers[0, :] - centers[1, :]
    if diff[1] == 0:
        # Centers share the same y: the perpendicular direction is vertical.
        direction = np.array([0.0, 1.0])
    else:
        # Any vector (1, -dx/dy) is perpendicular to (dx, dy).
        direction = np.array([1, -diff[0] / diff[1]])
    midpoint = np.mean(centers, axis=0)
    segment = np.array([midpoint + direction, midpoint - direction])
    plt.plot(segment[:, 0], segment[:, 1], 'k--', linewidth=1)
    plt.axis(lim)


def kmeans_analysis(G):
    """Cluster the two embeddings of ``G`` with k-means and report the results.

    The two arrays returned by ``get_embedding(G, 2)`` are each clustered
    with k=2, plotted side by side (coloured by the nodes' ``'block'``
    attribute) and handed to ``compare_results``.  Both are then clustered
    again with k=5 and the error count and adjusted Rand index against the
    ``'block'`` labels are printed.

    :param G: graph whose nodes carry a ``'block'`` attribute
    """
    # list(): a dict view is not a sequence on Python 3.
    block = list(nx.get_node_attributes(G, 'block').values())
    xA, xL = get_embedding(G, 2)

    cA, kmA, _ = k_means(xA, 2)
    cB, kmL, _ = k_means(xL, 2)

    _plot_embedding_with_boundary(121, xA, cA, block)
    _plot_embedding_with_boundary(122, xL, cB, block)

    compare_results(block, kmA, kmL)

    _, kmA, _ = k_means(xA, 5)
    _, kmL, _ = k_means(xL, 5)

    print("ALL FIVE")
    num_diff = vn.num_diff_w_perms(block, kmA)
    ari = adjusted_rand_score(block, kmA)
    print("Adjacency: num error=" + repr(num_diff) + " ari=" + repr(ari))

    num_diff = vn.num_diff_w_perms(block, kmL)
    ari = adjusted_rand_score(block, kmL)
    print("Laplacian: num error=" + repr(num_diff) + " ari=" + repr(ari))
开发者ID:dpmcsuss,项目名称:stfpSim,代码行数:48,代码来源:wikigraph.py
示例3: getCenteroidsByGapStats
def getCenteroidsByGapStats(dataToCluster, maxCluster):
    """Pick a k-means solution for ``dataToCluster`` using a gap statistic.

    For every k in ``1 .. maxCluster - 1`` the data is clustered and
    ``log(calculateWk(...))`` is compared with its average over reference
    data sets drawn uniformly from the bounding box of the data.  The first
    k for which ``gap(k) - (gap(k + 1) - s(k + 1)) > 0`` is selected.

    Parameters
    ----------
    dataToCluster : ndarray of shape (n_samples, n_features)
        Samples to cluster.
    maxCluster : int
        Exclusive upper bound on the number of clusters tried.

    Returns
    -------
    ndarray
        Centroids returned by ``k_means`` for the selected k.

    Raises
    ------
    ValueError
        If no tested k satisfies the selection criterion.
    """
    numRefSets = 10  # reference data sets drawn per k
    numOfClusterRuns = maxCluster - 1

    # Per-feature bounding box used to sample the reference data.
    featureMin = np.min(dataToCluster, axis=0)
    featureMax = np.max(dataToCluster, axis=0)

    wksMetricSave = np.zeros((1, numOfClusterRuns))
    wkbsMetricSave = np.zeros((1, numOfClusterRuns))
    skMetricSave = np.zeros((1, numOfClusterRuns))
    centeroidSave = []

    for clusterRun in range(1, maxCluster):
        col = clusterRun - 1
        centroids, labels, _ = k_means(dataToCluster, n_clusters=clusterRun)
        centeroidSave.append(centroids)
        wksMetricSave[0, col] = np.log(
            calculateWk(centroids, labels, dataToCluster))

        # Same statistic on uniformly distributed reference data.  Sampling
        # straight into a float array avoids truncating the values when the
        # input data has an integer dtype.
        refLogWk = np.zeros(numRefSets)
        for iRun in range(numRefSets):
            refData = np.random.uniform(featureMin, featureMax,
                                        size=dataToCluster.shape)
            centroidsRef, labelsRef, _ = k_means(refData,
                                                 n_clusters=clusterRun)
            refLogWk[iRun] = np.log(
                calculateWk(centroidsRef, labelsRef, refData))
        wkbsMetricSave[0, col] = np.mean(refLogWk)
        skMetricSave[0, col] = np.std(refLogWk)

    # Inflate the spread once to account for the simulation error.
    skMetricSave = skMetricSave * np.sqrt(1 + 1 / float(numRefSets))

    # gap statistics
    gap = wkbsMetricSave - wksMetricSave

    # finalMetric[k] = gap(k) - (gap(k+1) - s(k+1)); the last column has no
    # successor and stays at zero, so it can never be selected.
    finalMetric = np.zeros((1, numOfClusterRuns))
    finalMetric[0, :-1] = gap[0, :-1] - (gap[0, 1:] - skMetricSave[0, 1:])

    candidates = np.where(finalMetric > 0)[1]
    if candidates.size == 0:
        raise ValueError("gap statistic found no suitable number of clusters "
                         "for maxCluster=%r" % (maxCluster,))
    return np.array(centeroidSave[candidates.min()])
开发者ID:jgera,项目名称:masterThesis,代码行数:48,代码来源:mserHelper.py
示例4: check_cluster
def check_cluster(cluster):
    """Decide whether ``cluster`` should be kept whole or split in two.

    The cluster is split with 2-means, every point is projected onto the
    vector joining the two resulting centers, the projections are
    standardised with ``zscore`` and passed to ``anderson``.  The returned
    statistic is scaled by ``1 + 4/n - 25/n**2`` and compared with the first
    critical value.

    :param cluster: sequence of points
    :return: ``(keep, children)`` where ``keep`` is true when the scaled
        statistic is below the first critical value and ``children`` holds
        the two candidate centers (``[]`` when there are fewer than two
        points).
    """
    size = len(cluster)
    if size < 2:
        return True, []

    # Candidate replacement centers from a 2-means run.
    children, _, _ = k_means(cluster, 2)

    # Direction that 2-means considers important for separating the data.
    direction = children[1] - children[0]

    # One-dimensional view of the data along that direction, rescaled to
    # zero mean and unit variance (so the length of `direction` is
    # irrelevant).
    projected = zscore([np.dot(sample, direction) for sample in cluster])

    statistic, critical_values, _ = anderson(projected)
    # Correction factor applied to the statistic for the sample size.
    statistic = statistic * (1 + 4.0 / size - 25.0 / (size ** 2))
    return statistic < critical_values[0], children
开发者ID:armsnyder,项目名称:SickBeetz,代码行数:26,代码来源:clusterer.py
示例5: main
def main():
    """Time the local ``kmeans`` module against scikit-learn and plot results.

    Generates three Gaussian blobs, clusters them with ``kmeans.kmeans`` and
    with ``sklearn.cluster.k_means``, prints objective values and timings,
    then shows the objective-value trace and two scatter plots.
    """
    from itertools import cycle

    import matplotlib.pyplot as plt
    # Public import location; the ``sklearn.datasets.samples_generator``
    # module path is private and not available in newer releases.
    from sklearn.datasets import make_blobs

    n_centers = 3
    X, y = make_blobs(n_samples=1000, centers=n_centers, n_features=2,
                      cluster_std=0.7, random_state=0)

    # Run this K-Means
    import kmeans
    t0 = time.time()
    y_pred, centers, obj_val_seq = kmeans.kmeans(X, n_centers)
    t1 = time.time()
    print("Final obj val: {}".format(obj_val_seq[-1]))
    print("Time taken (this implementation): {}".format(t1 - t0))

    # Run scikit-learn's K-Means
    # NOTE(review): this rebinds ``centers`` and ``y_pred``, so the scatter
    # plots below show scikit-learn's result - confirm that is intended.
    from sklearn.cluster import k_means
    t0 = time.time()
    centers, y_pred, obj_val = k_means(X, n_centers, random_state=0)
    t1 = time.time()
    print("Final obj val: {}".format(obj_val))
    print("Time taken (Scikit, 1 job): {}".format(t1 - t0))

    # Plot change in objective value over iteration
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(obj_val_seq, 'b-', marker='*')
    fig.suptitle("Change in K-means objective value across iterations")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Objective value")
    fig.show()

    # Plot data
    palette = 'bgrcmyk'
    # Make twice as wide to accommodate both plots
    fig = plt.figure(figsize=plt.figaspect(0.5))
    ax = fig.add_subplot(121)
    ax.set_title("Data with true labels and final centers")
    for k, color in zip(range(n_centers), cycle(palette)):
        ax.plot(X[y == k, 0], X[y == k, 1], color + '.')

    # This is valid because we always use the same random seed.
    initial_centers = kmeans.init_centers(X, n_centers, 2)

    # Plot initial centers
    for x in initial_centers:
        ax.plot(x[0], x[1], "mo", markeredgecolor="k", markersize=8)

    # Plot final centers
    for x in centers:
        ax.plot(x[0], x[1], "co", markeredgecolor="k", markersize=8)

    # Plot assignments
    ax = fig.add_subplot(122)
    ax.set_title("Data with final assignments")
    for k, color in zip(range(n_centers), cycle(palette)):
        ax.plot(X[y_pred == k, 0], X[y_pred == k, 1], color + '.')

    fig.tight_layout()
    fig.show()
开发者ID:lightalchemist,项目名称:ML-algorithms,代码行数:60,代码来源:kmeans.py
示例6: k_means_classifier
def k_means_classifier(image):
    """Segment ``image`` by thresholding each texture cluster separately.

    The image is blurred and max-filtered, per-pixel local-binary-pattern
    features are grouped into eight clusters, and within every non-empty
    cluster the pixels below that cluster's Otsu threshold are marked 1 and
    the rest 0.

    :param image: input image array
    :return: array shaped like ``image`` holding the per-pixel 0/1 marks
    """
    n_clusters = 8

    # blur and take local maxima
    smoothed = ndi.maximum_filter(gaussian(image, sigma=8), size=3)

    # texture features, one value per pixel, as a single column
    texture = local_binary_pattern(smoothed, P=40, R=5, method="uniform")
    texture = texture.reshape(-1, 1)

    # NOTE(review): ``k_means`` is used as an estimator class here
    # (constructor + ``fit``), presumably an alias of a mini-batch k-means
    # estimator - confirm against the file's imports.
    model = k_means(n_clusters=n_clusters, batch_size=500).fit(texture)

    # Reshape to the image grid first, then flatten again for indexing.
    label_grid = model.labels_.reshape(smoothed.shape[0], smoothed.shape[1])
    flat_labels = label_grid.ravel()

    pixels = smoothed.ravel()
    shadow_seg = pixels.copy()
    for cluster_id in range(n_clusters):
        # indices of the pixels assigned to this cluster
        members = np.flatnonzero(flat_labels == cluster_id)
        if len(members) == 0:
            continue
        cutoff = threshold_otsu(pixels[members])
        shadow_seg[members] = shadow_seg[members] < cutoff

    return shadow_seg.reshape(*image.shape)
开发者ID:charlienewey,项目名称:shadow-detection-notebook,代码行数:34,代码来源:shadow.py
示例7: kmeans_estimate
def kmeans_estimate(x):
    """Estimate two-component mixture parameters from a 2-means split of ``x``.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Samples to split.

    Returns
    -------
    pis : ndarray of shape (2,)
        Fraction of samples assigned to each cluster.
    means : ndarray of shape (2, n_features)
        Mean of each cluster.
    sigmas : ndarray of shape (2, n_features, n_features)
        Covariance of each cluster, normalised by the cluster size.
    """
    x = np.asarray(x)
    labels = k_means(x, 2)[1]

    pi = np.count_nonzero(labels == 0) * 1.0 / len(labels)
    pis = np.array([pi, 1 - pi])

    means = []
    sigmas = []
    for component in range(2):
        members = x[labels == component]
        mean = np.average(members, axis=0)
        centered = members - mean
        means.append(mean)
        # Sum of outer products of the centered samples, divided by the
        # cluster size; works for any number of features.
        sigmas.append(np.dot(centered.T, centered) / len(members))

    means = np.array(means)
    sigmas = np.array(sigmas)
    print(means)
    return pis, means, sigmas
开发者ID:wbcustc,项目名称:ModernAnalysis,代码行数:29,代码来源:EM_algorithm.py
示例8: clust
def clust(X,vectorfile,clust_file,clust_number):
    """Cluster ``X`` with k-means and write samples plus labels to a file.

    Each output line holds the integer-truncated values of one row of ``X``
    followed by its cluster label, every value terminated by a tab.

    :param X: data matrix passed to ``cluster.k_means``
    :param vectorfile: path of a tab-separated file whose first column is an
        integer id; it is parsed but the result is not used further here
    :param clust_file: path of the output file (overwritten)
    :param clust_number: number of clusters
    :return: the inertia reported by ``cluster.k_means``
    """
    fid2fname = {}
    with open(vectorfile) as vector_fh:
        for line in vector_fh:
            fields = line.strip().split('\t')
            fid2fname.setdefault(int(fields[0]), fields[1:])

    _, labels, inertia = cluster.k_means(X, n_clusters=clust_number)
    print("inertia=", inertia)

    # Append the label as an extra column so it is written with each row.
    C = np.column_stack((X, labels))
    with open(clust_file, 'w') as easy_data:
        for row in C:
            line = "".join(str(int(value)) + "\t" for value in row)
            easy_data.write(line + "\n")
    return inertia
开发者ID:DvHuang,项目名称:Wifi_Clust,代码行数:25,代码来源:b.py
示例9: KMeansCluster
def KMeansCluster(matrix):
    """
    Performs the K-Means cluster given a matrix of data

    @param[in]: matrix, List of List(s)
    @return: the value returned by ``k_means`` for the scaled data
    """
    # Possibly need to scale the data first
    data = scale(matrix)

    # Approximate the number of clusters using c = root(n/2)
    # num_clusters = int(sqrt(len(matrix) / 2))
    num_clusters = 5
    number_init = 10  # Default
    number_iter = 300
    num_cpus = 2

    print("===================")
    print("Training KMeans with (num_clusters, num_init, num_iters, num_cpus)")
    print("%s %s %s %s" % (num_clusters, number_init, number_iter, num_cpus))

    # n_init must be the number of initialisations (number_init), not the
    # iteration cap; passing number_iter ran 300 restarts instead of 10.
    # NOTE(review): ``n_jobs`` is no longer accepted by recent scikit-learn
    # releases - drop it when upgrading; confirm against the pinned version.
    clusters = k_means(data, n_clusters=num_clusters, max_iter=number_iter,
                       n_init=number_init, n_jobs=num_cpus)
    return clusters
开发者ID:zezhouliu,项目名称:leagueoflegends-analysis,代码行数:28,代码来源:kmeans_trainer.py
示例10: spectral_clustering
def spectral_clustering(road_map=None, a=None, use_ncut=False, num_clusters=2):
    """Partition a graph by k-means on a spectral embedding of its Laplacian.

    :param road_map: object passed to ``build_adjacency_matrix`` when ``a``
        is not given; if provided, ``road_map.nodes[i].region_id`` is set to
        the label of node ``i``
    :param a: optional precomputed adjacency matrix
    :param use_ncut: forwarded to ``build_laplacian`` as ``normalize``
    :param num_clusters: number of clusters
    :return: the per-node labels returned by ``k_means``
    """
    print("Building adjacency matrix")
    # ``is None``: ``==`` on an array/matrix compares elementwise instead of
    # testing whether the argument was supplied.
    if a is None:
        a = build_adjacency_matrix(road_map)

    print("Computing laplacian")
    laplacian = build_laplacian(a, normalize=use_ncut)

    print("Spectral embedding")
    X = np.random.rand(laplacian.shape[0], num_clusters + 1)
    e_vals, e_vects = lobpcg(laplacian, X, tol=1e-15,
                             largest=False, maxiter=2000)
    # Skip the first returned eigenvector.
    embedded_data = e_vects[:, 1:]
    print(e_vals)

    print("Clustering")
    _, label, _ = k_means(embedded_data, num_clusters)

    # road_map is optional when ``a`` is passed in; only annotate it if given.
    if road_map is not None:
        for i, region in enumerate(label):
            road_map.nodes[i].region_id = region
    return label
开发者ID:Lab-Work,项目名称:taxisim,代码行数:27,代码来源:partition_graph.py
示例11: clusters
def clusters(n = 100):
    """Cluster the columns of the seasonal data and draw them on a US map.

    The transposed frame returned by ``seasonal_setup(season='MAM')`` is
    split into ``n`` clusters with k-means; the map shapes of every column's
    division are filled with a colour chosen per cluster.

    :param n: number of clusters
    :return: ``(fig, ax)`` of the drawn map
    """
    from collections import defaultdict

    from sklearn.cluster import k_means
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.patches import Polygon
    from matplotlib.collections import PatchCollection
    from mapFuncts import USbasemap
    from matplotlib import cm, colors

    fig = plt.figure()
    ax = fig.add_subplot(111)
    fig, ax, m = USbasemap(fig = fig, ax = ax)

    alldivDF, _, _, _, _ = seasonal_setup(season = 'MAM')
    centroid, label, inertia = k_means(alldivDF.T, n_clusters = n, init = 'k-means++')
    names = alldivDF.columns
    codes = reverseStates(importStates())

    # Index the map shapes by division code once instead of rescanning
    # every shape for every column name.
    shapes_by_div = defaultdict(list)
    for info, shape in zip(m.divs_info, m.divs):
        shapes_by_div[info['CLIMDIV']].append(shape)

    norm = colors.Normalize(vmin = 0, vmax = n, clip = True)
    mapper = cm.ScalarMappable(norm = norm, cmap = cm.Accent)

    for cluster in range(n):
        patches = []
        color = mapper.to_rgba(cluster)
        for name in names[label == cluster]:
            # Code looked up from all but the last three characters of the
            # name, followed by the name's last two characters.
            divcode = codes[name[:-3]] + name[-2:]
            print("%s %s" % (name, divcode))
            for shape in shapes_by_div.get(int(divcode), ()):
                patches.append(Polygon(np.array(shape), closed=True))
        ax.add_collection(PatchCollection(patches, facecolor = color, edgecolor='k', linewidths=1., zorder=2))
    return fig, ax
开发者ID:bgzimmerman,项目名称:nipa,代码行数:32,代码来源:one_month_fcst.py
示例12: example_4
def example_4():
    """
    compare to scikitlearn implementation of kmeans

    Clusters the same random data from the same initial centroids with
    ``kmeans.get_clustering`` and with ``sklearn.cluster.k_means`` and
    prints the wall-clock time of each.
    """
    import sklearn.cluster as skc
    import time

    ndata = 50000
    dimension = 10
    ncentroids = 1000
    data = npr.randn(ndata, dimension).astype(np.float64)
    # Use the first samples as initial centroids for both implementations.
    centroids0 = data[0:ncentroids, :]

    t0 = time.time()
    kmeans.get_clustering(X = data, init = centroids0, n_clusters = ncentroids, algorithm = 'auto', verbose = 1, n_threads = 1)
    t1 = time.time()
    # NOTE(review): ``precompute_distances`` and ``n_jobs`` are not accepted
    # by recent scikit-learn releases - confirm against the pinned version.
    skc.k_means(X = data, n_clusters = ncentroids, max_iter = 1000, n_init = 1, init = centroids0, precompute_distances = False, verbose = True, n_jobs = 1, return_n_iter = True, tol = 0.0)
    t2 = time.time()

    kmeans_time = t1 - t0
    sklearner_time = t2 - t1
    # Single pre-formatted argument so the output is the same under the
    # print statement and the print function.
    print("sklearn :  %s  s" % (sklearner_time,))
    print("this kmeans:  %s  s" % (kmeans_time,))
开发者ID:evelynmitchell,项目名称:eakmeans,代码行数:27,代码来源:examples.py
示例13: test_k_means_function
def test_k_means_function():
    """Check the functional ``k_means`` API, including ``sample_weight``.

    Covers the shape of the returned centers, cluster usage, agreement with
    ``true_labels`` up to a permutation, the ``RuntimeWarning`` raised when
    explicit centers are passed, the ``ValueError`` for too many clusters
    and the ``TypeError`` for ``algorithm='elkan'`` on sparse input.
    """
    # Silence the verbose output while the function runs.
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(
            X, n_clusters=n_clusters, sample_weight=None, verbose=True)
    finally:
        sys.stdout = saved_stdout

    assert_equal(centers.shape, (n_clusters, n_features))
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # the label assignment must be perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # passing explicit centers must warn
    assert_warns(RuntimeWarning, k_means, X,
                 n_clusters=n_clusters, sample_weight=None, init=centers)

    # too many clusters desired
    assert_raises(ValueError, k_means, X,
                  n_clusters=X.shape[0] + 1, sample_weight=None)

    # algorithm='elkan' raises TypeError on a sparse matrix
    assert_raise_message(
        TypeError,
        "algorithm='elkan' not supported for "
        "sparse input X",
        k_means, X=X_csr, n_clusters=2,
        sample_weight=None, algorithm="elkan")
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:33,代码来源:test_k_means.py
示例14: smythEmissionDistribution
def smythEmissionDistribution(pair):
    """
    Build the emission distribution of Smyth's "default" HMM.

    The merged observations are clustered with k-means; each cluster
    contributes one (mean, standard deviation) pair.  target_m is an upper
    bound on the number of states: at most as many clusters as there are
    distinct observation values are used.

    @param pair: A tuple of the form (S: list of sequences, target_m: int)
    @return: A tuple (B, labels, has_zero) where B is the list of
             (mu, stddev) pairs, labels are the k-means labels and has_zero
             tells whether any stddev is below 0.001
    """
    sequences, target_m = pair
    merged, distinct = prepareSeqs(sequences)

    # Never ask for more states than there are distinct observations.
    n_states = min(target_m, len(distinct))
    _, labels, _ = k_means(merged, n_states, init='k-means++')

    emissions = []
    degenerate = False
    for members in partition(merged, labels):
        assert len(members) > 0
        center = mean(members)
        spread = std(members)
        emissions.append((center, spread))
        if spread < 0.001:
            degenerate = True
    return (emissions, labels, degenerate)
开发者ID:japplebaum,项目名称:thesis,代码行数:26,代码来源:smyth.py
示例15: twoClassTrain
def twoClassTrain(self,data1,data2):
    """Fit weights separating ``data1`` (+1) from ``data2`` (-1).

    Every sample's first element is its class tag and the remaining
    elements are its features.  The features are clustered into ``self.k``
    centroids, mapped through ``self.genTransferMatrix`` and the weights
    are obtained with a pseudo-inverse.

    :return: ``(w, centroid)``, the weights and the k-means centroids
    """
    samples = data1 + data2

    # targets form an N x 1 column: +1 for rows tagged like data1, else -1
    targets = []
    features = []
    for row in samples:
        targets.append([1 if row[0] == data1[0][0] else -1])
        features.append(row[1:])

    centroid, _, _ = cluster.k_means(features, self.k)
    design = self.genTransferMatrix(features, centroid)
    weights = numpy.dot(numpy.linalg.pinv(design), targets)
    return weights, centroid
开发者ID:raychin4563,项目名称:MLFinal,代码行数:9,代码来源:RBFtry2.py
示例16: kmeans
def kmeans(xs, k):
    """Cluster the rows of ``xs`` into ``k`` groups and return the labels.

    scikit-learn's ``k_means`` is used when it can be imported, otherwise
    SciPy's ``kmeans2``.

    :param xs: 2-D array, one sample per row
    :param k: number of clusters
    :return: cluster index of every row
    """
    assert xs.ndim == 2
    # Give both backends the same float64 data; previously only the
    # scikit-learn path was cast, so the fallback saw the raw dtype.
    data = xs.astype("float64")
    try:
        from sklearn.cluster import k_means
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(data, k, missing='raise')
    else:
        _, labels, _ = k_means(data, k)
    return labels
开发者ID:AnishShah,项目名称:keras,代码行数:9,代码来源:neural_doodle.py
示例17: initial_means
def initial_means(self,X,n_clusters):
    """Compute initial means from k-means and from kernel k-means.

    :param X: data to cluster
    :param n_clusters: number of clusters for both methods
    :return: ``(k-means centers, kernel k-means means)``; the latter are
        produced by ``self.pre_image`` from the kernel k-means labels
    """
    kmeans_centers, _, _ = k_means(X, n_clusters, random_state = 1)

    kernel_model = KernelKMeans(
        n_clusters=n_clusters,
        random_state=1,
        kernel="rbf",
        gamma=None,
        coef0=1,
        verbose=0,
    )
    kernel_model.fit(X)

    kernel_centers = self.pre_image(
        X, kernel_model.labels_, kernel_model.gamma, n_clusters, 100)
    return kmeans_centers, kernel_centers
开发者ID:Joaggi,项目名称:Machine-Learning,代码行数:9,代码来源:GaussianExperiment.py
示例18: learn
def learn(self, Xtrain, ytrain):
    """ Learns using the traindata

    Selects the columns ``self.features``, finds 10 k-means centers, maps
    every sample to its dot products with those centers and fits ridge
    regression weights on the mapped data.  Stores ``self.centers`` and
    ``self.weights``.
    """
    Xless = Xtrain[:,self.features]
    lam = 100  # ridge regularisation strength
    self.centers = cluster.k_means(Xless, 10)[0]
    Xless = np.dot(Xless, self.centers.T)
    # Solve (X^T X + lam * I) w = X^T y directly instead of forming the
    # explicit inverse: same solution, better conditioned and cheaper.
    gram = np.dot(Xless.T, Xless) + lam*np.identity(Xless.shape[1])
    self.weights = np.linalg.solve(gram, np.dot(Xless.T, ytrain))
开发者ID:PawPatel,项目名称:Learning,代码行数:9,代码来源:P2algorithms.py
示例19: get_point_centroids
def get_point_centroids(indata,K,D):
    """Cluster the per-point means of ``indata`` into ``K`` centroids.

    ``indata`` is indexed as ``indata[j][n][i]``; for every ``n`` and each
    of the first ``D`` values of ``j`` the values are averaged over the last
    axis, giving one ``D``-dimensional vector per ``n``.

    :param indata: 3-D array
    :param K: number of clusters
    :param D: number of leading entries along the first axis to use
    :return: the centroids returned by ``k_means``
    :raises IndexError: if ``D`` exceeds the size of the first axis
    """
    if D > indata.shape[0]:
        raise IndexError("D=%r exceeds the first dimension of indata (%r)"
                         % (D, indata.shape[0]))
    # Accumulate in float64, then average over the last axis and transpose
    # so that rows correspond to the second axis of indata.
    leading = numpy.asarray(indata[:D], dtype=numpy.float64)
    mean = leading.sum(axis=2).T / float(indata.shape[2])
    # random order. change n_jobs to speed up
    centroids, _, _ = k_means(mean, K)
    return centroids
开发者ID:Deepak-Prashanth,项目名称:dhmm-gesture,代码行数:9,代码来源:HMM.py
示例20: gap
def gap(data, refs=None, nrefs=20, ks=range(1,11)):
    """
    Compute the gap statistic (Tibshirani, Walther) for each k in ``ks``.

    I: NumPy array, reference matrix, number of reference boxes, number of clusters to test
    O: Ks input list, Gaps NumPy array

    :param data: array of shape (n_samples, n_features)
    :param refs: optional reference data of shape
        (n_samples, n_features, n_references); when omitted, ``nrefs``
        uniform samples over the bounding box of ``data`` are generated
    :param nrefs: number of reference sets to generate when ``refs`` is None
    :param ks: the k-values for which the statistic is computed
    :return: ``(ks, gaps)``

    NOTE(review): ``k_means`` is called as a constructor followed by
    ``fit`` here, so it is presumably bound to an estimator class, and
    ``k_means_args_dict`` is a dict defined outside this function whose
    ``'n_clusters'`` entry is overwritten on every iteration - confirm both.
    """
    import numpy as np

    shape = data.shape
    # ``is None``: truth-testing an array argument is ambiguous and raises.
    if refs is None:
        tops = data.max(axis=0)
        bottoms = data.min(axis=0)
        spans = tops - bottoms
        rands = np.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            # Stretch the unit-cube sample onto the bounding box of the data.
            rands[:, :, i] = rands[:, :, i] * spans + bottoms
    else:
        rands = refs

    def dispersion(points):
        """Fit the estimator and sum each point's distance to its center."""
        model = k_means(**k_means_args_dict)
        model.fit(points)
        centers, assigned = model.cluster_centers_, model.labels_
        return sum([dst(points[row, :], centers[assigned[row], :])
                    for row in range(shape[0])])

    gaps = np.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        k_means_args_dict['n_clusters'] = k
        disp = dispersion(data)

        refdisps = np.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            refdisps[j] = dispersion(rands[:, :, j])

        # i is the index into 'gaps' for the current k
        gaps[i] = np.mean(np.log(refdisps)) - np.log(disp)
    return ks, gaps
开发者ID:asharma567,项目名称:cool_tools,代码行数:43,代码来源:gap_statistic_implementation.py
注：本文中的sklearn.cluster.k_means函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论