• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python cluster.k_means函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.cluster.k_means函数的典型用法代码示例。如果您正苦于以下问题:Python k_means函数的具体用法?Python k_means怎么用?Python k_means使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了k_means函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_k_means_function

def test_k_means_function():
    """Exercise the functional ``k_means`` API end to end.

    Verifies the shape of the returned centers, the number of distinct
    labels, a perfect ``v_measure_score`` against ``true_labels``, a positive
    inertia, that passing explicit centers as ``init`` records exactly one
    warning, and that asking for more clusters than samples raises
    ``ValueError``.
    """
    # verbose=True prints to stdout; swap in a throwaway buffer meanwhile.
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                           verbose=True)
    finally:
        sys.stdout = saved_stdout

    assert_equal(centers.shape, (n_clusters, n_features))

    n_found = np.unique(labels).shape[0]
    assert_equal(n_found, n_clusters)

    # The label assignments must be perfect (up to a permutation).
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # Exactly one warning is expected when centers are passed as init.
    with warnings.catch_warnings(record=True) as caught:
        k_means(X, n_clusters=n_clusters, init=centers)
        assert_equal(len(caught), 1)

    # Too many clusters requested.
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
开发者ID:Big-Data,项目名称:scikit-learn,代码行数:27,代码来源:test_k_means.py


示例2: kmeans_analysis

def _plot_embedding_with_boundary(position, points, centers, block):
    """Scatter a 2-D embedding in one subplot and draw the 2-means boundary.

    The dashed line passes through the midpoint of the two centers and is
    perpendicular to the segment joining them.
    """
    ax = plt.subplot(position)
    plt.scatter(points[:, 0], points[:, 1], c=block, marker='x')
    ax.set_aspect('equal', 'datalim')
    lim = plt.axis()

    diff = centers[0, :] - centers[1, :]
    if diff[1] != 0:
        direction = np.array([1, -diff[0] / diff[1]])
    else:
        # Centers share the same y-coordinate: the boundary is vertical.
        # (The unguarded division would produce inf/nan here.)
        direction = np.array([0.0, 1.0])
    midpoint = np.mean(centers, axis=0)
    segment = np.array([midpoint + direction, midpoint - direction])
    plt.plot(segment[:, 0], segment[:, 1], 'k--', linewidth=1)

    # Restore the limits captured before the boundary line was drawn.
    plt.axis(lim)


def kmeans_analysis(G):
    """Cluster two embeddings of ``G`` with k-means and report the errors.

    Reads the ``'block'`` node attribute of ``G`` as the reference labels,
    obtains two 2-D embeddings from ``get_embedding`` (named adjacency and
    Laplacian in the printed report), plots each with its 2-means boundary,
    calls ``compare_results`` on the 2-cluster labels, then repeats the
    clustering with 5 clusters and prints the error count and adjusted Rand
    index for each embedding.

    Parameters
    ----------
    G : graph
        Graph whose nodes carry a ``'block'`` attribute.
    """
    # list() so the labels are a real sequence on Python 3 (dict view there).
    block = list(nx.get_node_attributes(G, 'block').values())

    xA, xL = get_embedding(G, 2)

    cA, kmA, _ = k_means(xA, 2)
    cB, kmL, _ = k_means(xL, 2)

    _plot_embedding_with_boundary(121, xA, cA, block)
    _plot_embedding_with_boundary(122, xL, cB, block)

    compare_results(block, kmA, kmL)

    _, kmA, _ = k_means(xA, 5)
    _, kmL, _ = k_means(xL, 5)

    print("ALL FIVE")
    for title, predicted in (("Adjacency", kmA), ("Laplacian", kmL)):
        num_diff = vn.num_diff_w_perms(block, predicted)
        ari = adjusted_rand_score(block, predicted)
        print(title + ": num error=" + repr(num_diff) + " ari=" + repr(ari))
开发者ID:dpmcsuss,项目名称:stfpSim,代码行数:48,代码来源:wikigraph.py


示例3: getCenteroidsByGapStats

def getCenteroidsByGapStats(dataToCluster, maxCluster):
    """Pick a number of clusters with the gap statistic and return its centroids.

    For every k in ``1 .. maxCluster - 1`` the data is clustered with
    ``k_means`` and compared against ``bRef`` reference data sets drawn
    uniformly from the bounding box of the first two columns.  The selected k
    is the smallest one for which ``Gap(k) - (Gap(k+1) - s(k+1)) > 0``.

    Parameters
    ----------
    dataToCluster : numpy.ndarray
        Two-column array of points (the reference data is generated from the
        min/max of columns 0 and 1).
    maxCluster : int
        Exclusive upper bound on the number of clusters tried; must be at
        least 3 so that two consecutive gap values can be compared.

    Returns
    -------
    numpy.ndarray
        Centroids returned by ``k_means`` for the selected k.

    Raises
    ------
    ValueError
        If ``maxCluster < 3`` or no tested k satisfies the gap criterion.
    """
    if maxCluster < 3:
        raise ValueError("maxCluster must be at least 3 to compare "
                         "consecutive gap values, got %r" % (maxCluster,))

    # Bounding box used to draw the uniform reference data.
    low = [np.min(dataToCluster[:, 0]), np.min(dataToCluster[:, 1])]
    high = [np.max(dataToCluster[:, 0]), np.max(dataToCluster[:, 1])]

    bRef = 10  # number of reference data sets per k
    numOfClusterRuns = maxCluster - 1
    wksMetricSave = np.zeros(numOfClusterRuns)
    wkbsMetricSave = np.zeros(numOfClusterRuns)
    skMetricSave = np.zeros(numOfClusterRuns)
    centeroidSave = []

    for clusterRun in range(1, maxCluster):
        slot = clusterRun - 1
        centroids, labels, _ = k_means(dataToCluster, n_clusters=clusterRun)
        centeroidSave.append(centroids)

        # calculateWk is defined elsewhere; presumably the within-cluster
        # dispersion W_k of the gap statistic -- TODO confirm.
        wksMetricSave[slot] = np.log(
            calculateWk(centroids, labels, dataToCluster))

        refLogWk = np.zeros(bRef)
        for iRun in range(bRef):
            # One (x, y) draw per row, cast to the input dtype exactly as an
            # element-wise assignment into zeros_like(dataToCluster) would.
            refData = np.random.uniform(
                low, high, size=dataToCluster.shape
            ).astype(dataToCluster.dtype)
            centroidsRef, labelsRef, _ = k_means(refData,
                                                 n_clusters=clusterRun)
            refLogWk[iRun] = np.log(
                calculateWk(centroidsRef, labelsRef, refData))

        wkbsMetricSave[slot] = np.sum(refLogWk) / float(bRef)
        skMetricSave[slot] = np.sqrt(
            np.sum((refLogWk - wkbsMetricSave[slot]) ** 2) / float(bRef))

    skMetricSave = skMetricSave * np.sqrt(1 + 1 / float(bRef))

    gap = wkbsMetricSave - wksMetricSave
    # Gap(k) - (Gap(k+1) - s(k+1)) for every pair of consecutive k values.
    finalMetric = gap[:-1] - (gap[1:] - skMetricSave[1:])

    positive = np.where(finalMetric > 0)[0]
    if positive.size == 0:
        raise ValueError("no cluster count below %r satisfies the gap "
                         "statistic criterion" % (maxCluster,))
    selectIndex = np.min(positive)
    return np.array(centeroidSave[selectIndex])
开发者ID:jgera,项目名称:masterThesis,代码行数:48,代码来源:mserHelper.py


示例4: check_cluster

def check_cluster(cluster):
    """Decide whether ``cluster`` should stay whole or be split in two.

    Clusters with fewer than two points are always kept (``(True, [])``).
    Otherwise the points are split with 2-means, projected onto the vector
    joining the two child centers, standardised with ``zscore`` and tested
    with ``anderson``.

    Returns
    -------
    tuple
        ``(keep, children)`` where ``keep`` is True when the corrected
        statistic is below the first critical value returned by ``anderson``
        and ``children`` are the two centers found by ``k_means``.
    """
    size = len(cluster)
    if size < 2:
        return True, []

    # Split into two candidate children.
    centers, _labels, _inertia = k_means(cluster, 2)

    # Direction joining the two child centers.
    direction = centers[1] - centers[0]

    # One-dimensional view of the data along that direction, standardised to
    # zero mean and unit variance.
    projected = zscore([np.dot(sample, direction) for sample in cluster])

    statistic, critical_values, _significance = anderson(projected)
    # Sample-size correction applied to the statistic.
    statistic *= (1 + 4.0 / size - 25.0 / (size ** 2))

    return statistic < critical_values[0], centers
开发者ID:armsnyder,项目名称:SickBeetz,代码行数:26,代码来源:clusterer.py


示例5: main

def main():
    """Demo: run this module's k-means and scikit-learn's on the same blobs.

    Prints the final objective value and wall-clock time of both
    implementations, plots the objective value per iteration of this
    implementation, and plots the data with true labels, initial/final
    centers and final assignments of this implementation.
    """
    import time
    from itertools import cycle

    import matplotlib.pyplot as plt
    from sklearn.cluster import k_means
    # make_blobs is exported by sklearn.datasets itself; the old
    # sklearn.datasets.samples_generator path no longer exists in recent
    # scikit-learn releases.
    from sklearn.datasets import make_blobs

    import kmeans

    n_centers = 3
    X, y = make_blobs(n_samples=1000, centers=n_centers, n_features=2,
                      cluster_std=0.7, random_state=0)

    # Run this K-Means
    t0 = time.time()
    y_pred, centers, obj_val_seq = kmeans.kmeans(X, n_centers)
    t1 = time.time()
    print("Final obj val: {}".format(obj_val_seq[-1]))
    print("Time taken (this implementation): {}".format(t1 - t0))

    # Run scikit-learn's K-Means.  Results go to their own names so they do
    # not overwrite the centers/assignments plotted below next to this
    # implementation's initial centers.
    t0 = time.time()
    _sk_centers, _sk_labels, sk_obj_val = k_means(X, n_centers,
                                                  random_state=0)
    t1 = time.time()
    print("Final obj val: {}".format(sk_obj_val))
    print("Time taken (Scikit, 1 job): {}".format(t1 - t0))

    # Plot change in objective value over iteration
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(obj_val_seq, 'b-', marker='*')
    fig.suptitle("Change in K-means objective value across iterations")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Objective value")
    fig.show()

    # Plot data; the figure is twice as wide as tall to hold both panels.
    palette = 'bgrcmykbgrcmykbgrcmykbgrcmyk'
    fig = plt.figure(figsize=plt.figaspect(0.5))
    ax = fig.add_subplot(121)
    ax.set_title("Data with true labels and final centers")
    for k, color in zip(range(n_centers), cycle(palette)):
        ax.plot(X[y == k, 0], X[y == k, 1], color + '.')

    # Original author's note: recomputing the initial centers is valid
    # because the same random seed is always used.
    initial_centers = kmeans.init_centers(X, n_centers, 2)
    for x in initial_centers:
        ax.plot(x[0], x[1], "mo", markeredgecolor="k", markersize=8)

    # Plot final centers
    for x in centers:
        ax.plot(x[0], x[1], "co", markeredgecolor="k", markersize=8)

    # Plot assignments
    ax = fig.add_subplot(122)
    ax.set_title("Data with final assignments")
    for k, color in zip(range(n_centers), cycle(palette)):
        ax.plot(X[y_pred == k, 0], X[y_pred == k, 1], color + '.')

    fig.tight_layout()
    fig.show()
开发者ID:lightalchemist,项目名称:ML-algorithms,代码行数:60,代码来源:kmeans.py


示例6: k_means_classifier

def k_means_classifier(image):
    """Segment ``image`` by clustering texture features and thresholding.

    The image is blurred and max-filtered, local-binary-pattern features are
    clustered into 8 groups, and within each group the pixels below that
    group's Otsu threshold are marked.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``image`` holding the per-pixel result of the
        ``pixel < threshold`` comparison.
    """
    n_clusters = 8

    # Blur, then take local maxima with a size-3 maximum filter.
    smoothed = ndi.maximum_filter(gaussian(image, sigma=8), size=3)

    # Texture features, one column with one row per pixel.
    texture = local_binary_pattern(smoothed, P=40, R=5, method="uniform")
    texture_column = texture.reshape(-1, 1)

    # Cluster the texture features.
    fitted = k_means(n_clusters=n_clusters,
                     batch_size=500).fit(texture_column)

    # Copy relevant attributes (the centers are read but not used below).
    labels = fitted.labels_
    clusters = fitted.cluster_centers_

    # Give the labels the 2-D layout of the filtered image.
    labels = labels.reshape(smoothed.shape[0], smoothed.shape[1])

    pixels = smoothed.ravel()
    shadow_seg = pixels.copy()
    for cluster_id in range(n_clusters):
        # Flat indices of the pixels assigned to this cluster.
        members = np.nonzero(labels.ravel() == cluster_id)[0]
        if len(members) == 0:
            continue
        cutoff = threshold_otsu(pixels[members])
        shadow_seg[members] = shadow_seg[members] < cutoff

    return shadow_seg.reshape(*image.shape)
开发者ID:charlienewey,项目名称:shadow-detection-notebook,代码行数:34,代码来源:shadow.py


示例7: kmeans_estimate

def kmeans_estimate(x):
    """Estimate two-component mixture parameters from a 2-means split.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Samples to split into two clusters with ``k_means``.

    Returns
    -------
    pis : numpy.ndarray of shape (2,)
        Fraction of samples in cluster 0 and its complement.
    means : numpy.ndarray of shape (2, n_features)
        Per-cluster sample mean.
    sigmas : numpy.ndarray of shape (2, n_features, n_features)
        Per-cluster covariance normalised by the cluster size (not size - 1).
    """
    x = np.asarray(x)
    labels = np.asarray(k_means(x, 2)[1])

    pi = np.count_nonzero(labels == 0) * 1.0 / len(labels)
    pis = [pi, 1 - pi]

    means = []
    sigmas = []
    for cluster_id in range(2):
        members = x[labels == cluster_id]
        mean = np.average(members, axis=0)
        means.append(mean)

        # Sum of outer products of the centred samples, in one matrix product.
        centred = members - mean
        sigmas.append(np.dot(centred.T, centred) / len(members))

    pis = np.array(pis)
    means = np.array(means)
    sigmas = np.array(sigmas)
    print(means)
    return pis, means, sigmas
开发者ID:wbcustc,项目名称:ModernAnalysis,代码行数:29,代码来源:EM_algorithm.py


示例8: clust

def clust(X,vectorfile,clust_file,clust_number):
    """Cluster ``X`` with k-means and dump rows plus labels to a TSV file.

    Parameters
    ----------
    X : array-like
        Samples passed to ``cluster.k_means``.
    vectorfile : str
        Path of a tab-separated file whose first column is an integer id.
        It is parsed (so a missing or malformed file still raises) but the
        resulting mapping is not used further in this function.
    clust_file : str
        Output path; each line holds the row's values and its label, each
        converted with ``int`` and followed by a tab.
    clust_number : int
        Number of clusters.

    Returns
    -------
    float
        The inertia returned by ``cluster.k_means``.
    """
    fid2fname = {}
    with open(vectorfile) as vector_handle:
        for line in vector_handle:
            fields = line.strip().split('\t')
            fid2fname.setdefault(int(fields[0]), fields[1:])

    _, labels, inertia = cluster.k_means(X, n_clusters=clust_number)

    print ("inertia=",inertia)
    C = np.column_stack((X, labels))

    # The context manager closes the file even if a row fails to convert.
    with open(clust_file, 'w') as easy_data:
        for row in C:
            cells = "".join(str(int(value)) + "\t" for value in row)
            easy_data.write(cells + "\n")

    return inertia
开发者ID:DvHuang,项目名称:Wifi_Clust,代码行数:25,代码来源:b.py


示例9: KMeansCluster

def KMeansCluster(matrix):
    """
    Performs the K-Means cluster given a matrix of data
    @param[in]: matrix, List of List(s)
    @return: whatever ``k_means`` returns for the scaled data
    """

    # Possibly need to scale the data first
    data = scale(matrix)

    # Alternative considered by the author: approximate the number of
    # clusters with c = root(n/2).
    num_clusters = 5
    number_init = 10 # Default
    number_iter = 300
    num_cpus = 2

    print("===================")
    print("Training KMeans with (num_clusters, num_init, num_iters, num_cpus)")
    print("%s %s %s %s" % (num_clusters, number_init, number_iter, num_cpus))

    # n_init must be the number of initialisations (number_init), not the
    # iteration cap: passing number_iter ran 300 restarts instead of the 10
    # reported in the log line above.
    clusters = k_means(data, n_clusters=num_clusters, max_iter=number_iter,
                       n_init=number_init, n_jobs=num_cpus)

    return clusters
开发者ID:zezhouliu,项目名称:leagueoflegends-analysis,代码行数:28,代码来源:kmeans_trainer.py


示例10: spectral_clustering

def spectral_clustering(road_map=None, a=None, use_ncut=False, num_clusters=2):
    """Partition a road map into regions by spectral clustering.

    Parameters
    ----------
    road_map : object, optional
        Map whose ``nodes[i].region_id`` is set to the cluster label of node
        ``i``.  Also used to build the adjacency matrix when ``a`` is None.
    a : matrix, optional
        Precomputed adjacency matrix; built from ``road_map`` when omitted.
    use_ncut : bool
        Passed as ``normalize`` to ``build_laplacian``.
    num_clusters : int
        Number of regions.

    Returns
    -------
    label : array
        Cluster label per node, as returned by ``k_means``.
    """
    print("Building adjacency matrix")
    # Identity test: ``a == None`` on an array or sparse matrix is an
    # element-wise comparison, not a None check.
    if a is None:
        a = build_adjacency_matrix(road_map)

    print("Computing laplacian")
    l = build_laplacian(a, normalize=use_ncut)

    print("Spectral embedding")
    # Random starting block with one extra column; the first eigenvector is
    # dropped below.
    X = np.random.rand(l.shape[0], num_clusters + 1)
    e_vals, e_vects = lobpcg(l, X, tol=1e-15,
                             largest=False, maxiter=2000)

    embedded_data = e_vects[:, 1:]

    print(e_vals)

    print("Clustering")
    _centroid, label, _inertia = k_means(embedded_data, num_clusters)

    # Callers that pass only ``a`` have no map to annotate.
    if road_map is not None:
        for i, region in enumerate(label):
            road_map.nodes[i].region_id = region

    return label
开发者ID:Lab-Work,项目名称:taxisim,代码行数:27,代码来源:partition_graph.py


示例11: clusters

def clusters(n = 100):
    """Cluster the columns of the MAM seasonal data and draw them on a US map.

    Parameters
    ----------
    n : int
        Number of k-means clusters (also the upper bound of the colour scale).

    Returns
    -------
    fig, ax
        The matplotlib figure and axes with one patch collection per cluster.
    """
    from collections import defaultdict

    from sklearn.cluster import k_means
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.patches import Polygon
    from matplotlib.collections import PatchCollection
    from mapFuncts import USbasemap
    from matplotlib import cm, colors

    fig = plt.figure()
    ax = fig.add_subplot(111)
    fig, ax, m = USbasemap(fig = fig, ax = ax)
    alldivDF, _, _, _, _ = seasonal_setup(season = 'MAM')
    _centroid, label, _inertia = k_means(alldivDF.T, n_clusters = n,
                                         init = 'k-means++')
    names = alldivDF.columns
    codes = reverseStates(importStates())
    norm = colors.Normalize(vmin = 0, vmax = n, clip = True)
    mapper = cm.ScalarMappable(norm = norm, cmap = cm.Accent)

    # Index the map shapes by division code once instead of rescanning every
    # division for every column name.
    shapes_by_div = defaultdict(list)
    for info, shape in zip(m.divs_info, m.divs):
        shapes_by_div[info['CLIMDIV']].append(shape)

    for cluster in range(n):
        idx = label == cluster
        patches = []
        color = mapper.to_rgba(cluster)
        for name in names[idx]:
            # Column names end in a 3-character suffix whose last two
            # characters are appended to the state code.
            divcode = codes[name[:-3]] + name[-2:]
            print("%s %s" % (name, divcode))
            for shape in shapes_by_div.get(int(divcode), []):
                # ``closed`` is passed by keyword; newer matplotlib rejects
                # it positionally.
                patches.append(Polygon(np.array(shape), closed=True))
        ax.add_collection(PatchCollection(patches, facecolor = color,
                                          edgecolor='k', linewidths=1.,
                                          zorder=2))
    return fig, ax
开发者ID:bgzimmerman,项目名称:nipa,代码行数:32,代码来源:one_month_fcst.py


示例12: example_4

def example_4():
    """
    compare to scikitlearn implementation of kmeans

    Both implementations cluster the same random data starting from the
    same initial centroids (the first ``ncentroids`` rows); the wall-clock
    time of each is printed.
    """

    import sklearn.cluster as skc
    import time

    ndata = 50000
    dimension = 10
    ncentroids = 1000
    data = npr.randn(ndata, dimension).astype(np.float64)

    centroids0 = data[0:ncentroids, :]

    t0 = time.time()
    kmeans.get_clustering(X = data, init = centroids0, n_clusters = ncentroids, algorithm = 'auto', verbose = 1, n_threads = 1)
    t1 = time.time()

    # Only the timing matters here; the clustering result is discarded.
    skc.k_means(X = data, n_clusters = ncentroids, max_iter = 1000, n_init = 1, init = centroids0, precompute_distances = False, verbose = True, n_jobs = 1, return_n_iter = True, tol = 0.0)
    t2 = time.time()

    kmeans_time = t1 - t0
    sklearner_time = t2 - t1

    # Single-argument print calls give the same output on Python 2 and 3.
    print("sklearn :  %s  s" % (sklearner_time,))
    print("this kmeans:  %s  s" % (kmeans_time,))
开发者ID:evelynmitchell,项目名称:eakmeans,代码行数:27,代码来源:examples.py


示例13: test_k_means_function

def test_k_means_function():
    """Exercise the functional ``k_means`` API with ``sample_weight=None``.

    Checks the shape of the centers, the number of distinct labels, a
    perfect ``v_measure_score``, a positive inertia, the ``RuntimeWarning``
    emitted when explicit centers are passed as ``init``, the ``ValueError``
    for too many clusters, and the ``TypeError`` raised for
    ``algorithm='elkan'`` on sparse input.
    """
    # verbose=True prints to stdout; swap in a throwaway buffer meanwhile.
    saved_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                           sample_weight=None,
                                           verbose=True)
    finally:
        sys.stdout = saved_stdout

    assert_equal(centers.shape, (n_clusters, n_features))

    n_found = np.unique(labels).shape[0]
    assert_equal(n_found, n_clusters)

    # The label assignments must be perfect (up to a permutation).
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # A warning is expected when centers are passed as init.
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 sample_weight=None, init=centers)

    # Too many clusters requested.
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1,
                  sample_weight=None)

    # algorithm='elkan' raises TypeError on a sparse matrix.
    elkan_message = ("algorithm='elkan' not supported for "
                     "sparse input X")
    assert_raise_message(TypeError, elkan_message, k_means, X=X_csr,
                         n_clusters=2, sample_weight=None,
                         algorithm="elkan")
开发者ID:daniel-perry,项目名称:scikit-learn,代码行数:33,代码来源:test_k_means.py


示例14: smythEmissionDistribution

def smythEmissionDistribution(pair):
    """
    Build the emission distribution of Smyth's "default" HMM.

    The merged observations are clustered with k-means; each cluster
    contributes one (mean, standard deviation) entry.  The requested state
    count is only an upper bound: it is capped at the number of distinct
    observation values.

    @param pair: A tuple of the form (S: list of sequences, m: int)
    @return: A tuple (B, labels, has_zero) where B is the list of
        (mu, stddev) pairs, labels are the k-means labels, and has_zero
        tells whether any stddev is below 0.001
    """
    sequences, max_states = pair
    merged, distinct = prepareSeqs(sequences)
    n_states = min(max_states, len(distinct))
    _centroids, labels, _inertia = k_means(merged, n_states,
                                           init='k-means++')

    B = []
    for group in partition(merged, labels):
        assert len(group) > 0
        B.append((mean(group), std(group)))

    # Flag (near-)degenerate clusters.
    has_zero = any(sigma < 0.001 for _mu, sigma in B)
    return (B, labels, has_zero)
开发者ID:japplebaum,项目名称:thesis,代码行数:26,代码来源:smyth.py


示例15: twoClassTrain

	def twoClassTrain(self,data1,data2):
		"""Fit weights for a two-class problem on k-means centroids.

		The first element of every row is its class tag and the remaining
		elements are features.  Rows whose tag equals the tag of the first
		row of ``data1`` get target +1, all others -1.  Returns ``(w,
		centroid)`` where ``w`` is the pseudo-inverse solution for the matrix
		produced by ``self.genTransferMatrix``.
		"""
		samples = data1 + data2
		# y is an N x 1 column of +1 / -1 targets.
		y = []
		X = []
		for row in samples:
			if row[0] == data1[0][0]:
				y.append([1])
			else:
				y.append([-1])
			X.append(row[1:])
		centroid, _label, _inertia = cluster.k_means(X,self.k)
		phi = self.genTransferMatrix(X,centroid)
		pseudo_inverse = numpy.linalg.pinv(phi)
		w = numpy.dot(pseudo_inverse,y)
		return w, centroid
开发者ID:raychin4563,项目名称:MLFinal,代码行数:9,代码来源:RBFtry2.py


示例16: kmeans

def kmeans(xs, k):
    """Return a k-means cluster label for every row of the 2-D array ``xs``.

    scikit-learn's ``k_means`` (on a float64 copy) is used when available;
    when importing it fails, SciPy's ``kmeans2`` is used instead with
    ``missing='raise'``.
    """
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means as sklearn_k_means
        labels = sklearn_k_means(xs.astype("float64"), k)[1]
    except ImportError:
        from scipy.cluster.vq import kmeans2 as scipy_kmeans2
        labels = scipy_kmeans2(xs, k, missing='raise')[1]
    return labels
开发者ID:AnishShah,项目名称:keras,代码行数:9,代码来源:neural_doodle.py


示例17: initial_means

 def initial_means(self,X,n_clusters):
     """Compute two sets of initial means for ``X``.

     Returns a pair: the centers found by ``k_means`` (random_state 1) and
     the result of ``self.pre_image`` applied to the labels of an RBF
     ``KernelKMeans`` model fitted on ``X``.
     """
     kmeans_centers = k_means(X, n_clusters, random_state = 1)[0]

     kernel_model = KernelKMeans(n_clusters=n_clusters, random_state=1,
                                 kernel="rbf", gamma=None, coef0=1,
                                 verbose=0)
     kernel_model.fit(X)
     kernel_centers = self.pre_image(X, kernel_model.labels_,
                                     kernel_model.gamma, n_clusters, 100)
     return kmeans_centers, kernel_centers
开发者ID:Joaggi,项目名称:Machine-Learning,代码行数:9,代码来源:GaussianExperiment.py


示例18: learn

    def learn(self, Xtrain, ytrain):
        """Fit regularised least-squares weights on k-means projections.

        The selected feature columns are clustered into 10 centers (stored
        in ``self.centers``), the data is projected onto those centers, and
        ``self.weights`` is set to ``inv(P'P + 100 I) P' y`` for the
        projected data ``P``.
        """
        ridge = 100
        selected = Xtrain[:,self.features]

        self.centers = cluster.k_means(selected, 10)[0]
        projected = np.dot(selected, self.centers.T)

        gram = np.dot(projected.T, projected)
        regularised = gram + ridge*np.identity(projected.shape[1])
        solver = np.dot(np.linalg.inv(regularised), projected.T)
        self.weights = np.dot(solver, ytrain)
开发者ID:PawPatel,项目名称:Learning,代码行数:9,代码来源:P2algorithms.py


示例19: get_point_centroids

def get_point_centroids(indata,K,D):
    """Cluster the per-point means of ``indata`` into ``K`` centroids.

    Parameters
    ----------
    indata : numpy.ndarray
        3-D array indexed as ``indata[dimension][point][sample]``.
    K : int
        Number of clusters passed to ``k_means``.
    D : int
        Number of leading dimensions (first axis) to use.

    Returns
    -------
    centroids
        The centers returned by ``k_means`` for the ``(n_points, D)`` matrix
        of means taken over the last axis.

    Raises
    ------
    ValueError
        If ``D`` is negative.
    IndexError
        If ``D`` exceeds the size of the first axis.
    """
    if D < 0:
        raise ValueError("D must be non-negative, got %r" % (D,))
    if D > indata.shape[0]:
        raise IndexError("D=%r exceeds the first axis of indata (%r)"
                         % (D, indata.shape[0]))

    # Mean over the sample axis, accumulated in float64, laid out as one row
    # per point and one column per dimension.
    mean = indata[:D].mean(axis=2, dtype=numpy.float64).T

    # Original author's note: centroids come back in random order; n_jobs
    # can be changed to speed this up.
    centroids, _labels, _inertia = k_means(mean, K)
    return centroids
开发者ID:Deepak-Prashanth,项目名称:dhmm-gesture,代码行数:9,代码来源:HMM.py


示例20: gap

def _gap_fit_dispersion(points):
    """Fit the configured k-means on ``points`` and sum point-to-center distances."""
    model = k_means(**k_means_args_dict)
    model.fit(points)
    centers, labels = model.cluster_centers_, model.labels_
    return sum(dst(points[row, :], centers[labels[row], :])
               for row in range(points.shape[0]))


def gap(data, refs=None, nrefs=20, ks=range(1,11)):
    """
    Compute the gap statistic (Tibshirani, Walther) for several cluster counts.

    I: NumPy array, reference matrix, number of reference boxes, number of clusters to test
    O: Ks input list, Gaps NumPy array (in that order)

    Give the list of k-values for which you want to compute the statistic in ks.
    When ``refs`` is None, ``nrefs`` reference sets are drawn uniformly from
    the bounding box of ``data``; otherwise ``refs`` is used as-is and is
    indexed as ``refs[:, :, j]``.

    Note: ``k_means_args_dict['n_clusters']`` (module-level) is overwritten
    for every k tested.
    """
    import numpy as np

    shape = data.shape

    # ``not refs`` raises for a multi-element array; test for None explicitly.
    if refs is None:
        bottoms = np.asarray(data.min(axis=0))
        spans = np.asarray(data.max(axis=0)) - bottoms
        rands = np.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            # Scale each column from [0, 1) to the data's range.
            rands[:, :, i] = rands[:, :, i] * spans + bottoms
    else:
        rands = refs

    gaps = np.zeros((len(ks),))

    for (i, k) in enumerate(ks):
        k_means_args_dict['n_clusters'] = k

        disp = _gap_fit_dispersion(data)

        refdisps = np.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            refdisps[j] = _gap_fit_dispersion(rands[:, :, j])

        # gaps is indexed by position in ks, not by k itself.
        gaps[i] = np.mean(np.log(refdisps)) - np.log(disp)

    return ks, gaps
开发者ID:asharma567,项目名称:cool_tools,代码行数:43,代码来源:gap_statistic_implementation.py



注:本文中的sklearn.cluster.k_means函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python cluster.spectral_clustering函数代码示例发布时间:2022-05-27
下一篇:
Python cluster.estimate_bandwidth函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap