• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python vq.kmeans函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中scipy.cluster.vq.kmeans函数的典型用法代码示例。如果您正苦于以下问题:Python kmeans函数的具体用法?Python kmeans怎么用?Python kmeans使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了kmeans函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: clustering_scipy_kmeans

def clustering_scipy_kmeans(features, n_clust = 8):
  """Explore k-means clusterings of ``features`` and plot the centroids.

  The features are whitened, the distortion obtained for k = 1..11 is
  plotted, and two clusterings with ``n_clust`` clusters are computed
  (``kmeans``, then ``kmeans2`` with ``minit='points'``).  Every centroid
  of each clustering is drawn in its own subplot.

  Args:
    features: observation matrix handed to ``whiten``.
    n_clust: number of clusters requested from ``kmeans`` and ``kmeans2``.

  Returns:
    None.  Results are printed and displayed through matplotlib.
  """
  whitened = whiten(features)
  print(whitened.shape)

  # Distortion as a function of the number of clusters (k = 1..11).
  initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
  plt.plot([var for (cent, var) in initial])
  plt.show()

  codebook, distortion = kmeans(whitened, n_clust)
  print("%s %s" % (codebook, distortion))
  _plot_centroids(codebook)

  centroid, label = kmeans2(whitened, n_clust, minit = 'points')
  print("%s %s" % (centroid, label))
  _plot_centroids(centroid)


def _plot_centroids(centroids):
  """Draw every row of ``centroids`` in its own cell of a two-column grid."""
  # Size the grid from the rows actually present: the codebook can hold a
  # different number of centroids than the hard-coded 8 used previously.
  n_rows = max(1, (len(centroids) + 1) // 2)
  for ii, center in enumerate(centroids):
    # Subplot indices are 1-based; index 0 is rejected by matplotlib.
    plt.subplot(n_rows, 2, ii + 1)
    plt.plot(center)
  plt.show()
开发者ID:kaustuvkanti,项目名称:Experiments,代码行数:30,代码来源:dump_transition_for_clustering.py


示例2: custom

def custom():
    """Cluster per-item user vectors built from ``my_items_likehood.txt``.

    Each line of the file is parsed with ``keys`` into a ``(user, item)``
    pair.  For at most the first 101 items a feature row is built with
    ``user_matrix``; the rows are whitened and passed to ``kmeans``.

    Returns:
        None.  Progress and results are printed.
    """
    _items = {}
    users = []

    # Use a context manager so the file is closed even if parsing fails.
    with open('my_items_likehood.txt') as source:
        for line in source:
            user, item = keys(line)
            users.append(user)
            _items.setdefault(item, []).append(user)

    sorted_users = sorted(users)
    l = len(sorted_users)
    features = []
    for count, item in enumerate(_items):
        features.append(user_matrix(l, _items[item], sorted_users))
        # Stop after the item with index 100, i.e. 101 rows at most.
        if count == 100:
            break

    print('whiten')
    whitened = whiten(array(features))
    print('kmeans')
    # NOTE(review): scipy.cluster.vq.kmeans requires a second argument
    # (k_or_guess); unless ``kmeans`` is a project wrapper with a default,
    # this call raises TypeError.  The intended k is not visible here.
    print(kmeans(whitened))
    # Report the number of distinct items read; the previous code measured an
    # always-empty dict and therefore always printed 0.
    print("%d items voted by %d users" % (len(_items), len(users)))
开发者ID:mcaprari,项目名称:rt-pub,代码行数:29,代码来源:t.py


示例3: kmeans_net

def kmeans_net(net, layers, num_c=16, initials=None):
    """Build a per-layer weight codebook by running k-means on non-zero weights.

    Args:
        net: network object exposing ``net.params[layer][0].data`` as a numpy
            array holding the weights of ``layer``.
        layers: names of the layers to quantize.
        num_c: quantization levels per layer, counting the reserved zero
            entry; either a single integer used for every layer or a sequence
            with one entry per layer.
        initials: initial cluster centres.  ``None`` seeds k-means with
            ``num_c - 1`` evenly spaced values between the smallest and
            largest non-zero weight, a numpy array is used as the initial
            guess directly, and the string ``'random'`` lets k-means choose
            ``num_c - 1`` starting points itself.

    Returns:
        dict mapping each layer name to a 1-D array whose first element is
        0.0, followed by the centroids returned by k-means.

    Raises:
        ValueError: if ``initials`` is none of the supported values.
        AssertionError: if ``num_c`` is a sequence whose length differs from
            ``len(layers)``.
    """
    codebook = {}  # quantization codebook, keyed by layer name
    # isinstance also accepts numpy integer scalars, which the former exact
    # type comparison rejected.
    if isinstance(num_c, (int, np.integer)):
        num_c = [num_c] * len(layers)
    else:
        assert len(num_c) == len(layers), "num_c needs one entry per layer"

    # Cluster the weights of every requested layer.
    print("==============Perform K-means=============")
    for idx, layer in enumerate(layers):
        print("Eval layer: %s" % (layer,))
        W = net.params[layer][0].data.flatten()
        W = W[W != 0]  # keep only the non-zero weights
        if initials is None:
            # Default: centres spread linearly over the weight range.
            initial_uni = np.linspace(np.min(W), np.max(W), num_c[idx] - 1)
            centroids, _ = scv.kmeans(W, initial_uni)
        elif isinstance(initials, np.ndarray):
            centroids, _ = scv.kmeans(W, initials)
        elif initials == 'random':
            centroids, _ = scv.kmeans(W, num_c[idx] - 1)
        else:
            raise ValueError(
                "initials must be None, a numpy array or 'random', got %r"
                % (initials,))

        # Put the zero weight back as the first codebook entry.
        codebook[layer] = np.append(0.0, centroids)
        print("codebook size: %d" % len(codebook[layer]))

    return codebook
开发者ID:zhang405744522,项目名称:Caffe-Python-Tutorial,代码行数:35,代码来源:quantize.py


示例4: kmeans2

def kmeans2():
    """Run k-means on the whitened ``locations()`` data from two seed rows.

    Rows 0 and 2 of the whitened observations form the initial code book.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``.
    """
    features = locations()
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    # The result used to be discarded, and a pasted sample tuple followed the
    # call as a dead expression statement; return the live result instead.
    return kmeans(whitened, book)
开发者ID:btoffoli,项目名称:data_mining_examples,代码行数:7,代码来源:locactionKmeans1.py


示例5: kmeans1

def kmeans1():
    """Run k-means on a fixed nine-point 2-D data set from two seed rows.

    The observations are whitened and rows 0 and 2 of the whitened data are
    used as the initial code book.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``.
    """
    features = array([[1.9, 2.3], [1.5, 2.5], [0.8, 0.6], [0.4, 1.8],
                      [0.1, 0.1], [0.2, 1.8], [2.0, 0.5], [0.3, 1.5],
                      [1.0, 1.0]])
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    # The result used to be discarded, and a pasted sample tuple followed the
    # call as a dead expression statement; return the live result instead.
    return kmeans(whitened, book)
开发者ID:btoffoli,项目名称:data_mining_examples,代码行数:7,代码来源:locactionKmeans1.py


示例6: _get_larger_chroms

def _get_larger_chroms(ref_file):
    """Return the names of the contigs larger than the small/haplotype bin.

    Contig sizes are split twice with two-cluster k-means; contigs strictly
    larger than the biggest size left in the final small group are kept.
    """
    from scipy.cluster.vq import kmeans, vq

    def _leading_group(sizes):
        # Two-way k-means, then keep the first run of sizes sharing one label.
        centers, _ = kmeans(np.array(sizes), 2)
        labels, _ = vq(np.array(sizes), centers)
        first_run = tz.first(tz.partitionby(lambda pair: pair[0], zip(labels, sizes)))
        return [size for _label, size in first_run]

    contig_sizes = sorted(float(contig.size) for contig in ref.file_contigs(ref_file))
    # first pass: separate out smaller chromosomes and haplotypes
    small_sizes = _leading_group(contig_sizes)
    # second pass on the small group: one more cluster, removing the haplotypes
    smallest_sizes = _leading_group(small_sizes)
    # anything above the largest remaining size is outside the haplotype/random bin
    cutoff = max(smallest_sizes)
    return [contig.name for contig in ref.file_contigs(ref_file) if contig.size > cutoff]
开发者ID:Kisun,项目名称:bcbio-nextgen,代码行数:25,代码来源:cnvkit.py


示例7: cluster

def cluster(df, means, csv_min, csv_max):
    """Run 1-D k-means on each selected column of ``df``.

    Args:
        df: pandas DataFrame; cells equal to the string "--" are skipped.
        means: ``k_or_guess`` argument forwarded to ``kmeans``.
        csv_min: position of the first column to process (inclusive).
        csv_max: position of the last column to process (exclusive).

    Returns:
        list holding one sorted centroid array per processed column.
    """
    data = []
    for i in range(csv_min, csv_max):
        # ``.ix`` was removed from pandas; ``.iloc`` selects by position.
        # NOTE(review): assumes the columns were addressed positionally.
        a = array(df.iloc[:, i].values)
        b = a[a != "--"]
        # Run k-means once so the printed and stored centroids are the same;
        # two separate runs can disagree when initialization is random.
        # ``np.float`` was only an alias of the builtin ``float``.
        centroids = np.sort(kmeans(b.astype(float), means)[0])
        print(centroids)
        data.append(centroids)
    return data
开发者ID:JasonAHeron,项目名称:Data-Visualization,代码行数:8,代码来源:cluster.py


示例8: test_kmeans_lost_cluster

    def test_kmeans_lost_cluster(self):
        # The start centroids below leave one cluster without any points.
        samples = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))
        seeds = np.array([
            [-1.8127404, -0.67128041],
            [2.04621601, 0.07401111],
            [-2.31149087, -0.05160469],
        ])

        kmeans(samples, seeds)
        # With missing="warn" only a UserWarning is expected; silence it.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            kmeans2(samples, seeds, missing="warn")

        # With missing="raise" the empty cluster must surface as ClusterError.
        assert_raises(ClusterError, kmeans2, samples, seeds, missing="raise")
开发者ID:ymarfoq,项目名称:outilACVDesagregation,代码行数:12,代码来源:test_vq.py


示例9: test_kmeans_lost_cluster

    def test_kmeans_lost_cluster(self):
        # Start centroids chosen so that one cluster ends up with no points.
        observations = TESTDATA_2D
        start = np.array([(-1.8127404, -0.67128041),
                          (2.04621601, 0.07401111),
                          (-2.31149087, -0.05160469)])

        kmeans(observations, start)
        # missing='warn' emits a UserWarning, which is ignored here.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            kmeans2(observations, start, missing='warn')

        # missing='raise' must turn the empty cluster into a ClusterError.
        with assert_raises(ClusterError):
            kmeans2(observations, start, missing='raise')
开发者ID:dyao-vu,项目名称:meta-core,代码行数:13,代码来源:test_vq.py


示例10: clusterkmeans

 def clusterkmeans(self):
     """Return a threshold between two k-means centers of ``self.counts``.

     The counts are whitened and clustered with k = 1 and k = 2.  If the
     k = 1 distortion is smaller, a message is printed and None is returned;
     otherwise the mean of the two k = 2 centers, rescaled back to the
     original units, is returned.
     """
     whitened = whiten(self.counts)  # normalizes the counts for easier clustering
     # factor that maps whitened values back to raw counts
     rescale = self.counts[0] / whitened[0]
     single = kmeans(whitened, 1)
     double = kmeans(whitened, 2)
     # element [1] of each kmeans result is its distortion
     if not single[1] < double[1]:
         # threshold is the average of the two centers
         return rescale * double[0].mean()
     print('found only one cluser')
     return None
开发者ID:HaeffnerLab,项目名称:sqip,代码行数:13,代码来源:dataProcessor_ionSwap_backup2.py


示例11: test_kmeans_lost_cluster

    def test_kmeans_lost_cluster(self):
        # These start centroids make kmeans end up with an empty cluster.
        points = TESTDATA_2D
        guess = np.array([[-1.8127404, -0.67128041],
                          [2.04621601, 0.07401111],
                          [-2.31149087, -0.05160469]])
        empty_cluster_msg = ("One of the clusters is empty. "
                             "Re-run kmean with a different initialization")

        # Both calls run with the empty-cluster UserWarning filtered out.
        with suppress_warnings() as sup:
            sup.filter(UserWarning, empty_cluster_msg)
            kmeans(points, guess)
            kmeans2(points, guess, missing='warn')

        # Asking kmeans2 to raise must produce a ClusterError instead.
        assert_raises(ClusterError, kmeans2, points, guess, missing='raise')
开发者ID:Brucechen13,项目名称:scipy,代码行数:14,代码来源:test_vq.py


示例12: createdatabase

def createdatabase():
    """Train a k-means codebook and write the bag-of-words table to HTML.

    Descriptors computed from ``train1`` are whitened and clustered into
    ``k`` centroids.  The images from ``test1`` are turned into a pseudo
    database with ``bow``; each entry becomes one row of ``table.htm``
    (first element as filename, last element as histogram).  Finally the
    codebook is handed to ``codebook_to_file``.

    Returns:
        None.
    """
    X_train = detectcompute(train1)

    print("Clustering the data with K-means")
    codebook, distortion = kmeans(whiten(X_train), k)
    print("Done.\n")

    imtrain = singledetect(test1)
    Pdatabase = bow(imtrain, codebook, k)  # Pseudo database with list structure

    # Writing to html.table
    print("Converting the database into a HTML file")
    row_template = "<tr><td>%(filename)s</td><td>%(histogram)s</td></tr>"
    # Mode "w" creates the file when missing and truncates stale content;
    # "r+" failed on a fresh checkout and left the tail of a longer old file.
    # The context manager closes the handle even if a write fails.
    with open("table.htm", "w") as htmltable:
        # Opening tag fixed from "<htm>" so it matches the closing "</html>".
        htmltable.write("<html><body><table cellpadding=5>"
                        "<tr><th>Filename</th><th>Histogram</th></tr>")
        for entry in Pdatabase:
            htmltable.write(row_template % {"filename": entry[0],
                                            "histogram": entry[-1]})
        htmltable.write("</table></body></html>")
    print("Done.\n")

    codebook_to_file(codebook)
开发者ID:MariaBarrett,项目名称:VIPExam2,代码行数:27,代码来源:main.py


示例13: worldplot

 def worldplot(self,kmeans=None,proj='merc'):
     """
     plots customer GPS location on a map with state and national boundaries.
     IN
         kmeans (int) number of means for k-means clustering, default=None
         proj (string) the map projection to use, use 'robin' to plot the whole earth, default='merc'
     OUT
         the centroid array returned by vq.kmeans when kmeans is given,
         otherwise None
     """
     # create a matplotlib Basemap object
     if proj == 'robin':
         my_map = Basemap(projection=proj,lat_0=0,lon_0=0,resolution='l',area_thresh=1000)
     else:
         my_map = Basemap(projection=proj,lat_0=33.,lon_0=-125.,resolution='l',area_thresh=1000.,
                 llcrnrlon=-130.,llcrnrlat=25,urcrnrlon=-65., urcrnrlat=50)
     my_map.drawcoastlines(color='grey')
     my_map.drawcountries(color='grey')
     my_map.drawstates(color='grey')
     my_map.drawlsmask(land_color='white',ocean_color='white')
     my_map.drawmapboundary() #my_map.fillcontinents(color='black')
     x,y = my_map(np.array(self.data['lon']),np.array(self.data['lat']))
     my_map.plot(x,y,'ro',markersize=3,alpha=.4,linewidth=0)
     # Defined up front so the final return also works when no clustering is
     # requested; previously that path raised UnboundLocalError.
     output = None
     if kmeans:
         # k-means clustering algorithm---see wikipedia for details
         data_in = self.data.drop(['id','clv','level'],axis=1)
         # vq is scipy's vector quantization module
         output,distortion = vq.kmeans(data_in,kmeans)
         # NOTE(review): assumes the remaining columns are ordered (lat, lon),
         # so column 1 is the longitude and column 0 the latitude -- confirm.
         x1,y1 = my_map(output[:,1],output[:,0])
         my_map.plot(x1,y1,'ko',markersize=20,alpha=.4,linewidth=0)
     plt.show()
     return output
开发者ID:dgmiller,项目名称:portfolio,代码行数:29,代码来源:tools.py


示例14: test_large_features

    def test_large_features(self):
        # Generate a data set with large values, and run kmeans on it to
        # (regression for 1077).
        d = 300
        n = 100

        m1 = np.random.randn(d)
        m2 = np.random.randn(d)
        x = 10000 * np.random.randn(n, d) - 20000 * m1
        y = 10000 * np.random.randn(n, d) + 20000 * m2

        data = np.empty((x.shape[0] + y.shape[0], d), np.double)
        data[:x.shape[0]] = x
        data[x.shape[0]:] = y

        kmeans(data, 2)
开发者ID:beiko-lab,项目名称:gengis,代码行数:16,代码来源:test_vq.py


示例15: classify_kmeans

def classify_kmeans(infile, clusternumber):
    '''
    Cluster the pixels of a raster with k-means and write the centroid value
    of each pixel's cluster back into band 1 of the same file.

    infile: path of the raster, opened in update mode
    clusternumber: k_or_guess argument forwarded to kmeans
    '''
    # Register the GTiff driver and open the raster for in-place update
    gtiff = gdal.GetDriverByName('GTiff')
    gtiff.Register()
    ds = gdal.Open(infile, gdal.GA_Update)
    databand = ds.GetRasterBand(1)

    # Raster contents as an array; only its first two dimensions are used
    data = ds.ReadAsArray()
    n_rows, n_cols = data.shape[0], data.shape[1]

    # Flatten to one value per pixel, cluster, then map every pixel to a code
    pixel = numpy.reshape(data, n_rows * n_cols)
    centroids, variance = kmeans(pixel, clusternumber)
    code, distance = vq(pixel, centroids)
    # Back to the raster layout, replacing each code by its centroid value
    clustered = centroids[numpy.reshape(code, (n_rows, n_cols))]

    # Write the clustered raster into the band
    databand.WriteArray(clustered)
    databand.FlushCache()

    # Drop the references (band, array, dataset) to close the file
    databand = clustered = ds = None
开发者ID:NatPi,项目名称:RemoteSensing,代码行数:30,代码来源:GlacierSurfaceType_kmeans.py


示例16: connected_regions

def connected_regions(image):
    """
    Quantize the image into two colors with k-means and return the larger of
    the region_sizes() lengths for the label map and its inverse, or 0 when
    k-means yields a single centroid.
    """
    n_colors = 2

    # Stack the channels depth-wise, then flatten to one row per pixel
    stacked = np.dstack(image)
    n_rows, n_cols, n_channels = stacked.shape
    flat_pixels = np.reshape(stacked, (n_rows * n_cols, n_channels))

    # Quantization into two colors
    palette, _ = vq.kmeans(flat_pixels, n_colors)
    codes, _ = vq.vq(flat_pixels, palette)
    label_map = codes.reshape((n_rows, n_cols))

    # A single centroid means there is nothing to separate
    if len(palette) <= 1:
        return 0

    direct = len(region_sizes(label_map))
    inverted = len(region_sizes(1 - label_map))
    return max(direct, inverted)
开发者ID:tomasra,项目名称:ga_sandbox,代码行数:33,代码来源:analysis.py


示例17: kmeans

def kmeans(iData, clustNumber, oPrefix, norm=False):
    '''Cluster the valid pixels of a data cube with k-means, save the zone
    map as <oPrefix>zones.png and return it as a (height, width) array.'''
    print('Run K-Means')

    height, width = iData.shape[1:3]
    # flatten the 3D cube into a 2D matrix; also get indices of valid pixels
    iData, notNanDataI = cube2flat(iData)
    if norm:
        # center and scale each row using statistics of the valid pixels only
        validPart = iData[:, notNanDataI]
        rowMean = validPart.mean(axis=1)
        rowStd = validPart.std(axis=1)
        iData = ((iData.T - rowMean) / rowStd).T

    # observations: one row per valid pixel, as float64
    samples = iData[:, notNanDataI].astype('f8').T
    # codebook from k-means on the valid data
    codeBook = vq.kmeans(samples, clustNumber)[0]
    # label of every valid pixel, shifted so that labels start at 1
    labelVec = vq.vq(samples, codeBook)[0] + 1

    # zone map: NaN everywhere except at the valid pixels
    zoneMap = np.full(width * height, np.nan)
    zoneMap[notNanDataI] = labelVec
    zoneMap = zoneMap.reshape(height, width)

    # save a picture of the zone map
    plt.imsave(oPrefix + 'zones.png', zoneMap)

    return zoneMap
开发者ID:geogradient,项目名称:zoning,代码行数:28,代码来源:zoning.py


示例18: main

def main():
    """Split the input points into one group per van with k-means and write them out."""
    args = get_args()
    van_count = args.number_of_vans

    # A TextIOWrapper means the file was sent in with stdin
    data = JSONFile(args.infile, True) if isinstance(args.infile, TextIOWrapper) else args.infile

    coordinates = np.array([[entry.get('lon'), entry.get('lat')] for entry in data])

    # 2000 iterations: in the author's testing a higher number of iterations
    # led to fewer errors due to missing centroids (whitening gave worse results)
    centroids, distortion = kmeans(coordinates, van_count, 2000)
    assignment, distortion = vq(coordinates, centroids)

    # One bucket per van, filled according to each point's centroid index
    vans = [[] for _ in range(van_count)]
    for slot, point in zip(assignment, data):
        vans[slot].append(point)

    vans = distribute(vans, len(data), centroids)

    create_output(args.outfile, vans)
开发者ID:adregan,项目名称:point-grouping-test,代码行数:27,代码来源:main.py


示例19: spectral_clustering

def spectral_clustering(W, k):
    """Partition the data behind weight matrix W into k clusters.

    Returns an array with one cluster index per row of the embedding.
    """

    # ====================== ADD YOUR CODE HERE ======================
    # Instructions: Perform spectral clustering to partition the
    #               data into k clusters. Implement the steps that
    #               are described in Algorithm 2 on the assignment.

    # Diagonal matrix of column sums minus the weight matrix
    column_sums = sum(W, axis=0)
    L = diag(column_sums) - W
    eigenvalues, eigenvectors = linalg.eig(L)

    # Real part of the eigenvectors belonging to the k smallest eigenvalues
    smallest = eigenvalues.argsort()[:k]
    y = real(eigenvectors[:, smallest])

    clusters, _ = kmeans(y, k)

    # Assign every row to its nearest centroid (first one wins on ties)
    labels = zeros(y.shape[0])
    for i, row in enumerate(y):
        closest = inf
        for j in range(k):
            gap = euclideanDistance(row, clusters[j])
            if gap < closest:
                closest, labels[i] = gap, j
    # =============================================================

    return labels
开发者ID:omoindrot,项目名称:INF582,代码行数:25,代码来源:spectralClustering.py


示例20: train

  def train(self, featurefiles, k=100, subsampling=10):
    """Train a vocabulary from features in files listed in |featurefiles| using
    k-means with k words. Subsampling of training data can be used for speedup.

    Sets self.voc (the k-means codebook), self.word_count (number of rows in
    the codebook), self.idf and self.trainingdata (the given file list).
    """
    image_count = len(featurefiles)

    # Element [1] of each read result is that image's descriptor array.
    descr = [sift.read_features_from_file(featurefiles[0])[1]]
    for feature_file in featurefiles[1:]:
      descr.append(sift.read_features_from_file(feature_file)[1])

    # Stack features for k-means in a single call; re-stacking the growing
    # matrix once per image copied all previous rows every time (quadratic).
    descriptors = numpy.vstack(descr) if image_count > 1 else descr[0]

    # Run k-means on every |subsampling|-th descriptor, one iteration.
    self.voc, distortion = vq.kmeans(descriptors[::subsampling, :], k, 1)
    self.word_count = self.voc.shape[0]

    # Project training data on vocabulary.
    imwords = numpy.zeros((image_count, self.word_count))
    for i in range(image_count):
      imwords[i] = self.project(descr[i])

    # Number of images in which each word occurs at least once.
    occurence_count = numpy.sum((imwords > 0)*1, axis=0)

    self.idf = numpy.log(image_count / (occurence_count + 1.0))
    self.trainingdata = featurefiles
开发者ID:PhilomontPhlea,项目名称:PhleaBytesCV,代码行数:26,代码来源:vocabulary.py



注:本文中的scipy.cluster.vq.kmeans函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python vq.kmeans2函数代码示例发布时间:2022-05-27
下一篇:
Python hierarchy.ward函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap