本文整理汇总了Python中scipy.cluster.vq.kmeans函数的典型用法代码示例。如果您正苦于以下问题：Python kmeans函数的具体用法？Python kmeans怎么用？Python kmeans使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了kmeans函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: clustering_scipy_kmeans
def clustering_scipy_kmeans(features, n_clust=8):
    """Cluster ``features`` with SciPy k-means and plot the centroids.

    The observations are whitened, the ``kmeans`` distortion is plotted for
    k = 1..11, and then the centroids found by ``kmeans`` and by ``kmeans2``
    for ``n_clust`` clusters are each drawn in their own subplot.

    Args:
        features: Observation matrix handed to ``whiten``.
        n_clust: Number of clusters requested from ``kmeans``/``kmeans2``.

    Returns:
        None. Results are only printed and plotted.
    """

    def _plot_centroid_grid(centroids, n_cols=2):
        # One subplot per centroid. Subplot indices are 1-based, and the grid
        # is sized from the centroids actually returned (kmeans may return
        # fewer than requested) instead of a hard-coded 4x2 layout.
        n_rows = (len(centroids) + n_cols - 1) // n_cols
        for position, centroid in enumerate(centroids, 1):
            plt.subplot(n_rows, n_cols, position)
            plt.plot(centroid)
        plt.show()

    whitened = whiten(features)
    print(whitened.shape)

    # Distortion as a function of k, to help judge a sensible cluster count.
    initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
    plt.plot([var for (cent, var) in initial])
    plt.show()

    codebook, distortion = kmeans(whitened, n_clust)
    print("%s %s" % (codebook, distortion))
    _plot_centroid_grid(codebook)

    centroid, label = kmeans2(whitened, n_clust, minit='points')
    print("%s %s" % (centroid, label))
    _plot_centroid_grid(centroid)
开发者ID:kaustuvkanti,项目名称:Experiments,代码行数:30,代码来源:dump_transition_for_clustering.py
示例2: custom
def custom(n_clusters=2):
    """Cluster items by the users who voted for them and print the result.

    Reads ``my_items_likehood.txt`` line by line, groups users per item,
    builds one feature row per item with ``user_matrix`` and runs k-means on
    the whitened rows.

    Args:
        n_clusters: Value passed to ``kmeans`` as ``k_or_guess``. The original
            call omitted this required argument; the default of 2 is an
            arbitrary choice -- NOTE(review): confirm the intended k.

    Returns:
        None. The k-means result and a summary line are printed.
    """
    _items = {}
    users = []
    # ``with`` closes the file even if parsing a line raises.
    with open('my_items_likehood.txt') as votes:
        for line in votes:
            user, item = keys(line)
            users.append(user)
            _items.setdefault(item, []).append(user)

    sorted_users = sorted(users)
    l = len(sorted_users)

    features = []
    for count, item in enumerate(_items):
        features.append(user_matrix(l, _items[item], sorted_users))
        # Same cap as before: stop after the item at index 100.
        if count == 100:
            break

    print('whiten')
    whitened = whiten(array(features))
    print('kmeans')
    print(kmeans(whitened, n_clusters))
    # Report the parsed items; the original counted a dict that was never
    # filled and therefore always printed 0. ``users`` holds one entry per
    # input line, so its length is the number of votes read.
    print("%d items voted by %d users" % (len(_items), len(users)))
开发者ID:mcaprari,项目名称:rt-pub,代码行数:29,代码来源:t.py
示例3: kmeans_net
def kmeans_net(net, layers, num_c=16, initials=None):
    """Build a k-means quantization codebook for the weights of each layer.

    Args:
        net: Network object; weights are read from
            ``net.params[layer][0].data``.
        layers: Names of the layers to quantize.
        num_c: Number of quantization levels, either one integer used for
            every layer or a sequence with one entry per layer. One level is
            reserved for the value 0, so k-means is asked for ``num_c - 1``
            centroids.
        initials: Initial centroids. ``None`` spreads them evenly between the
            smallest and largest non-zero weight, an ``ndarray`` is used as
            the initial guess, and ``'random'`` lets SciPy pick them.

    Returns:
        dict mapping each layer name to a 1-D array of levels whose first
        entry is 0.0.

    Raises:
        ValueError: If ``initials`` is none of the supported values.
    """
    codebook = {}  # layer name -> quantization levels
    if isinstance(num_c, (int, np.integer)):
        num_c = [num_c] * len(layers)
    else:
        assert len(num_c) == len(layers), "num_c needs one entry per layer"

    # Cluster the weights of every requested layer.
    print("==============Perform K-means=============")
    for idx, layer in enumerate(layers):
        print("Eval layer: %s" % (layer,))
        W = net.params[layer][0].data.flatten()
        W = W[W != 0]  # keep only the non-zero weights

        if initials is None:
            # Default: centroids evenly spaced over the weight range.
            initial_uni = np.linspace(np.min(W), np.max(W), num_c[idx] - 1)
            codebook[layer], _ = scv.kmeans(W, initial_uni)
        elif isinstance(initials, np.ndarray):
            codebook[layer], _ = scv.kmeans(W, initials)
        elif initials == 'random':
            codebook[layer], _ = scv.kmeans(W, num_c[idx] - 1)
        else:
            raise ValueError(
                "initials must be None, a numpy array or 'random', got %r"
                % (initials,))

        # Put the zero weight back as the first level.
        codebook[layer] = np.append(0.0, codebook[layer])
        print("codebook size: %d" % len(codebook[layer]))
    return codebook
开发者ID:zhang405744522,项目名称:Caffe-Python-Tutorial,代码行数:35,代码来源:quantize.py
示例4: kmeans2
def kmeans2():
    """Run k-means on the whitened output of ``locations()``.

    The first and third whitened observations are used as the initial
    two-centroid codebook.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``. The
        original discarded this result and carried a pasted interpreter
        output as a dead expression statement, which has been removed.
    """
    features = locations()
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    return kmeans(whitened, book)
开发者ID:btoffoli,项目名称:data_mining_examples,代码行数:7,代码来源:locactionKmeans1.py
示例5: kmeans1
def kmeans1():
    """Run k-means on a small fixed 2-D data set.

    The nine observations are whitened, and the first and third whitened
    rows are used as the initial two-centroid codebook.

    Returns:
        The ``(codebook, distortion)`` tuple produced by ``kmeans``. The
        original discarded this result and carried a pasted interpreter
        output as a dead expression statement, which has been removed.
    """
    features = array([
        [1.9, 2.3], [1.5, 2.5], [0.8, 0.6],
        [0.4, 1.8], [0.1, 0.1], [0.2, 1.8],
        [2.0, 0.5], [0.3, 1.5], [1.0, 1.0],
    ])
    whitened = whiten(features)
    book = array((whitened[0], whitened[2]))
    return kmeans(whitened, book)
开发者ID:btoffoli,项目名称:data_mining_examples,代码行数:7,代码来源:locactionKmeans1.py
示例6: _get_larger_chroms
def _get_larger_chroms(ref_file):
    """Return the names of the larger contigs in ``ref_file`` for plotting.

    Contig sizes are sorted and split twice with 2-means; contigs larger than
    every size in the leading group of the second split are kept.
    """
    from scipy.cluster.vq import kmeans, vq

    def _leading_cluster_run(sizes):
        # Split ``sizes`` into two k-means clusters and keep the first run of
        # consecutive entries that share a cluster label.
        centers, _ = kmeans(np.array(sizes), 2)
        labels, _ = vq(np.array(sizes), centers)
        first_run = tz.first(
            tz.partitionby(lambda pair: pair[0], zip(labels, sizes)))
        return [pair[1] for pair in first_run]

    all_sizes = sorted(float(contig.size)
                       for contig in ref.file_contigs(ref_file))
    # First pass: separate out smaller chromosomes and haplotypes.
    little_sizes = _leading_cluster_run(all_sizes)
    # Second pass on the smaller ones, removing the haplotypes.
    little_sizes2 = _leading_cluster_run(little_sizes)
    # Keep any chromosome not in the haplotype/random bin.
    thresh = max(little_sizes2)
    return [contig.name
            for contig in ref.file_contigs(ref_file)
            if contig.size > thresh]
开发者ID:Kisun,项目名称:bcbio-nextgen,代码行数:25,代码来源:cnvkit.py
示例7: cluster
def cluster(df, means, csv_min, csv_max):
    """Compute sorted k-means centres for a range of DataFrame columns.

    Args:
        df: pandas DataFrame; entries equal to ``"--"`` are treated as
            missing and dropped before clustering.
        means: Passed to ``kmeans`` as ``k_or_guess`` (a cluster count or an
            array of initial centres).
        csv_min: Position of the first column to process (inclusive).
        csv_max: Position one past the last column to process.

    Returns:
        list with one sorted array of cluster centres per processed column.
    """
    data = []
    for i in range(csv_min, csv_max):
        # ``DataFrame.ix`` was removed from pandas; select by position.
        column = np.asarray(df.iloc[:, i].values)
        valid = column[column != "--"]
        # Run k-means once so the printed and stored centres are the same
        # (the original clustered twice, and random starts can differ).
        # ``np.float`` was removed from NumPy; the builtin is equivalent.
        centers = np.sort(kmeans(valid.astype(float), means)[0])
        print(centers)
        data.append(centers)
    return data
开发者ID:JasonAHeron,项目名称:Data-Visualization,代码行数:8,代码来源:cluster.py
示例8: test_kmeans_lost_cluster
def test_kmeans_lost_cluster(self):
    """Exercise kmeans/kmeans2 when a cluster ends up with no points."""
    # The initial centroids below leave one cluster empty for this data.
    samples = np.fromfile(DATAFILE1, sep=", ").reshape((200, 2))
    init_centroids = np.array([
        [-1.8127404, -0.67128041],
        [2.04621601, 0.07401111],
        [-2.31149087, -0.05160469],
    ])

    kmeans(samples, init_centroids)

    # With missing="warn" only a UserWarning is expected; silence it.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        kmeans2(samples, init_centroids, missing="warn")

    # With missing="raise" the empty cluster must surface as ClusterError.
    assert_raises(ClusterError, kmeans2, samples, init_centroids,
                  missing="raise")
开发者ID:ymarfoq,项目名称:outilACVDesagregation,代码行数:12,代码来源:test_vq.py
示例9: test_kmeans_lost_cluster
def test_kmeans_lost_cluster(self):
    """Exercise kmeans/kmeans2 when a cluster ends up with no points."""
    # The initial centroids below leave one cluster empty for this data.
    samples = TESTDATA_2D
    init_centroids = np.array([
        [-1.8127404, -0.67128041],
        [2.04621601, 0.07401111],
        [-2.31149087, -0.05160469],
    ])

    kmeans(samples, init_centroids)

    # With missing='warn' only a UserWarning is expected; silence it.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        kmeans2(samples, init_centroids, missing='warn')

    # With missing='raise' the empty cluster must surface as ClusterError.
    assert_raises(ClusterError, kmeans2, samples, init_centroids,
                  missing='raise')
开发者ID:dyao-vu,项目名称:meta-core,代码行数:13,代码来源:test_vq.py
示例10: clusterkmeans
def clusterkmeans(self):
    """Derive a threshold between two clusters of ``self.counts``.

    The counts are whitened and clustered with k = 1 and k = 2. When the
    one-cluster distortion is the smaller of the two, no threshold is
    produced; otherwise the threshold is the mean of the two cluster centres
    converted back to the original count scale.

    Returns:
        The threshold in units of ``self.counts``, or ``None`` when only one
        cluster is found.
    """
    import numpy as np

    wh = whiten(self.counts)  # normalizes the counts for easier clustering

    # Factor that undoes the whitening. The original used
    # ``self.counts[0] / wh[0]``, which is 0/0 = NaN whenever the first count
    # is zero. whiten() divides by the per-column standard deviation and
    # leaves zero-deviation columns unscaled, so rebuild that factor directly.
    std = np.std(self.counts, axis=0)
    scale = np.where(std == 0, 1.0, std)

    # Compute k-means for k = 1 and k = 2 and compare the distortions.
    one = kmeans(wh, 1)
    two = kmeans(wh, 2)
    # NOTE(review): a two-cluster fit rarely has larger distortion than a
    # one-cluster fit, so this branch may almost never trigger -- confirm
    # that this is the intended criterion.
    if one[1] < two[1]:
        print('found only one cluser')
        return None
    # Threshold is the average of the two centres.
    return scale * two[0].mean()
开发者ID:HaeffnerLab,项目名称:sqip,代码行数:13,代码来源:dataProcessor_ionSwap_backup2.py
示例11: test_kmeans_lost_cluster
def test_kmeans_lost_cluster(self):
    """Exercise kmeans/kmeans2 when a cluster ends up with no points."""
    # The initial centroids below leave one cluster empty for this data.
    samples = TESTDATA_2D
    init_centroids = np.array([
        [-1.8127404, -0.67128041],
        [2.04621601, 0.07401111],
        [-2.31149087, -0.05160469],
    ])
    empty_cluster_msg = ("One of the clusters is empty. "
                         "Re-run kmean with a different initialization")

    # Both calls may emit the empty-cluster UserWarning; filter it out.
    with suppress_warnings() as sup:
        sup.filter(UserWarning, empty_cluster_msg)
        kmeans(samples, init_centroids)
        kmeans2(samples, init_centroids, missing='warn')

    # With missing='raise' the empty cluster must surface as ClusterError.
    assert_raises(ClusterError, kmeans2, samples, init_centroids,
                  missing='raise')
开发者ID:Brucechen13,项目名称:scipy,代码行数:14,代码来源:test_vq.py
示例12: createdatabase
def createdatabase():
    """Build a bag-of-words database and dump it as an HTML table.

    A k-means codebook is learnt from the whitened training descriptors, the
    test images are turned into a pseudo database with ``bow``, the database
    is written to ``table.htm`` and the codebook is saved with
    ``codebook_to_file``.

    Relies on the module-level names ``train1``, ``test1`` and ``k``.

    Returns:
        None.
    """
    X_train = detectcompute(train1)
    print("Clustering the data with K-means")
    codebook, distortion = kmeans(whiten(X_train), k)
    print("Done.\n")

    imtrain = singledetect(test1)
    Pdatabase = bow(imtrain, codebook, k)  # pseudo database with list structure

    # Write the database out as an HTML table.
    print("Converting the database into a HTML file")
    rows = [
        "<tr><td>%(filename)s</td><td>%(histogram)s</td></tr>"
        % {"filename": entry[0], "histogram": entry[-1]}
        for entry in Pdatabase
    ]
    # "w" creates or truncates the file. The original "r+" required the file
    # to exist already and left stale bytes behind when the new table was
    # shorter. ``with`` closes the handle even if a write fails.
    with open("table.htm", "w") as htmltable:
        # The opening tag was misspelt "<htm>" and did not match "</html>".
        htmltable.write("<html><body><table cellpadding=5>"
                        "<tr><th>Filename</th><th>Histogram</th></tr>")
        htmltable.writelines(rows)
        htmltable.write("</table></body></html>")
    print("Done.\n")

    codebook_to_file(codebook)
开发者ID:MariaBarrett,项目名称:VIPExam2,代码行数:27,代码来源:main.py
示例13: worldplot
def worldplot(self, kmeans=None, proj='merc'):
    """
    Plot customer GPS locations on a map with state and national boundaries.

    IN
        kmeans (int) number of means for k-means clustering, default=None
        proj (string) the map projection to use, use 'robin' to plot the
            whole earth, default='merc'
    OUT
        the cluster centres returned by vq.kmeans, or None when ``kmeans``
        is not given
    """
    # Create a matplotlib Basemap object.
    if proj == 'robin':
        my_map = Basemap(projection=proj, lat_0=0, lon_0=0, resolution='l',
                         area_thresh=1000)
    else:
        my_map = Basemap(projection=proj, lat_0=33., lon_0=-125.,
                         resolution='l', area_thresh=1000.,
                         llcrnrlon=-130., llcrnrlat=25,
                         urcrnrlon=-65., urcrnrlat=50)

    my_map.drawcoastlines(color='grey')
    my_map.drawcountries(color='grey')
    my_map.drawstates(color='grey')
    my_map.drawlsmask(land_color='white', ocean_color='white')
    my_map.drawmapboundary()

    x, y = my_map(np.array(self.data['lon']), np.array(self.data['lat']))
    my_map.plot(x, y, 'ro', markersize=3, alpha=.4, linewidth=0)

    # Defined up front: the original only assigned ``output`` inside the
    # branch below, so calling without ``kmeans`` raised UnboundLocalError.
    output = None
    if kmeans:
        # Cluster everything except the id/clv/level columns.
        data_in = self.data.drop(['id', 'clv', 'level'], axis=1)
        # vq is scipy's vector quantization module.
        output, distortion = vq.kmeans(data_in, kmeans)
        # Column 1 is passed as longitude and column 0 as latitude --
        # presumably the remaining columns are ordered (lat, lon); verify.
        x1, y1 = my_map(output[:, 1], output[:, 0])
        my_map.plot(x1, y1, 'ko', markersize=20, alpha=.4, linewidth=0)

    plt.show()
    return output
开发者ID:dgmiller,项目名称:portfolio,代码行数:29,代码来源:tools.py
示例14: test_large_features
def test_large_features(self):
# Generate a data set with large values, and run kmeans on it to
# (regression for 1077).
d = 300
n = 100
m1 = np.random.randn(d)
m2 = np.random.randn(d)
x = 10000 * np.random.randn(n, d) - 20000 * m1
y = 10000 * np.random.randn(n, d) + 20000 * m2
data = np.empty((x.shape[0] + y.shape[0], d), np.double)
data[:x.shape[0]] = x
data[x.shape[0]:] = y
kmeans(data, 2)
开发者ID:beiko-lab,项目名称:gengis,代码行数:16,代码来源:test_vq.py
示例15: classify_kmeans
def classify_kmeans(infile, clusternumber):
    '''
    Apply k-means to a raster and write each pixel's cluster centre back
    into band 1 of the same file.
    '''
    # Open the raster for in-place update.
    gtiff_driver = gdal.GetDriverByName('GTiff')
    gtiff_driver.Register()
    dataset = gdal.Open(infile, gdal.GA_Update)
    band = dataset.GetRasterBand(1)

    # Read the raster into an array and flatten it to one value per pixel.
    raster = dataset.ReadAsArray()
    n_rows = raster.shape[0]
    n_cols = raster.shape[1]
    flat_pixels = numpy.reshape(raster, (n_rows * n_cols))

    # Cluster the pixel values and look up the centre assigned to each one.
    centroids, variance = kmeans(flat_pixels, clusternumber)
    code, distance = vq(flat_pixels, centroids)
    label_grid = numpy.reshape(code, (n_rows, n_cols))
    clustered = centroids[label_grid]

    # Write the clustered raster back to the band.
    band.WriteArray(clustered)
    band.FlushCache()

    # Close the file by dropping the references.
    band = None
    clustered = None
    dataset = None
开发者ID:NatPi,项目名称:RemoteSensing,代码行数:30,代码来源:GlacierSurfaceType_kmeans.py
示例16: connected_regions
def connected_regions(image):
    """
    Quantize the image into two colours and count connected regions.

    Returns the larger of the region counts of the quantized image and of
    its inverse, or 0 when quantization yields a single colour.
    """
    n_colors = 2

    # Stack the channels and flatten to one row per pixel.
    stacked = np.dstack(image)
    height, width, depth = stacked.shape[:3]
    flat_pixels = np.reshape(stacked, (height * width, depth))

    # Quantization into two colours.
    centroids, _ = vq.kmeans(flat_pixels, n_colors)
    codes, _ = vq.vq(flat_pixels, centroids)
    label_image = codes.reshape((height, width))

    if len(centroids) <= 1:
        return 0

    region_count = len(region_sizes(label_image))
    inverted_count = len(region_sizes(1 - label_image))
    return max([region_count, inverted_count])
开发者ID:tomasra,项目名称:ga_sandbox,代码行数:33,代码来源:analysis.py
示例17: kmeans
def kmeans(iData, clustNumber, oPrefix, norm=False):
    '''Perform k-means cluster analysis and return a map of zones.

    Args:
        iData: 3-D data cube; axes 1 and 2 give the map height and width.
        clustNumber: Passed to ``vq.kmeans`` as the number of clusters.
        oPrefix: Prefix of the output image, saved as ``oPrefix + 'zones.png'``.
        norm: When True, each row of the flattened data is centred and
            scaled by the mean and standard deviation of its valid pixels.

    Returns:
        2-D array (height x width) of 1-based zone labels, NaN for pixels
        that are not valid.
    '''
    print('Run K-Means')
    height, width = iData.shape[1:3]

    # Reshape the 3-D cube into a 2-D matrix and get the valid-pixel indices.
    iData, notNanDataI = cube2flat(iData)

    if norm:
        # Centre and normalise using statistics of the valid pixels only.
        validData = iData[:, notNanDataI]
        iDataMean = validData.mean(axis=1)
        iDataStd = validData.std(axis=1)
        iData = np.subtract(iData.T, iDataMean).T
        iData = np.divide(iData.T, iDataStd).T

    # Build the observation matrix once; the original sliced and converted
    # it separately for the clustering and the quantization steps.
    observations = iData[:, notNanDataI].astype('f8').T

    # Run k-means on the valid data to get the codebook.
    codeBook = vq.kmeans(observations, clustNumber)[0]

    # Quantize the valid pixels with the codebook; labels are 1-based.
    labelVec = vq.vq(observations, codeBook)[0] + 1

    # Create the zone map and fill it in; pixels without a label stay NaN.
    zoneMap = np.full(width * height, np.nan)
    zoneMap[notNanDataI] = labelVec
    zoneMap = zoneMap.reshape(height, width)

    # Save an image of the zone map.
    plt.imsave(oPrefix + 'zones.png', zoneMap)
    return zoneMap
开发者ID:geogradient,项目名称:zoning,代码行数:28,代码来源:zoning.py
示例18: main
def main():
    """Group the input points into vans with k-means and write the result."""
    args = get_args()

    # Files sent in through stdin arrive as a TextIOWrapper and are wrapped
    # in a JSONFile; anything else is used as-is.
    if isinstance(args.infile, TextIOWrapper):
        data = JSONFile(args.infile, True)
    else:
        data = args.infile

    points = np.array([[entry.get('lon'), entry.get('lat')] for entry in data])

    # In testing, a higher number of iterations led to fewer errors due to
    # missing centroids (note: whitening led to worse results).
    centroids, distortion = kmeans(points, args.number_of_vans, 2000)
    index, distortion = vq(points, centroids)

    # Bucket every point into the van of its nearest centroid.
    vans = [[] for _ in range(args.number_of_vans)]
    for position, entry in enumerate(data):
        van_number = index[position]
        vans[van_number].append(entry)

    vans = distribute(vans, len(data), centroids)
    create_output(args.outfile, vans)
开发者ID:adregan,项目名称:point-grouping-test,代码行数:27,代码来源:main.py
示例19: spectral_clustering
def spectral_clustering(W, k):
    """Partition the nodes of an affinity matrix into clusters.

    Builds the unnormalised graph Laplacian ``D - W``, embeds every node
    with the eigenvectors of the ``k`` smallest eigenvalues, runs k-means on
    the embedding and assigns each node to its nearest centroid.

    Args:
        W: Square affinity matrix.
        k: Number of eigenvectors to use and number of clusters to request.

    Returns:
        1-D float array with one cluster index per row of ``W``.
    """
    from scipy.cluster.vq import vq

    L = diag(sum(W, axis=0)) - W
    w, v = linalg.eig(L)
    # Embedding: eigenvectors belonging to the k smallest eigenvalues.
    y = real(v[:, w.argsort()[:k]])
    clusters, _ = kmeans(y, k)
    # Assign every point to its nearest centroid. vq() works with however
    # many centroids kmeans() actually returned; the original indexed
    # ``clusters[j]`` for every j < k and failed when fewer came back.
    codes, _ = vq(y, clusters)
    # Labels stay floats, as in the original zeros()-based array.
    labels = codes.astype(float)
    return labels
开发者ID:omoindrot,项目名称:INF582,代码行数:25,代码来源:spectralClustering.py
示例20: train
def train(self, featurefiles, k=100, subsampling=10):
    """Train a vocabulary from the features in ``featurefiles``.

    Uses k-means with ``k`` words. Sets ``self.voc``, ``self.word_count``,
    ``self.idf`` and ``self.trainingdata``.

    Args:
        featurefiles: Paths of the feature files, one per image.
        k: Number of visual words requested from k-means.
        subsampling: Only every ``subsampling``-th descriptor is used for
            k-means, for speed.

    Raises:
        ValueError: If ``featurefiles`` is empty.
    """
    if not featurefiles:
        raise ValueError("featurefiles must contain at least one file")
    image_count = len(featurefiles)

    # Read the descriptors of every image, then stack them once for k-means.
    # Stacking inside the loop re-copied all earlier rows on every iteration.
    descr = [sift.read_features_from_file(path)[1] for path in featurefiles]
    descriptors = numpy.vstack(descr)

    # Run k-means on the subsampled descriptors.
    self.voc, distortion = vq.kmeans(descriptors[::subsampling, :], k, 1)
    self.word_count = self.voc.shape[0]

    # Project the training data on the vocabulary.
    imwords = numpy.zeros((image_count, self.word_count))
    for i, image_descr in enumerate(descr):
        imwords[i] = self.project(image_descr)

    # Number of images each word occurs in, used for the idf weights.
    occurence_count = numpy.sum((imwords > 0) * 1, axis=0)
    self.idf = numpy.log(image_count / (occurence_count + 1.0))
    self.trainingdata = featurefiles
开发者ID:PhilomontPhlea,项目名称:PhleaBytesCV,代码行数:26,代码来源:vocabulary.py
注：本文中的scipy.cluster.vq.kmeans函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论