本文整理汇总了Python中sklearn.cluster.KMeans类的典型用法代码示例。如果您正苦于以下问题:Python KMeans类的具体用法?Python KMeans怎么用?Python KMeans使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了KMeans类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: Kmeans_cluster_analysis
def Kmeans_cluster_analysis(x, y, n_clusters):
    """Cluster two paired variables with KMeans and show a scatter plot.

    ``x`` and ``y`` are reshaped to columns, stacked into a two-column
    matrix, rescaled with ``Scaler`` and clustered.  Every cluster is drawn
    in its own colour with its centre marked by a ``+``; the function then
    calls ``plt.show()``.

    :param x: array with ``x.shape[0]`` samples.
    :param y: array with ``y.shape[0]`` samples, paired with ``x``.
    :param n_clusters: number of clusters passed to ``KMeans``.
    :return: the per-sample cluster labels (``km.labels_``).
    """
    X = np.hstack((x.reshape((x.shape[0], 1)), y.reshape((y.shape[0], 1))))
    X = Scaler().fit_transform(X)
    km = KMeans(n_clusters)
    km.fit(X)
    labels = km.labels_
    cluster_centers = km.cluster_centers_

    palette = 'bgrcmyk'
    # Iterate over the labels that were actually assigned, so a gap in the
    # label sequence cannot make the plot skip the last cluster.
    for i in sorted(set(labels)):
        my_members = labels == i
        cluster_center = cluster_centers[i]
        # Wrap around the palette instead of indexing past its end.
        color = palette[i % len(palette)]
        plt.scatter(X[my_members, 0], X[my_members, 1], s=90, c=color, alpha=0.7)
        plt.scatter(cluster_center[0], cluster_center[1], marker='+', s=280, c=color)

    # Pad the axes by 3% of the data range on each side.
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()
    tolx = (x_max - x_min) * 0.03
    toly = (y_max - y_min) * 0.03
    plt.xlim(x_min - tolx, x_max + tolx)
    plt.ylim(y_min - toly, y_max + toly)
    plt.show()
    return labels
开发者ID:armatita,项目名称:GEOMS2,代码行数:25,代码来源:cerena_multivariate_utils.py
示例2: update_clusters
def update_clusters():
    """Rebuild the user clusters from the stored review ratings.

    The rebuild only runs when the review count is a multiple of the update
    step ``((count // 100) + 1) * 5``.  A sparse user x wine rating matrix is
    clustered with KMeans into ``num_users // 10 + 2`` groups; all existing
    ``Cluster`` rows are then deleted and replaced, and every user is attached
    to the cluster of their row.

    Returns ``None``.  Nothing is changed when there are no users or no
    reviewed wines to cluster.
    """
    num_reviews = Review.objects.count()
    # Floor division keeps the step (and the modulo below) integral on
    # Python 3 as well as Python 2.
    update_step = ((num_reviews // 100) + 1) * 5
    if num_reviews % update_step != 0:
        return

    # Materialise the names: they are indexed and counted below, which a lazy
    # map object does not support.
    all_usernames = [user.username for user in User.objects.only("username")]
    all_wine_ids = {review.wine.id for review in Review.objects.only("wine")}
    if not all_usernames or not all_wine_ids:
        # Nothing to cluster (max() of an empty set would raise).
        return
    num_users = len(all_usernames)

    # One row per user (in all_usernames order), one column per wine id.
    ratings_m = dok_matrix((num_users, max(all_wine_ids) + 1), dtype=np.float32)
    for row, username in enumerate(all_usernames):
        for user_review in Review.objects.filter(user_name=username):
            ratings_m[row, user_review.wine.id] = user_review.rating

    # Perform kmeans clustering
    k = num_users // 10 + 2
    clustering = KMeans(n_clusters=k).fit(ratings_m.tocsr())

    # Replace the stored clusters; they must be saved before users are added.
    Cluster.objects.all().delete()
    new_clusters = {i: Cluster(name=i) for i in range(k)}
    for cluster in new_clusters.values():
        cluster.save()
    for row, cluster_label in enumerate(clustering.labels_):
        new_clusters[cluster_label].users.add(
            User.objects.get(username=all_usernames[row]))
开发者ID:WilliamLynch,项目名称:wine-recommender,代码行数:28,代码来源:suggestions.py
示例3: csv_parser
def csv_parser(fileName):
    """Cluster two columns of a CSV file and write a labelled copy.

    Columns 7 and 8 (0-based) of every row after the header are parsed as
    floats and clustered into 6 groups with KMeans.  The output file is named
    after the input with its last four characters replaced by
    ``_kmeans.csv``; it repeats every input line with the cluster label
    appended as an extra ``Label`` column.

    :param fileName: path of the input CSV; the first line is the header.
    """
    # Plain 'r' already gives universal newlines on Python 3; the explicit
    # 'U' flag was removed in Python 3.11.
    with open(fileName, 'r') as fhin:
        data = fhin.readlines()

    vaf = []
    for line in data[1:]:
        flds = line.split(',')
        vaf.append([float(flds[7]), float(flds[8])])
    print(vaf[:5])
    vaf_np = np.array(vaf)
    print(len(vaf_np))
    print(vaf_np[:5])

    # The cluster count is passed as n_clusters (the former k keyword is not
    # accepted by current scikit-learn).
    kmeansModel = KMeans(n_clusters=6, init='k-means++', n_init=100, max_iter=3000)
    labels = kmeansModel.fit_predict(vaf_np)
    print(labels[:30])

    # Write only after clustering succeeded, and always close the handle.
    outfile = fileName[:-4] + '_kmeans.csv'
    with open(outfile, 'w') as fhout:
        fhout.write(data[0].strip() + ',Label' + '\n')
        for line, label in zip(data[1:], labels):
            fhout.write(line.strip() + ',' + str(label) + '\n')
开发者ID:B-Rich,项目名称:gsinghal_python_src,代码行数:31,代码来源:VAF_plots_kmeans.py
示例4: fit
def fit(self, X):
    """
    Score KMeans partitions for 1..``self._maxc`` clusters and keep the best.

    For every candidate cluster count the data is clustered and scored with
    ``within_scatter_matrix_score``; each score is multiplied by
    ``k ** (2 / n_features)`` and the position of the smallest product is
    stored in ``self._M``.  That position is a 0-based index into the
    candidate list, i.e. it corresponds to ``k = self._M + 1``.

    :param X: 2-D data matrix; ``X.shape[1]`` is used as the feature count.
    :return: None
    """
    candidate_ks = range(1, self._maxc + 1)

    # Within-cluster scatter score for every candidate number of clusters.
    ldistorsion = []
    for n_groups in range(1, self._maxc + 1):
        model = KMeans(n_clusters=n_groups, n_jobs=-1)
        model.fit(X)
        ldistorsion.append(within_scatter_matrix_score(X, model.labels_))

    print(X.shape[1])
    print(ldistorsion)

    # Weight each score by k^(2/d) before looking for the minimum.
    PCF = []
    for score, n_groups in zip(ldistorsion, candidate_ks):
        weight = np.power(n_groups, 2.0/X.shape[1])
        print(score, n_groups, weight)
        PCF.append(score * np.power(n_groups, 2.0/X.shape[1]))
    print(PCF)

    self._M = np.argmin(PCF)
    print(self._M)
开发者ID:allinox,项目名称:kemlglearn,代码行数:29,代码来源:Xu.py
示例5: treeGenerator
def treeGenerator(self, rootLabel, points, names):
    """Recursively split ``points`` with KMeans below the node ``rootLabel``.

    rootLabel -- label of the node being expanded
    points    -- list of feature vectors assigned to that node
    names     -- image name for each feature vector (parallel to ``points``)

    When fewer than ``self.threshold`` vectors remain, the node gets an empty
    adjacency list and is recorded in ``self.leafLabels``.  Otherwise the
    vectors are clustered into ``self.branches`` groups, one child node is
    created per cluster centre (numbered from ``self.nodes``), the image
    names are recorded per child, and every child is expanded in turn.
    """
    # Too few vectors to split further: register the node as a leaf.
    if len(points) < self.threshold:
        self.adjancency[rootLabel] = []
        if rootLabel not in self.leafLabels:
            self.leafLabels.append(rootLabel)
        return

    splitter = KMeans(n_clusters=self.branches, n_jobs=4)
    splitter.fit(points)
    centers = splitter.cluster_centers_
    assignments = splitter.labels_

    # Create one child node per cluster centre, keyed by the centre itself.
    children = []
    node_of_center = {}
    for center in centers:
        node_id = self.nodes
        self.treeMap[node_id] = center
        self.nodeImages[node_id] = []  # images that reach this node
        node_of_center[tuple(center)] = node_id
        children.append(node_id)
        self.nodes = node_id + 1
    self.adjancency[rootLabel] = children

    # Distribute the vectors (and their image names) over the children.
    child_points = [[] for _ in range(self.branches)]
    child_names = [[] for _ in range(self.branches)]
    for idx in range(len(points)):
        branch = assignments[idx]
        child_points[branch].append(points[idx])
        child_names[branch].append(names[idx])
        images = self.nodeImages[node_of_center[tuple(centers[branch])]]
        if names[idx] not in images:
            images.append(names[idx])

    # Expand every child with the vectors that fell into its cluster.
    for branch in range(self.branches):
        child_label = node_of_center[tuple(centers[branch])]
        self.treeGenerator(child_label, child_points[branch], child_names[branch])
开发者ID:manikantareddyd,项目名称:VisualWordRepresentation,代码行数:33,代码来源:tree.py
示例6: kmeans_cluster
def kmeans_cluster(G, graph_name, num_clusters):
    """Split a graph into KMeans clusters and write each one as a GEXF file.

    The rows of the graph's adjacency matrix are clustered; the nodes of each
    cluster induce a subgraph that is written to
    ``<Constants.KMEANS_PATH>/<graph_name>/<graph_name><index><GEXF_FORMAT>``.
    The output directory is created when missing.

    :param G: NetworkX graph to partition.
    :param graph_name: used for the output directory and file names.
    :param num_clusters: number of clusters requested from KMeans.
    :return: ``num_clusters``, unchanged.
    """
    write_directory = os.path.join(Constants.KMEANS_PATH, graph_name)
    if not os.path.exists(write_directory):
        os.makedirs(write_directory)

    # Materialise the nodes as a list: it fixes the row order of the matrix
    # and is indexed by position below.  From NetworkX 2.0 on, G.nodes() is a
    # dict-like view whose [] looks nodes up by key instead.
    nodeList = list(G.nodes())
    matrix_data = nx.to_numpy_matrix(G, nodelist=nodeList)

    kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
    kmeans.fit(matrix_data)

    # Group the nodes by the label of their matrix row.
    clusters = {}
    for node, nodeLabel in zip(nodeList, kmeans.labels_):
        clusters.setdefault(nodeLabel, []).append(node)

    for clusterIndex, members in enumerate(clusters.values()):
        subgraph = G.subgraph(members)
        nx.write_gexf(subgraph, os.path.join(
            write_directory, graph_name + str(clusterIndex) + Constants.GEXF_FORMAT))
    return num_clusters
开发者ID:subincm,项目名称:hierarchical_nw_align,代码行数:27,代码来源:kmeans_cluster.py
示例7: make_tsne_plot
def make_tsne_plot(model, rel_wds, plot_lims, title):
    """Project a sample of vectors with PCA + t-SNE, cluster and plot them.

    The matrix from ``make_data_matrix(model)`` is standardised and reduced
    with PCA.  The rows whose key matches a relevant word are kept together
    with a random draw of other rows, embedded with t-SNE (cosine metric),
    clustered into 8 KMeans groups and handed to ``scatter_plot``.

    :param model: passed to ``make_data_matrix``, which returns ``(X, keys)``.
    :param rel_wds: sequence whose items carry the relevant word at index 0.
    :param plot_lims: forwarded to ``scatter_plot``.
    :param title: forwarded to ``scatter_plot``.
    """
    dim = 30
    X, keys = make_data_matrix(model)

    # first we actually do PCA to reduce the
    # dimensionality to make tSNE easier to calculate
    X_std = StandardScaler().fit_transform(X)
    # NOTE(review): only 2 components are kept, so the [:, :dim] slice below
    # has no effect; n_components=dim may have been intended -- confirm.
    sklearn_pca = PCA(n_components=2)
    X = sklearn_pca.fit_transform(X_std)[:, :dim]

    # do downsample
    k = 5000
    r_wds = [word[0] for word in rel_wds]
    relevant = [i for i, key in enumerate(keys) if key in r_wds]
    # Never ask for more random rows than exist (choice() without
    # replacement raises otherwise).
    n_random = min(k - 10, len(keys))
    sample = np.concatenate((
        # dtype=int keeps the result usable as an index even when no
        # relevant word was found (an empty list defaults to float).
        np.array(relevant, dtype=int),
        np.random.choice(len(keys), n_random, replace=False),
    ))
    X = X[sample, :]
    keys = [keys[i] for i in sample]

    # Do tSNE
    tsne = TSNE(n_components=2, random_state=0, metric="cosine")
    X_transf = tsne.fit_transform(X)
    k_means = KMeans(n_clusters=8)
    labels = k_means.fit_predict(X_transf)
    scatter_plot(X_transf[:, 0], X_transf[:, 1], rel_wds, labels, title, keys, plot_lims)
开发者ID:quinngroup,项目名称:sm_w2v,代码行数:35,代码来源:plot_utils.py
示例8: run_kmeans
def run_kmeans(gene_folder, n_clusters):
    """Cluster the loaded parameters and summarise the fitness per cluster.

    :param gene_folder: passed to ``load_all_generations_as_DataFrame``,
        which returns ``(pars, fitness)``.
    :param n_clusters: number of clusters requested from KMeans.
    :return: ``(kmeans, means, stds)`` -- the fitted estimator plus, for each
        cluster index, the mean and the standard deviation of the
        ``longest_interval_within_margin`` fitness column.
    """
    pars, fitness = load_all_generations_as_DataFrame(gene_folder)
    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(pars)

    column = 'longest_interval_within_margin'
    # Real lists, not lazy map objects: callers can index them and iterate
    # more than once on Python 3 too.
    means = [fitness[kmeans.labels_ == c].mean()[column] for c in range(n_clusters)]
    stds = [fitness[kmeans.labels_ == c].std()[column] for c in range(n_clusters)]
    return kmeans, means, stds
开发者ID:halfdanrump,项目名称:MarketSimulation,代码行数:7,代码来源:data_analysis.py
示例9: create_fiveline
def create_fiveline(image):
    """Locate the five staff lines of a score image.

    Line segments are detected with Canny + probabilistic Hough transform and
    drawn onto ``image`` (which is modified and saved to
    ``images/houghlines.jpg``).  The mean y coordinate of every
    near-horizontal segment (end points less than 4 px apart vertically) is
    clustered into 5 groups with KMeans.

    :param image: image array accepted by ``cv2.Canny``; drawn on in place.
    :return: the five cluster centres (y positions), sorted ascending.
    :raises ValueError: when fewer than 5 near-horizontal segments are found.
    """
    edges = cv2.Canny(image, 50, 150, apertureSize=3)
    minLineLength = 1
    maxLineGap = 10
    # Keyword arguments are required here: the fifth positional parameter of
    # HoughLinesP is the output array ``lines``, not minLineLength.
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 70,
                            minLineLength=minLineLength, maxLineGap=maxLineGap)

    ys = []
    # HoughLinesP yields None when no segment is detected.
    for line in (lines if lines is not None else []):
        for x1, y1, x2, y2 in line:
            cv2.line(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            if abs(y1 - y2) < 4:
                # One single-feature sample per segment.
                ys.append([(y1 + y2) // 2])

    cv2.imwrite('images/houghlines.jpg', image)
    display_image(image)

    if len(ys) < 5:
        raise ValueError(
            "need at least 5 near-horizontal segments, found %d" % len(ys))

    kmeans = KMeans(init='k-means++', n_clusters=5, n_init=10)
    kmeans.fit(np.asarray(ys))
    fiveline = sorted(center[0] for center in kmeans.cluster_centers_)
    print("K-MEANS centers")
    print(fiveline)
    return fiveline
开发者ID:nikolalsvk,项目名称:note-play,代码行数:31,代码来源:note-play.py
示例10: partition_FOV_KMeans
def partition_FOV_KMeans(self,tradeoff_weight=.5,fx=.25,fy=.25,n_clusters=4,max_iter=500):
    """
    Partition the FOV into clusters of pixels that are close in space and
    mutually correlated.

    Parameters
    ------------------------------
    tradeoff_weight: between 0 and 1, weights the contributions of
        correlation and distance in the overall metric
    fx,fy: downsampling factors applied to the movie
    n_clusters,max_iter: KMeans algorithm parameters

    Outputs
    -------------------------------
    fovs: 2D uint8 array encoding the partitions of the FOV
    mcoef: matrix of pairwise correlation coefficients
    distanceMatrix: matrix of pixel distances, divided by its maximum

    Notes
    -------------------------------
    ``self.resize(fx, fy)`` is called and its return value discarded, then
    ``self.shape`` is read again -- presumably the movie is resized in
    place; TODO confirm.
    """
    _, h1, w1 = self.shape
    self.resize(fx, fy)
    n_frames, h, w = self.shape

    # Correlation between the time courses of every pair of pixels.
    traces = np.reshape(self, (n_frames, h*w))
    mcoef = np.corrcoef(traces.T)

    # Pairwise pixel distances, scaled to a maximum of 1.
    col_idx, row_idx = np.meshgrid(list(range(w)), list(range(h)))
    coordmat = np.vstack((col_idx.flatten(), row_idx.flatten()))
    distanceMatrix = euclidean_distances(coordmat.T)
    distanceMatrix = old_div(distanceMatrix, np.max(distanceMatrix))

    features = tradeoff_weight*mcoef-(1-tradeoff_weight)*distanceMatrix
    fitted = KMeans(n_clusters=n_clusters, max_iter=max_iter).fit(features)

    # Back to image layout, then up to the size the movie had on entry.
    fovs = np.reshape(fitted.labels_, (h, w))
    fovs = cv2.resize(np.uint8(fovs),(w1,h1),old_div(1.,fx),old_div(1.,fy),interpolation=cv2.INTER_NEAREST)
    return np.uint8(fovs), mcoef, distanceMatrix
开发者ID:agiovann,项目名称:Constrained_NMF,代码行数:35,代码来源:movies.py
示例11: perform_cluster_analysis
def perform_cluster_analysis(dataset):
    """Draw an elbow plot of KMeans inertia over a range of cluster counts.

    The ``(K, inertia)`` series is cached with joblib in
    ``cpath + 'elbow_plot.dat'``.  When that file exists it is loaded and
    ``dataset`` is not used; otherwise KMeans is fitted once per value in
    ``K`` and the series is dumped before plotting.  The figure is written
    via ``xyplot`` to ``staticpath + 'elbow_plot.png'``.

    :param dataset: data matrix passed to ``KMeans.fit``.
    """
    filename = 'elbow_plot.dat'
    if os.path.exists(cpath + filename):
        data = joblib.load(cpath + filename)
        K = data[0]
        meandistortions = data[1]
    else:
        X = dataset
        print('X Shape:  %s' % (X.shape,))
        K = [1, 2, 5, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
        # NOTE: the plotted quantity is KMeans.inertia_ as reported by the
        # estimator, although the axis label says 'Average Distortion'.
        meandistortions = []
        for k in K:
            print(k)
            kmeans = KMeans(n_clusters=k, n_jobs=3)
            kmeans.fit(X)
            meandistortions.append(kmeans.inertia_)
        joblib.dump([K, meandistortions], cpath + filename, compress=8)

    plot_name = "elbow_plot.png"
    title = 'Selecting k with the Elbow Method'
    xlabel = 'Number of Clusters (k)'
    ylabel = 'Average Distortion'
    xyplot(K, meandistortions, 0, 0, 0, 0, title, xlabel, ylabel, staticpath + plot_name, line=1, y_log=0)
开发者ID:tilanukwatta,项目名称:scicano,代码行数:34,代码来源:arxiv_analysis_v3.py
示例12: reduce_colors
def reduce_colors(image, n_clusters):
    """Quantise an image to ``n_clusters`` colours with KMeans.

    The image is converted with ``img_as_float`` and flattened to a list of
    3-channel pixels, which are clustered.  Two recoloured copies are built,
    one using the per-cluster mean colour and one the per-cluster median;
    their PSNR against the original is logged.

    :param image: image with 3 channels per pixel.
    :param n_clusters: number of colours to keep.
    :return: a ``StringIO`` buffer into which the mean-coloured image was
        saved with ``plt.imsave``.
    """
    image = img_as_float(image)
    height, width = len(image), len(image[0])
    image = image.reshape((height*width, 3))

    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=241)
    classes = kmeans.fit_predict(image)

    # Paint every pixel with the mean / median colour of its cluster.
    image_mean = image.copy().astype(float)
    image_median = image.copy().astype(float)
    for cl in range(n_clusters):
        members = classes == cl
        image_mean[members] = np.mean(image[members], axis=0)
        image_median[members] = np.median(image[members], axis=0)

    message = 'Clusters: %s, PSNR(mean): %s, PSRN(median): %s'%(n_clusters, PSNR(image, image_mean), PSNR(image, image_median))
    logging.info(message)

    string_image = StringIO()
    plt.imsave(string_image, image_mean.reshape(height, width, 3))
    return string_image
开发者ID:smblance,项目名称:photoclust,代码行数:32,代码来源:clustering.py
示例13: makecluster
def makecluster():
    """Fit a 4-cluster KMeans model on the module-level ``temp`` data.

    ``temp`` is converted to an array, clustered, and both the array and the
    resulting labels are printed.  Nothing is returned.

    The unused ``n_points``/``n_dim``/``n_clusters`` constants and a
    synthetic 16x2 grid that was built but never passed to the model have
    been removed; they had no effect on the result.
    """
    model = KMeans(init='k-means++', n_clusters=4, n_init=10)
    data1 = np.array(temp)
    model.fit(data1)
    print(data1)
    print(model.labels_)
开发者ID:vamsikrishnacs,项目名称:PERSONALISEDTUTORING,代码行数:28,代码来源:readfromfile.py
示例14: findColor
def findColor(frame):
    """Pick a dominant colour of ``frame`` via KMeans colour quantisation.

    The frame is shrunk to 30% with ``imresize``, a third of its shuffled
    pixels is used to fit ``k_colors`` clusters, and every pixel is then
    assigned to a cluster.  Cluster centres are truncated to integers and
    ranked by pixel count.  Timings of each stage are printed.

    :param frame: image convertible with ``np.array``; the resized result
        must be 3-dimensional.
    :return: the colour ranked third when at least three clusters received
        pixels, otherwise the top-ranked colour.
    """
    t = time()
    points = imresize(np.array(frame, dtype=np.float64), 0.3)
    rows, cols, depth = points.shape
    data = np.reshape(points, (rows * cols, depth))
    # Floor division: a slice bound must be an integer on Python 3.
    sample = shuffle(data, random_state=0)[:len(data) // 3]
    print("Reshape and shuffle in %0.3f seconds." % (time() - t))

    t = time()
    kmeans = KMeans(n_clusters=k_colors, n_jobs=jobs).fit(sample)
    labels = kmeans.predict(data)
    print("Fit and predict in %0.3f seconds." % (time() - t))

    t = time()
    # Real lists of ints (map() would leave lazy iterators on Python 3).
    colors = [[int(channel) for channel in color]
              for color in kmeans.cluster_centers_]
    print("Found in %0.3f seconds." % (time() - t))

    # Count the pixels per cluster and rank clusters by that count.
    frequents = defaultdict(int)
    for label in labels:
        frequents[label] += 1
    ranked = sorted(frequents.items(), key=lambda item: item[1], reverse=True)
    top = [colors[label] for label, _ in ranked[:3]]
    # NOTE(review): the third-ranked colour is returned on purpose in the
    # original code; confirm this is the intended choice.
    return top[2] if len(top) == 3 else top[0]
开发者ID:Jeffery-W,项目名称:movie-segment,代码行数:29,代码来源:processFrame.py
示例15: match_line_cluster
def match_line_cluster(gdf1, gdf2):
    """
    Try to match two layers of linestrings with KMeans cluster analysis based
    on a triplet of descriptive attributes :
        (centroid coords., rounded length, approximate bearing)

    Parameters
    ----------
    gdf1: GeoDataFrame
        The reference dataset.
    gdf2: GeoDataFrame
        The collection of LineStrings to match.

    Returns
    -------
    matching_table: pandas.DataFrame
        A table (index-based on *gdf1*) holding, for each feature of *gdf1*,
        the positions of the *gdf2* features that fell into the same
        cluster (as returned by ``numpy.ndarray.nonzero``).
    """
    n_ref = len(gdf1)
    param1, param2 = list(map(mparams, [gdf1, gdf2]))
    k_means = KMeans(init='k-means++', n_clusters=n_ref,
                     n_init=10, max_iter=1000)
    # assumes mparams returns lists, so ``+`` concatenates the gdf1 rows
    # followed by the gdf2 rows -- TODO confirm.
    k_means.fit(np.array((param1 + param2)))

    # The first n_ref labels belong to gdf1, the remainder to gdf2.
    # (Both series used to be built from the gdf2 slice.)
    df1 = pd.Series(k_means.labels_[:n_ref])
    df2 = pd.Series(k_means.labels_[n_ref:])

    return pd.DataFrame(
        index=list(range(n_ref)),
        # ndarray.nonzero() replaces Series.nonzero(), which newer pandas
        # releases no longer provide.
        data=df1.apply(lambda x: (df2 == x).values.nonzero())
        )
开发者ID:mthh,项目名称:gpd_lite_toolbox,代码行数:32,代码来源:core.py
示例16: iris_h2o_vs_sciKmeans
def iris_h2o_vs_sciKmeans(ip, port):
    """Check that H2O and scikit-learn KMeans agree on the iris data.

    Both implementations are started from the same three initial centres on
    the first four iris columns; the resulting centres are printed and must
    match coordinate by coordinate within 1e-10.

    :param ip: address of the H2O cluster, passed to ``h2o.init``.
    :param port: port of the H2O cluster, passed to ``h2o.init``.
    :raises AssertionError: when any pair of centre coordinates differs by
        1e-10 or more.
    """
    # Connect to a pre-existing cluster
    h2o.init(ip, port)

    iris_h2o = h2o.import_frame(path=h2o.locate("smalldata/iris/iris.csv"))
    iris_sci = np.genfromtxt(h2o.locate("smalldata/iris/iris.csv"), delimiter=',')
    iris_sci = iris_sci[:, 0:4]

    # Shared starting centres for both implementations.
    s = [[4.9, 3.0, 1.4, 0.2],
         [5.6, 2.5, 3.9, 1.1],
         [6.5, 3.0, 5.2, 2.0]]
    start = h2o.H2OFrame(s)
    start_key = start.send_frame()

    h2o_km = h2o.kmeans(x=iris_h2o[0:4], k=3, user_points=start_key, standardize=False)
    sci_km = KMeans(n_clusters=3, init=np.asarray(s), n_init=1)
    sci_km.fit(iris_sci)

    print("Cluster centers from H2O:")
    h2o_centers = h2o_km.centers()
    print(h2o_centers)

    print("Cluster centers from scikit:")
    sci_centers = sci_km.cluster_centers_.tolist()
    print(sci_centers)

    for hcenter, scenter in zip(h2o_centers, sci_centers):
        for hpoint, spoint in zip(hcenter, scenter):
            # Compare the magnitude: a signed difference lets any negative
            # deviation pass unnoticed.
            assert abs(hpoint - spoint) < 1e-10, "expected centers to be the same"
开发者ID:OspreyX,项目名称:h2o-dev,代码行数:33,代码来源:pyunit_iris_h2o_vs_sciKmeans.py
示例17: showClustering
def showClustering(data):
    """Cluster ``data`` with a default KMeans and plot the first two columns.

    Each cluster's members are drawn as dots in one colour and its centre as
    a large circle of the same colour.  Diagnostic information is printed
    for every centre and every cluster, then the figure is shown.

    :param data: 2-D array; columns 0 and 1 are plotted.
    """
    import matplotlib.pyplot as plt
    from itertools import cycle

    kmeans = KMeans()
    kmeans.fit(data)
    labels = kmeans.labels_
    nCluster = len(numpy.unique(labels))
    centers = kmeans.cluster_centers_

    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    plt.figure(1)
    plt.clf()

    for center in centers:
        print(center)

    # Single-argument print calls give the same output on Python 2 and 3.
    for k, col in zip(range(nCluster), colors):
        members = labels == k
        print("plotting %dth cluster" % k)
        print("label type %s %s" % (labels, type(labels)))
        print("members are: %s %s" % (members, type(members)))
        print("data[members,0] %s %s" % (data[members, 0], type(data[members, 0])))
        center = centers[k]
        plt.plot(data[members, 0], data[members, 1], col + '.')
        plt.plot(center[0], center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)

    plt.title("clusters")
    plt.show()
开发者ID:proboscis,项目名称:GradProject,代码行数:26,代码来源:clustering.py
示例18: main
def main():
    """Cluster the selected song vectors for 25 to 49 clusters.

    Song ids are read (one integer per line) from
    ``songIDsofFirst100Users.txt`` and appended to the module-level
    ``songIDsToCluster`` list.  The matching rows of the ``finalVectors``
    matrix are scaled column-wise by their maximum absolute value, and for
    every cluster count in ``range(25, 50)`` the KMeans labels are passed to
    ``writeSongIDandClusterToFile``.
    """
    with open("songIDsofFirst100Users.txt", "r") as songIds:
        for line in songIds:
            songIDsToCluster.append(int(line))
    print(len(songIDsToCluster))

    f = sio.loadmat('/home/dmitriy/workspace/MLFinalProject/MatlabFiles/finalVectors.mat')
    full = np.nan_to_num(np.matrix(f['finalVectors']))

    # Work on a plain 2-D array: current scikit-learn rejects np.matrix.
    mtx = np.asarray(full[songIDsToCluster])
    col_scale = np.max(np.abs(mtx), axis=0)
    # Leave all-zero columns untouched instead of producing 0/0 = NaN.
    col_scale[col_scale == 0] = 1
    mtx /= col_scale

    for clusters in range(25, 50):
        errors = 0
        ClusteringKmeans = KMeans(n_clusters=clusters)
        ClusteringKmeans.fit(mtx)
        writeSongIDandClusterToFile(ClusteringKmeans.labels_, clusters)
        print("Clusters: %s Retest Error: %s" % (clusters, errors))
开发者ID:dmiafa,项目名称:mlFinalProject,代码行数:29,代码来源:clusterSongVectors.py
示例19: kmeans_clustering
def kmeans_clustering(matrix, N):
    """Group the row indices of ``matrix`` by their KMeans cluster.

    :param matrix: samples to cluster, one per row.
    :param N: number of clusters.
    :return: a list of ``N`` lists; entry ``c`` holds, in ascending order,
        the row indices assigned to cluster ``c``.
    """
    assignments = KMeans(n_clusters=N, n_jobs=-1).fit_predict(matrix)
    members = [[] for _ in range(N)]
    for row_idx, cluster_id in enumerate(assignments):
        bucket = members[cluster_id]
        bucket.append(row_idx)
    return members
开发者ID:seba-1511,项目名称:specialists,代码行数:7,代码来源:specialist.py
示例20: AdvancedModel
class AdvancedModel():
    """Skeleton price model: KMeans over the targets plus a linear regressor.

    Only the clustering step of ``fit`` and the weight accessors are
    implemented; ``predict`` is still a stub.
    """

    def __init__(self):
        # Per-instance state.  As class attributes these objects were shared
        # by every instance, so set_weights() on one model changed them all.
        self.clusters = []
        # price class regression
        self.price_reg = LinearRegression()

    def fit(self, X_train, y_train, n_clusters=4):
        """Cluster the training targets into ``n_clusters`` price classes.

        :param X_train: training features (not used yet).
        :param y_train: training targets; reshaped to a single column.
        :param n_clusters: number of KMeans clusters.  This argument is now
            honoured; it used to be ignored in favour of a hard-coded 5.
        """
        y_train_mat = np.array(y_train).reshape((-1, 1))
        # 1. determine clusters
        self.km = KMeans(n_clusters=n_clusters)
        self.km.fit(y_train_mat)
        # Keep the centres on the instance so later steps can use them.
        self.clusters = self.km.cluster_centers_
        print(self.clusters)
        # TODO: 2. fit naive bayes, 3. train the regression model

    def predict(self, X):
        """Not implemented yet; returns ``None``."""
        pass

    def get_weights(self):
        """Return the regression coefficients followed by the intercept."""
        return np.append(self.price_reg.coef_, [self.price_reg.intercept_])

    def set_weights(self, w):
        """Set the coefficients from ``w[:-1]`` and the intercept from ``w[-1]``."""
        self.price_reg.coef_ = w[:-1]
        self.price_reg.intercept_ = w[-1]
开发者ID:DaanSeynaeve,项目名称:capita_CPmeetsML,代码行数:33,代码来源:advanced_model.py
注:本文中的sklearn.cluster.KMeans类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论