This article collects and summarizes typical usage examples of the sklearn.cluster.MiniBatchKMeans class in Python. If you have been wondering what the MiniBatchKMeans class is for, or how to use it, the curated class examples below should help.
The following shows 20 code examples of the MiniBatchKMeans class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
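Before the examples, here is a minimal sketch of the MiniBatchKMeans API that they all rely on (constructor, fit / partial_fit for incremental training, predict, and the cluster_centers_ attribute). The data and parameter values below are arbitrary and only for illustration:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

X = np.random.rand(1000, 2)                        # 1000 samples, 2 features (synthetic)
km = MiniBatchKMeans(n_clusters=5, batch_size=100, random_state=0)
km.fit(X)                                          # fit on the whole array at once, or...
for batch in np.array_split(X, 10):
    km.partial_fit(batch)                          # ...train incrementally on mini-batches
labels = km.predict(X)                             # cluster index for every sample
centers = km.cluster_centers_                      # (5, 2) array of centroids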
Example 1: DocDescriptor
class DocDescriptor(object):

    def __init__(self, word_descriptor, n_clusters=1000):
        self._n_clusters = n_clusters
        self._cluster = MiniBatchKMeans(n_clusters=n_clusters, verbose=1,
                                        max_no_improvement=None, reassignment_ratio=1.0)
        self._word_descriptor = word_descriptor

    def get_word_descriptor(self, img):
        X = get_features_from_image(img)
        words = []
        for i in X:
            words.append(self._word_descriptor.transform(i))
        return words

    def partial_fit(self, img):
        X = self.get_word_descriptor(img)
        self._cluster.partial_fit(X)

    def transform(self, img):
        X = self.get_word_descriptor(img)
        Y = self._cluster.predict(X)
        desc = [0] * self._n_clusters
        unit = 1.0 / self._n_clusters
        for i in range(0, len(Y)):
            desc[Y[i]] += unit
        return desc
Author: caoym, Project: odr, Lines: 26, Source: odr.py
Example 2: ClusteringEnsemble
class ClusteringEnsemble(BaseEstimator):

    def __init__(self, estimator_const=LinearRegression, n_clusters=2):
        self.estimator_const_ = estimator_const
        self.n_clusters_ = n_clusters
        self.clustering = MiniBatchKMeans(n_clusters=self.n_clusters_)

    def get_params(self, deep=True):
        return {"n_clusters": self.n_clusters_}

    def fit(self, X, y):
        print("Training KMeans")
        colors = self.clustering.fit_predict(X).reshape(X.shape[0])

        print("Training Estimators")
        # each estimator is assigned to one cluster
        self.estimators = [self.estimator_const_() for i in range(self.n_clusters_)]
        for i in range(self.n_clusters_):
            rows = colors == i
            self.estimators[i].fit(X[rows], y[rows])

    def predict(self, X):
        y = np.zeros(X.shape[0])
        print("Predicting clusters")
        colors = self.clustering.predict(X)
        print("Estimating results")
        for i in range(self.n_clusters_):
            rows = colors == i
            y[rows] = self.estimators[i].predict(X[rows])
        return y
Author: Patechoc, Project: labs-untested, Lines: 32, Source: estimators.py
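A hypothetical usage sketch (not from the original project): piecewise-linear synthetic data, where clustering the inputs first and fitting one LinearRegression per cluster is exactly what ClusteringEnsemble does:

import numpy as np
from sklearn.linear_model import LinearRegression

X = np.random.rand(200, 1) * 10
y = np.where(X[:, 0] < 5, 2 * X[:, 0], -3 * X[:, 0] + 25)   # two linear regimes

model = ClusteringEnsemble(estimator_const=LinearRegression, n_clusters=2)
model.fit(X, y)
pred = model.predict(X)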
Example 3: _run_cluster
def _run_cluster(origin_list, cluster_num=8, batch_size=100, resize=(64, 64)):
    clf = MiniBatchKMeans(n_clusters=cluster_num, batch_size=batch_size)

    def next_batch(allfiles, batch_size):
        # yields (stacked image batch, list of indices into allfiles)
        imgs = []
        inds = []
        for ind, (path, label) in enumerate(allfiles):
            img = Image.open(path).convert("L")
            img = img.resize(resize, Image.ANTIALIAS)
            img = np.reshape(np.array(img), (1, -1)).astype(np.float32) / 255.0
            imgs.append(img)
            inds.append(ind)
            if len(imgs) >= batch_size:
                yield np.vstack(imgs), inds
                imgs = []
                inds = []
        if len(inds) > 0:
            yield np.vstack(imgs), inds

    # first pass: incrementally fit the clusterer
    for batch, _ in next_batch(origin_list, batch_size):
        clf.partial_fit(batch)

    # second pass: assign every image to a cluster
    cluster_dict = defaultdict(list)
    for batch, inds in next_batch(origin_list, batch_size):
        Ys = clf.predict(batch)
        for y, ind in zip(Ys, inds):
            path, label = origin_list[ind]
            cluster_dict[y].append((path, label))
    return cluster_dict
Author: z01nl1o02, Project: tests, Lines: 27, Source: clusterOP.py
Example 4: project
def project(self, ndim=None):
    """ Projects the data object given to the constructor onto `ndim` dimensions

    Parameters
    ----------
    ndim : int
        The number of dimensions we want to project the data on.

    Returns
    -------
    dataTica : :class:`MetricData <htmd.metricdata.MetricData>` object
        A new :class:`MetricData <htmd.metricdata.MetricData>` object containing the projected data

    Example
    -------
    >>> tri = KMeansTri(data)
    >>> datatri = tri.project(5)
    """
    import scipy.spatial.distance as scidist
    from sklearn.cluster import MiniBatchKMeans
    from htmd.metricdata import MetricData

    datconcat = np.concatenate(self.data.dat)
    mb = MiniBatchKMeans(n_clusters=ndim)
    mb.fit(datconcat)

    # TODO: Could make it into a loop to waste less memory
    dist = scidist.cdist(datconcat, mb.cluster_centers_)
    dist = np.mean(dist, axis=1)[:, np.newaxis] - dist
    dist[dist < 0] = 0

    return MetricData(dat=self.data.deconcatenate(dist), ref=self.data.ref, simlist=self.data.simlist,
                      fstep=self.data.fstep, parent=self.data)
Author: jeiros, Project: htmd, Lines: 33, Source: kmeanstri.py
Example 5: extract_spatial_pyramid
def extract_spatial_pyramid(images, dataset, vq=None, n_words=1000):
    descriptors, locations = sift_descriptors(images, dataset)
    if vq is None:
        vq = MiniBatchKMeans(n_clusters=n_words, verbose=1, init='random',
                             batch_size=2 * n_words, compute_labels=False,
                             reassignment_ratio=0.0, random_state=1, n_init=3)
        # vq = KMeans(n_clusters=n_words, verbose=10, init='random')
        vq.fit(shuffle(np.vstack(descriptors)))
    else:
        n_words = vq.n_clusters

    pyramids = []
    for descr, locs in zip(descriptors, locations):
        words = vq.predict(descr)
        global_ = np.bincount(words, minlength=n_words).astype(np.float)
        global_ /= max(global_.sum(), 1)
        third_of_image = locs[1].max() // 3 + 1
        stripe_indicator = locs[1] // third_of_image
        inds = np.vstack([stripe_indicator, words])
        stripe_hists = sparse.coo_matrix((np.ones(len(words)), inds),
                                         shape=(3, n_words)).toarray()
        stripe_hists = [x / max(x.sum(), 1) for x in stripe_hists]
        pyramids.append(np.hstack([np.hstack(stripe_hists), global_]))

    return vq, np.vstack(pyramids)
Author: amueller, Project: segmentation, Lines: 26, Source: bow.py
Example 6: initializeWeight
def initializeWeight(D, type, N_OUT):
    # Here we first whiten the data (PCA or ZCA) and then optionally run k-means
    # on this whitened data.
    import numpy as np
    if D.shape[0] < N_OUT:
        print(" Not enough data for '%s' estimation, using elwise" % type)
        return np.random.normal(0, 1, (N_OUT, D.shape[1]))
    D = D - np.mean(D, axis=0, keepdims=True)
    # PCA, ZCA, K-Means
    assert type in ['pca', 'zca', 'kmeans', 'rand'], "Unknown initialization type '%s'" % type
    C = D.T.dot(D)
    s, V = np.linalg.eigh(C)
    # order the eigenvalues
    ids = np.argsort(s)[-N_OUT:]
    s = s[ids]
    V = V[:, ids]
    s[s < 1e-6] = 0
    s[s >= 1e-6] = 1. / np.sqrt(s[s >= 1e-6] + 1e-3)
    S = np.diag(s)
    if type == 'pca':
        return S.dot(V.T)
    elif type == 'zca':
        return V.dot(S.dot(V.T))
    # Whiten the data
    wD = D.dot(V.dot(S))
    wD /= np.linalg.norm(wD, axis=1)[:, None]
    if type == 'kmeans':
        # Run k-means
        from sklearn.cluster import MiniBatchKMeans
        km = MiniBatchKMeans(n_clusters=wD.shape[1], batch_size=10 * wD.shape[1]).fit(wD).cluster_centers_
    elif type == 'rand':
        km = wD[np.random.choice(wD.shape[0], wD.shape[1], False)]
    C = km.dot(S.dot(V.T))
    C /= np.std(D.dot(C.T), axis=0, keepdims=True).T
    return C
Author: ShawnXUNJU, Project: magic_init, Lines: 35, Source: magic_init.py
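A rough illustration of how this initializer might be called; the matrix D below is random, whereas in the original project it would presumably hold activations sampled from a network layer (an assumption, not something the snippet states):

import numpy as np

D = np.random.randn(5000, 64)           # stand-in for sampled layer activations
W = initializeWeight(D, 'kmeans', 32)   # (32, 64) whitened, k-means-based weight matrix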
Example 7: make_cluster
def make_cluster(datasets):
    num_clusters = 5
    lsa_dim = 500
    max_df = 0.8
    max_features = 10000
    minibatch = True
    print("datasets are %(datasets)s" % locals())

    km = MiniBatchKMeans(n_clusters=num_clusters, init='k-means++',
                         batch_size=1000, n_init=10, max_no_improvement=10, verbose=True)
    km.fit(datasets)
    labels = km.labels_
    # distance of each sample to every cluster centre
    transformed = km.transform(datasets)
    dists = np.zeros(labels.shape)
    for i in range(len(labels)):
        dists[i] = transformed[i, labels[i]]

    clusters = []
    for i in range(num_clusters):
        cluster = []
        ii = np.where(labels == i)[0]
        dd = dists[ii]
        di = np.vstack([dd, ii]).transpose().tolist()
        di.sort()
        for d, j in di:
            cluster.append(datasets[int(j)])
        clusters.append(cluster)
    return clusters
Author: id774, Project: sandbox, Lines: 30, Source: cluster_with_dataset.py
Example 8: clustering
def clustering(self, X, NUM_CLUSTERS, MINIBATCH):
    '''
    Partition the samples with k-means clustering.
    '''
    if MINIBATCH:
        km = MiniBatchKMeans(n_clusters=NUM_CLUSTERS,
                             init='k-means++', batch_size=1000,
                             n_init=10, max_no_improvement=10)
    else:
        km = KMeans(n_clusters=NUM_CLUSTERS, init='k-means++', n_init=1)
    km.fit(X)
    transformed = km.transform(X)  # distance of each item to every cluster centre
    labels = km.labels_

    dists = []
    for i in range(len(labels)):
        dists.append(transformed[i, labels[i]])  # distance of each item to its own cluster centre

    labels = DataFrame(labels)
    dists = DataFrame(dists)
    labels.columns = ['label']
    dists.columns = ['dists']
    self.data = pd.concat([labels, dists, self.data], axis=1)  # attach labels to the original data
    return km
Author: takeru-nitta, Project: auction, Lines: 27, Source: clustering.py
Example 9: define_clusters
def define_clusters(projections):
    """
    Creates several different clusterings of the data in projections.

    :param projections: dict(string, (2 x Num_Samples) numpy.ndarray)
        dictionary mapping the projection type (e.g. "tSNE") to an array containing
        the two-dimensional coordinates for each sample in the projection.
    :return: dict of string (projection name) =>
        (dict of string (cluster technique) => np.ndarray of size N_Samples (cluster assignments))
    """
    pbar = ProgressBar(4 * len(projections))

    out_clusters = dict()

    for key in projections:
        proj_data = projections[key]
        proj_clusters = dict()

        # K-means for k = 2-5
        for k in range(2, 6):
            clust_name = "K-Means, k=" + str(k)
            kmeans = MiniBatchKMeans(n_clusters=k)
            clust_assignments = kmeans.fit_predict(proj_data.T)
            proj_clusters.update({clust_name: clust_assignments})
            pbar.update()

        out_clusters.update({key: proj_clusters})

    pbar.complete()

    return out_clusters
Author: shakea02, Project: FastProject, Lines: 34, Source: Projections.py
Example 10: clusterize
def clusterize(self):
    X = np.ndarray((len(self.real_power_data), 2))
    X[:, 0] = self.real_power_data
    X[:, 1] = self.reac_power_data

    clustering = MiniBatchKMeans(self.spinbox_cluster.value())
    clustering.fit(X)

    # Identify the cluster centroids
    centroids = clustering.cluster_centers_.tolist()

    # Count how many elements each cluster contains
    predictions = clustering.predict(X)
    occurrences = Counter(predictions)

    # Identify the clusters that contain only one element
    # (they are treated as transition clusters)
    transition_clusters = [k for k, v in occurrences.items()
                           if v < 2]

    # Drop the centroids of the transition clusters
    centroids = [e for i, e in enumerate(centroids)
                 if i not in transition_clusters]
    predictions = [-1 if v in transition_clusters else v
                   for v in predictions]

    self.prototypes = centroids
    self.plot_clusterized(predictions)
Author: pauloborges, Project: tg, Lines: 29, Source: signature.py
Example 11: main
def main():
    if len(sys.argv) != 4:
        print(__doc__)
        return 1

    infiles = glob(sys.argv[1])
    outfile = sys.argv[2]
    K = int(sys.argv[3])

    print("Reading in", len(infiles), "files")
    fullarr = np.loadtxt(fileinput.input(infiles), delimiter='\t')[:, :-7]

    summary_stats = None
    stats_file = '/n/fs/gcf/dchouren-repo/COS513-Finance/summary_stats/stats2'
    with open(stats_file, 'rb') as inf:
        summary_stats = np.loadtxt(inf)
    stds = summary_stats[:len(summary_stats) // 2]
    means = summary_stats[len(summary_stats) // 2:]

    fullarr = (fullarr - means) / stds

    print("Learning MiniBatchKMeans with K =", K)
    km = MiniBatchKMeans(n_clusters=K, verbose=True)  # TODO max_iter
    km.fit(fullarr)

    print("KMeans trained, saving")
    with open(outfile, 'wb') as out_model:
        pickle.dump(km, out_model)

    print("Score:", km.score(fullarr))

    return 0
Author: Alex223124, Project: COS513-Finance, Lines: 34, Source: clustering.py
Example 12: color_quantization_sk
def color_quantization_sk(image, clusters):
    # grab the width and height of the image
    (h, w) = image.shape[:2]

    # convert the image from the RGB color space to the L*a*b*
    # color space -- since we will be clustering using k-means
    # which is based on the euclidean distance, we'll use the
    # L*a*b* color space where the euclidean distance implies
    # perceptual meaning
    image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)

    # reshape the image into a feature vector so that k-means
    # can be applied
    image = image.reshape((image.shape[0] * image.shape[1], 3))

    # apply k-means using the specified number of clusters and
    # then create the quantized image based on the predictions
    clt = MiniBatchKMeans(n_clusters=clusters)
    labels = clt.fit_predict(image)
    quant = clt.cluster_centers_.astype("uint8")[labels]

    # reshape the feature vectors to images
    quant = quant.reshape((h, w, 3))

    # convert from L*a*b* back to BGR
    quant = cv2.cvtColor(quant, cv2.COLOR_LAB2BGR)

    return quant
Author: simama, Project: RealSense, Lines: 27, Source: mono_avoid.py
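For context, a call to this helper might look like the following sketch; the file names are hypothetical, and any BGR image loaded with OpenCV would do:

import cv2

bgr = cv2.imread("photo.jpg")                          # hypothetical input image
quantized = color_quantization_sk(bgr, clusters=16)    # reduce the palette to 16 colours
cv2.imwrite("photo_quantized.jpg", quantized)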
Example 13: test_minibatch_reassign
def test_minibatch_reassign():
    # Give a perfect initialization, but a large reassignment_ratio;
    # as a result all the centers should be reassigned and the model
    # should no longer be good
    for this_X in (X, X_csr):
        mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, batch_size=1,
                                     random_state=42)
        mb_k_means.fit(this_X)
        centers_before = mb_k_means.cluster_centers_.copy()
        try:
            old_stdout = sys.stdout
            sys.stdout = StringIO()
            # Turn on verbosity to smoke test the display code
            _mini_batch_step(this_X, (X ** 2).sum(axis=1),
                             mb_k_means.cluster_centers_,
                             mb_k_means.counts_,
                             np.zeros(X.shape[1], np.double),
                             False, random_reassign=True, random_state=42,
                             reassignment_ratio=1, verbose=True)
        finally:
            sys.stdout = old_stdout
        centers_after = mb_k_means.cluster_centers_.copy()
        # Check that all the centers have moved
        assert_greater(((centers_before - centers_after) ** 2).sum(axis=1).min(),
                       .2)
Author: Solvi, Project: scikit-learn, Lines: 25, Source: test_k_means.py
Example 14: train
def train(self, featurefiles, k=100, subsampling=10):
    nbr_images = len(featurefiles)
    descr = []
    descr.append(sift.read_features_from_file(featurefiles[0])[1])
    descriptors = descr[0]
    print "begin loading image feature files..."
    for i in np.arange(1, nbr_images):
        descr.append(sift.read_features_from_file(featurefiles[i])[1])
        # descriptors = np.vstack((descriptors, descr[i]))
        descriptors = np.vstack((descriptors, descr[i][::subsampling, :]))
        if i % 100 == 0:
            print i, "images have been loaded..."
    print "finish loading image feature files!"

    # self.voc, distortion = cluster.kmeans(descriptors[::subsampling,:], k, 1)
    print "begin MiniBatchKMeans cluster....patient"
    mbk = MiniBatchKMeans(k, init="k-means++", compute_labels=False, n_init=3, init_size=3 * k)
    # mbk.fit(descriptors[::subsampling,:])
    mbk.fit(descriptors)
    self.voc = mbk.cluster_centers_
    print "cluster finish!"

    self.nbr_word = self.voc.shape[0]
    imwords = np.zeros((nbr_images, self.nbr_word))
    for i in xrange(nbr_images):
        imwords[i] = self.project(descr[i])

    nbr_occurences = np.sum((imwords > 0) * 1, axis=0)
    self.idf = np.log((1.0 * nbr_images) / (1.0 * nbr_occurences + 1))
    self.traindata = featurefiles
Author: yangxian10, Project: CBIR_py, Lines: 29, Source: visual_word.py
Example 15: __init__
class MiniCluster:
    def __init__(self, nclusters=1000, psize=16):
        self.psize = psize
        self.patch_size = (self.psize, self.psize)
        self.nclusters = nclusters
        self.rng = np.random.RandomState(0)
        self.kmeans = MiniBatchKMeans(n_clusters=nclusters, random_state=self.rng, verbose=True)

    def fit(self, images):
        buffer = []
        index = 1
        t0 = time.time()

        # The online learning part: cycle over the whole dataset several times
        index = 0
        passes = 10
        for _ in range(passes):
            for img in images:
                data = extract_patches_2d(img, self.patch_size, max_patches=15,
                                          random_state=self.rng)
                data = np.reshape(data, (len(data), -1))
                # This casting is only needed for RGB data
                # buffer.append(data.astype(float))
                buffer.append(data)
                index += 1
                # if index % 1000 == 0:
                if index % (self.nclusters * 2) == 0:
                    data = np.concatenate(buffer, axis=0)
                    data = gcn(data)
                    data = whiten(data)
                    self.kmeans.partial_fit(data)
                    buffer = []

        dt = time.time() - t0
        print('done in %.2fs.' % dt)
Author: seanv507, Project: sklearn, Lines: 35, Source: mini_batch_clusters.py
Example 16: getCluster
def getCluster(X, k, M, opts):
    # M: knnNum
    # t0 = time()
    # print("knn graph")
    knn_graph = None
    # knn_graph = kneighbors_graph(X, M)
    # print("knn graph done in %0.3fs" % (time() - t0))
    # outfile.write("knn graph done in %0.3fs\n" % (time() - t0))
    # aggl = AgglomerativeClustering(linkage='ward', connectivity=knn_graph, n_clusters=k)
    if opts.minibatch:
        km = MiniBatchKMeans(n_clusters=k, init='k-means++', n_init=50,
                             init_size=1000, batch_size=1000, verbose=opts.verbose)
    else:
        km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=50,
                    verbose=opts.verbose)
    # aggl = AgglomerativeClustering(linkage='ward', n_clusters=k)
    print("Clustering sparse data with %s" % km)
    # outfile.write("Clustering sparse data with %s\n" % aggl)
    t0 = time()
    km.fit(X)
    print("done in %0.3fs" % (time() - t0))
    # outfile.write("clustering done in %0.3fs\n" % (time() - t0))
    print()

    labels = km.labels_
    clus2doc = {}
    for i in range(len(labels)):
        clus2doc[labels[i]] = clus2doc.get(labels[i], set())
        clus2doc[labels[i]].add(i)

    return (km, clus2doc, knn_graph)
Author: alwayforver, Project: tweetNews, Lines: 30, Source: dataParser.py
Example 17: VideoFrameReaders
def VideoFrameReaders(VideoDirectory):
    cap = cv2.VideoCapture(VideoDirectory)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    fgbg = cv2.createBackgroundSubtractorMOG2()
    timestamp = []
    count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            time = cap.get(0)  # current position in the video, in milliseconds
            timestamp.append(time)
            print timestamp
            if frame is None:
                break

            # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            image = frame.reshape((frame.shape[0] * frame.shape[1], 3))
            K = 4
            clf = MiniBatchKMeans(K)

            # predict cluster labels and quantize each color based on the labels
            cls_labels = clf.fit_predict(image)
            print cls_labels
            cls_quant = clf.cluster_centers_.astype("uint8")[cls_labels]

    except EOFError:
        pass
Author: saminaji, Project: CellMigration, Lines: 30, Source: kmeans_seg.py
Example 18: cluster_function
def cluster_function(user_np):
    ##############################################################################
    # Compute clustering with MiniBatchKMeans; the cluster count grows with the data size
    if len(user_np) < 10:
        n_cl = 2
    elif len(user_np) <= 100:
        n_cl = 10
    elif len(user_np) <= 500:
        n_cl = 15
    elif len(user_np) <= 1000:
        n_cl = 20
    else:
        n_cl = 30

    k_means = MiniBatchKMeans(n_clusters=n_cl, init='k-means++', max_iter=100, batch_size=100,
                              verbose=0, compute_labels=True, random_state=None, tol=0.0,
                              max_no_improvement=10, init_size=None, n_init=3,
                              reassignment_ratio=0.01)
    t0 = time.time()
    k_means.fit(user_np)
    t_batch = time.time() - t0
    print "Batch running time : ", t_batch

    k_means_labels = k_means.labels_
    # prediction = k_means.predict(user_np)

    return k_means_labels
Author: jimdsouza89, Project: Entertainment-and-media-analytics, Lines: 27, Source: user_profiling.py
Example 19: cluster_tfidf
def cluster_tfidf(tfidf):
    kmeans = MiniBatchKMeans(n_clusters=10, init='k-means++', n_init=1,
                             init_size=1000, batch_size=1000)
    kmeans.fit(tfidf)
    return kmeans.cluster_centers_
Author: sash-ko, Project: ml_playground, Lines: 7, Source: text_mining.py
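A plausible way to feed this helper, assuming the tf-idf matrix comes from scikit-learn's TfidfVectorizer; the corpus below is synthetic and must contain at least 10 documents because the function hard-codes n_clusters=10:

from sklearn.feature_extraction.text import TfidfVectorizer

docs = ["document %d about minibatch k-means and tf-idf" % i for i in range(50)]  # illustrative corpus
tfidf = TfidfVectorizer().fit_transform(docs)   # sparse (n_docs, n_terms) matrix
centers = cluster_tfidf(tfidf)                  # one row per cluster centre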
Example 20: generateCodebook
def generateCodebook(self, features):
    """ Generate codebook using extracted features """
    codebook = None

    if self._codebookGenerateMethod == 'k-means':
        # # Codebook generation using scipy k-means
        # while run:
        #     try:
        #         # Set missing = 'raise' to raise exception
        #         # when one of the clusters is empty
        #         whitenedFeatures = whiten(features)
        #         codebook, _ = kmeans2(whitenedFeatures,
        #                               self._codebookSize,
        #                               missing = 'raise')
        #
        #         # No empty clusters
        #         run = False
        #     except ClusterError:
        #         # If one of the clusters is empty, re-run k-means
        #         run = True

        # Codebook generation using sklearn k-means
        whitenedFeatures = whiten(features)
        kmeans = MiniBatchKMeans(n_clusters=config.codebookSize)
        kmeans.fit(whitenedFeatures)
        codebook = kmeans.cluster_centers_
    else:
        pass

    self._codebook = codebook
Author: 1987hasit, Project: BoVW_Image, Lines: 32, Source: bag_of_words.py
Note: The sklearn.cluster.MiniBatchKMeans class examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and any distribution or use should follow the License of the corresponding project. Do not repost without permission.