本文整理汇总了Python中sklearn.cluster.estimate_bandwidth函数的典型用法代码示例。如果您正苦于以下问题:Python estimate_bandwidth函数的具体用法?Python estimate_bandwidth怎么用?Python estimate_bandwidth使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了estimate_bandwidth函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: quick_shift
def quick_shift(data, tau, window_type, bandwidth, metric):
    """Perform medoid shift clustering of data with corresponding parameters.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features]
        Input points. Must support indexing by ``cluster_centers_idx``.
    tau : float or None
        Threshold parameter. Distance should not be over tau so that two
        points may be connected to each other. ``None`` is replaced by
        ``estimate_bandwidth(data)``.
    window_type : string
        Type of window to compute the weights matrix. Can be
        "flat" or "normal".
    bandwidth : float or None
        Value of the bandwidth for the window. ``None`` is replaced by
        ``estimate_bandwidth(data)``.
    metric : string
        Metric used to compute the distance. See pairwise_distances doc to
        look at all the possible values.

    Returns
    -------
    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.
    labels : array, shape=[n_samples]
        Cluster labels for each point.
    cluster_centers_idx : array, shape=[n_clusters]
        Index in data of cluster centers.
    """
    if tau is None:
        tau = estimate_bandwidth(data)
    if bandwidth is None:
        bandwidth = estimate_bandwidth(data)

    medoids, cluster_centers_idx = compute_stationary_medoids(
        data, tau, window_type, bandwidth, metric)
    cluster_centers = data[cluster_centers_idx]

    # Label of each stationary medoid = its position in cluster_centers_idx.
    # The same dict doubles as an O(1) "is this a cluster center?" test,
    # replacing a linear scan of cluster_centers_idx on every hop.
    label_of_center = {}
    for label, center in enumerate(cluster_centers_idx):
        label_of_center[center] = label

    labels = []
    for i in range(len(data)):
        # Follow the medoid chain until it reaches a stationary medoid.
        next_med = medoids[i]
        while next_med not in label_of_center:
            next_med = medoids[next_med]
        labels.append(label_of_center[next_med])

    return cluster_centers, np.asarray(labels), cluster_centers_idx
开发者ID:iamjakob,项目名称:MedoidShift-and-QuickShift,代码行数:57,代码来源:quick_shift_.py
示例2: meanshift_for_hough_line
def meanshift_for_hough_line(self):
    """Split the pixels of every Hough line into mean-shift clusters.

    For each key of ``self.points_of_hough_line`` the matching pixel list in
    ``self.pixels_of_hough_line`` is clustered with MeanShift. Each cluster is
    stored under the key ``tuple(hough_line) + (cluster_index,)``.

    On return ``self.pixels_of_hough_line`` maps those new keys to lists of
    pixel tuples, and ``self.points_of_hough_line`` maps them to the lists of
    values returned by ``self.img.get_bgr_value`` for those pixels.
    """
    pixels_of_label = {}
    for hough_line in self.points_of_hough_line:
        pixels = np.array(self.pixels_of_hough_line[hough_line])
        bandwidth = estimate_bandwidth(pixels, quantile=QUANTILE, n_samples=500)
        if bandwidth == 0:
            # An estimate of exactly 0 is replaced by a fixed bandwidth of 2.
            bandwidth = 2
        ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
        ms.fit(pixels)
        labels = ms.labels_
        n_clusters_ = len(np.unique(labels))
        for k in range(n_clusters_):
            label = tuple(hough_line) + (k,)
            # Build real lists: on Python 3 ``map`` returns a one-shot
            # iterator, which would be empty after its first traversal.
            pixels_of_label[label] = [tuple(p) for p in pixels[labels == k]]

    points_of_label = {}
    for label, pixels in pixels_of_label.items():
        points_of_label[label] = [self.img.get_bgr_value(p) for p in pixels]

    self.pixels_of_hough_line = pixels_of_label
    self.points_of_hough_line = points_of_label
开发者ID:catbaron-,项目名称:hough_transform_color_removal,代码行数:25,代码来源:main_k.py
示例3: meanShift
def meanShift(flat_image):
    """Cluster ``flat_image`` with MeanShift using an estimated bandwidth.

    The bandwidth comes from ``estimate_bandwidth`` with ``quantile=0.2`` and
    ``n_samples=500``.

    Returns
    -------
    (labels, cluster_centers)
        ``labels_`` and ``cluster_centers_`` of the fitted MeanShift model.
    """
    bandwidth = estimate_bandwidth(flat_image, quantile=0.2, n_samples=500)
    # Pass bandwidth by keyword: the constructor arguments of scikit-learn
    # estimators are keyword-only in current releases.
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(flat_image)
    return ms.labels_, ms.cluster_centers_
开发者ID:amitkumarx86,项目名称:project_python,代码行数:7,代码来源:kmeans_meanshift.py
示例4: meanshift
def meanshift(raw_data, t):
    """Cluster 2-D points derived from ``raw_data`` with MeanShift and plot them.

    The x coordinate of each point is ``raw_data[:, 1] + raw_data[:, 5]`` and
    the y coordinate is ``raw_data[:, 2] + raw_data[:, 6]``.

    Parameters
    ----------
    raw_data : 2-D array
        Indexed as ``raw_data[:, j]`` for columns 1, 2, 5 and 6.
    t : int or str
        Figure identifier passed to ``plt.figure``.

    The function prints the number of clusters, draws the members and centre
    of each cluster and calls ``plt.show()``. It returns nothing.
    """
    X = raw_data[:, 1] + raw_data[:, 5]
    Y = raw_data[:, 2] + raw_data[:, 6]
    # Plain (n_samples, 2) ndarray; np.mat would produce an np.matrix, which
    # is deprecated and not accepted by current scikit-learn estimators.
    data = np.column_stack((np.asarray(X).ravel(), np.asarray(Y).ravel()))

    # The bandwidth is estimated from the data rather than set by hand.
    bandwidth = estimate_bandwidth(data, quantile=0.2, n_samples=500)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(data)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))
    print("number of estimated clusters : %d" % n_clusters_)

    # Plot result
    plt.figure(t)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        my_members = labels == k
        cluster_center = cluster_centers[k]
        plt.plot(data[my_members, 0], data[my_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.axis('equal')
    plt.show()
开发者ID:kartikbk,项目名称:mtc_parking,代码行数:30,代码来源:SVM_alpha.py
示例5: _fit_mean_shift
def _fit_mean_shift(self, x):
    """Fit MeanShift over a grid of quantiles and score each fit with a GMM.

    For every position ``c`` in ``self.crange`` the bandwidth quantile is
    ``0.015 * (c + 1)``; for every repeat ``r`` in ``range(self.repeats)`` the
    bandwidth is estimated with ``random_state=r``. Results are stored at
    index ``c * self.repeats + r``:

    * ``self._labels``     -- MeanShift labels
    * ``self._parameters`` -- MeanShift cluster centres
    * ``self._ll``         -- summed GMM score of ``x``
    * ``self._gof``        -- AIC or BIC, depending on ``self.gof_type``
    """
    for c in range(len(self.crange)):
        quant = 0.015 * (c + 1)
        for r in range(self.repeats):
            bandwidth = estimate_bandwidth(
                x, quantile=quant, random_state=r)
            idx = c * self.repeats + r
            model = MeanShift(
                bandwidth=bandwidth, bin_seeding=True)
            model.fit(x)
            self._labels[idx] = model.labels_
            self._parameters[idx] = model.cluster_centers_

            # build equivalent gmm: means and weights are set by hand and the
            # model is fitted with n_iter=0 and init_params='c'.
            k = model.cluster_centers_.shape[0]
            model_gmm = GMM(n_components=k, covariance_type=self.cvtype,
                            init_params='c', n_iter=0)
            model_gmm.means_ = model.cluster_centers_
            # Mixture weights must sum to one, so normalise the cluster
            # sizes instead of assigning raw counts. dtype=float avoids
            # integer division on Python 2.
            counts = sp.array(
                [(model.labels_ == i).sum() for i in range(k)], dtype=float)
            model_gmm.weights_ = counts / counts.sum()
            model_gmm.fit(x)

            # evaluate goodness of fit
            self._ll[idx] = model_gmm.score(x).sum()
            if self.gof_type == 'aic':
                self._gof[idx] = model_gmm.aic(x)
            if self.gof_type == 'bic':
                self._gof[idx] = model_gmm.bic(x)
            # Same output as the Python 2 ``print a, b, c`` statement.
            print("%s %s %s" % (quant, k, self._gof[idx]))
开发者ID:pmeier82,项目名称:BOTMpy,代码行数:30,代码来源:cluster.py
示例6: mean_shift
def mean_shift(X):
    """Run MeanShift on ``X`` and return ``(labels, cluster_centers)``.

    The bandwidth is estimated with ``quantile=0.2`` and ``n_samples=1000``.
    The model is built with ``bin_seeding=True`` and ``cluster_all=False``.
    """
    model = MeanShift(
        bandwidth=estimate_bandwidth(X, quantile=0.2, n_samples=1000),
        bin_seeding=True,
        cluster_all=False,
    )
    model.fit(X)
    return model.labels_, model.cluster_centers_
开发者ID:athoune,项目名称:Palette,代码行数:7,代码来源:colors.py
示例7: cluster_pixels_ms
def cluster_pixels_ms(self):
    """
    Cluster the descriptors of the foreground pixels by mean shift.

    The descriptors of all keys of ``self.img.fg_pixels`` are reduced with PCA
    to ``int(VECTOR_DIMENSION) // 2`` components and clustered with MeanShift.
    Each pixel's label is written into ``self.labels_map``. Then, for every
    label in ``range(K)``, the pixel indices and the matching entries of
    ``self.img.bgr`` are stored in ``self.pixels_of_hough_line_in_sphere`` and
    ``self.cluster_bgr``.

    :type self: ColorRemover
    """
    # A real list: dict views cannot be indexed or relied on as sequences
    # on Python 3.
    fg_pixels = list(self.img.fg_pixels.keys())
    descriptors = np.array([self.descriptor_map[r][c] for r, c in fg_pixels])
    # Floor division keeps n_components an integer on Python 3 as well.
    descriptors = PCA(
        n_components=int(VECTOR_DIMENSION) // 2).fit_transform(descriptors)

    bandwidth = estimate_bandwidth(descriptors, quantile=0.05)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(descriptors)

    for xy, label in zip(fg_pixels, ms.labels_):
        # Item assignment replaces ndarray.itemset, removed in NumPy 2.0.
        self.labels_map[tuple(xy)] = label

    # save the indices and BGR values of each cluster as a dictionary with
    # keys of label
    # NOTE(review): K is a module-level constant, while MeanShift chooses its
    # own number of clusters -- confirm that labels >= K may be ignored here.
    for label in range(K):
        mask = self.labels_map == label
        self.pixels_of_hough_line_in_sphere[label] = [
            tuple(idx) for idx in np.argwhere(mask)]
        self.cluster_bgr[label] = [tuple(bgr) for bgr in self.img.bgr[mask]]
开发者ID:catbaron-,项目名称:hough_transform_color_removal,代码行数:26,代码来源:main_km_to_hough_line.py
示例8: do_meanshift
def do_meanshift(s_path, band1, band2, band3, band4, colour1, colour2,
                 make_plot):
    '''Meanshift clustering to determine the number of clusters in the
    data, which is passed to KMEANS function.

    ``colour1`` and ``colour2`` are stacked into a two-column array, scaled
    with ``preprocessing.scale`` and clustered with MeanShift
    (``cluster_all=False``). ``s_path`` and ``band1``..``band4`` are only
    forwarded to ``make_ms_plots``, which is called when ``"meanshift"`` is
    in ``make_plot``.

    Returns ``(n_clusters, bandwidth)``, where ``n_clusters`` counts the
    distinct labels that are >= 0.
    '''
    X = np.vstack([colour1, colour2]).T

    # Scale data because meanshift generates circular clusters
    X_scaled = preprocessing.scale(X)

    # Estimate the bandwidth on the same (scaled) data that is clustered;
    # estimating it on the unscaled X gives a value in the wrong units.
    # Bandwidth can also be set manually.
    bandwidth = estimate_bandwidth(X_scaled)

    # Meanshift clustering
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X_scaled)
    labels_unique = np.unique(ms.labels_)
    # Only labels >= 0 are kept as clustered objects.
    objects = ms.labels_[ms.labels_ >= 0]
    n_clusters = len(labels_unique[labels_unique >= 0])

    # Make plot
    if "meanshift" in make_plot:
        make_ms_plots(s_path, colour1, colour2, n_clusters, X, ms,
                      band1, band2, band3, band4, objects)
    return (n_clusters, bandwidth)
开发者ID:PBarmby,项目名称:m83_clustering,代码行数:25,代码来源:Clustering_Analysis.py
示例9: make
def make(filename, precision, source='test.geojson'):
    """Snap GeoJSON coordinates to MeanShift cluster centres and save the result.

    Parameters
    ----------
    filename : str
        Path of the JSON file to write.
    precision : number
        Divisor applied to the estimated bandwidth.
    source : str, optional
        Path of the GeoJSON file to read. Defaults to ``'test.geojson'``,
        the path that was previously hard-coded.

    Only features accepted by ``pred`` are processed. Their coordinates are
    reshaped to ``(2 * n_features, 2)``, i.e. two points per feature, and each
    point is replaced by the centre of its cluster.
    """
    with open(source) as f:
        data = json.load(f)

    # Evaluate the predicate once so the points and the write-back loop are
    # guaranteed to see the same features in the same order.
    selected = [geo for geo in data['features'] if pred(geo)]
    points = [geo['geometry']["coordinates"] for geo in selected]
    print(points)
    ar_points = array(points).reshape(len(points) * 2, 2)
    print(ar_points)

    bandwidth = estimate_bandwidth(ar_points) / precision
    cluster = MeanShift(bandwidth=bandwidth)
    cluster.fit(ar_points)
    labels = cluster.labels_
    cluster_centers = cluster.cluster_centers_
    print('clusters: %d' % len(unique(labels)))

    for i, geo in enumerate(selected):
        # Rows 2*i and 2*i + 1 of ar_points belong to feature i.
        geo['geometry']["coordinates"] = [
            list(cluster_centers[labels[i * 2 + j]])
            for j in range(2)
        ]
    with open(filename, 'w') as f:
        json.dump(data, f)
开发者ID:hackerspace-silesia,项目名称:mapotrans,代码行数:27,代码来源:clustering.py
示例10: BA_meanshift_cluster
def BA_meanshift_cluster(mark, chrom):
    '''
    Perform mean shift clustering on 2D data and plot the clusters.

    The data is read from the tab-separated file
    ``<data_dir>/tmp/<mark>/<chrom>-<mark>.csv``. Each row becomes the point
    ((chromStart+chromEnd)*0.5, chromEnd-chromStart).

    @param mark: used in the directory name and the file name
    @param chrom: used in the file name
    @return: None
    '''
    import matplotlib.pyplot as plt
    from itertools import cycle

    path = os.path.join(get_data_dir(), "tmp", mark,
                        "{0}-{1}.csv".format(chrom, mark))
    DF = pd.read_csv(path, sep='\t')
    chrom_start = DF.loc[:, 'chromStart'].values
    chrom_end = DF.loc[:, 'chromEnd'].values
    S_x = 0.5 * (chrom_end + chrom_start)
    S_y = chrom_end - chrom_start
    # NOTE(review): only rows 7000-7999 are clustered; the slice is hard-coded.
    X = np.column_stack((S_x[7000:8000], S_y[7000:8000]))
    print(X)

    bandwidth = estimate_bandwidth(X, quantile=0.1, n_samples=1000)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    # Computed once; it was previously rebuilt for the print, loop and title.
    unique_labels = list(set(labels))
    print(unique_labels)

    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(len(unique_labels)), colors):
        my_members = labels == k
        plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
    plt.title('Estimated number of clusters: %d' % len(unique_labels))
    plt.show()
开发者ID:LuyiTian,项目名称:ChIPOmic,代码行数:26,代码来源:comp_hm.py
示例11: mean_shift_cluster_analysis
def mean_shift_cluster_analysis(x,y,quantile=0.2,n_samples=1000):
    """Cluster the points ``(x[i], y[i])`` with MeanShift, plot them and return the labels.

    ADAPTED FROM:
    http://scikit-learn.org/stable/auto_examples/cluster/plot_mean_shift.html#example-cluster-plot-mean-shift-py

    Parameters
    ----------
    x, y : arrays
        Each is reshaped to a column of ``shape[0]`` rows.
    quantile, n_samples :
        Forwarded to ``estimate_bandwidth``.

    Returns
    -------
    labels : array
        ``labels_`` of the fitted MeanShift model.
    """
    X = np.hstack((x.reshape((x.shape[0], 1)), y.reshape((y.shape[0], 1))))
    bandwidth = estimate_bandwidth(X, quantile=quantile, n_samples=n_samples)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))

    colors = 'bgrcmyk'
    for i in range(n_clusters_):
        my_members = labels == i
        cluster_center = cluster_centers[i]
        # Wrap around the palette so any number of clusters can be drawn;
        # indexing a fixed-length string raised IndexError past its end.
        color = colors[i % len(colors)]
        plt.scatter(X[my_members, 0], X[my_members, 1], s=90, c=color, alpha=0.7)
        plt.scatter(cluster_center[0], cluster_center[1], marker='+', s=280, c=color)

    # Pad the axis limits by 3% of the data range on each side.
    tolx = (X[:, 0].max() - X[:, 0].min()) * 0.03
    toly = (X[:, 1].max() - X[:, 1].min()) * 0.03
    plt.xlim(X[:, 0].min() - tolx, X[:, 0].max() + tolx)
    plt.ylim(X[:, 1].min() - toly, X[:, 1].max() + toly)
    plt.show()
    return labels
开发者ID:armatita,项目名称:GEOMS2,代码行数:28,代码来源:cerena_multivariate_utils.py
示例12: clusterise_data
def clusterise_data(data_obj):
    """Store a cluster label under the "cluster" key of every day but the last.

    The algorithm is chosen by the module-level ``ALGO``: "MeanShift",
    "Affinity Propagation" or "KMeans". Any other value raises ``Exception``.

    @param data_obj: List of dictionaries with the keys "wake_up", "sleep",
                     "sleep_duration" and "activity"
    """
    n_days = len(data_obj) - 1  # the current (last) day is not clustered

    # One weighted feature row per day; wake_up and sleep_duration carry the
    # largest weights.
    rows = [
        [
            5 * data_obj[day]["wake_up"],
            1 * data_obj[day]["sleep"],
            5 * data_obj[day]["sleep_duration"],
            0.5 * data_obj[day]["activity"],
        ]
        for day in range(n_days)
    ]
    points = NumpyArray(rows)

    if ALGO == "MeanShift":
        bandwidth = estimate_bandwidth(points, quantile=0.2, n_samples=20)
        labels = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(points)
    elif ALGO == "KMeans":
        labels = KMeans(init='k-means++', n_clusters=5, n_init=10).fit_predict(points)
    elif ALGO == "Affinity Propagation":
        labels = AffinityPropagation().fit_predict(points)
    else:
        raise Exception("Algorithm not defined: "+str(ALGO))

    for day in range(n_days):
        data_obj[day]["cluster"] = labels[day]

    # Report the size of each cluster and the silhouette score.
    label_list = labels.tolist()
    for unique_label in remove_duplicates(labels):
        size = label_list.count(unique_label)
        debug_print("".join([ALGO, ": Cluster ", str(unique_label),
                             " contains ", str(size), " data points"]))
    score = metrics.silhouette_score(points, labels, metric='euclidean')*100
    debug_print("".join([ALGO, ": Silhouette coefficient", str(score), "%"]))
开发者ID:qdm12,项目名称:Staminaputations,代码行数:33,代码来源:api_clustering.py
示例13: ms_algo
def ms_algo(X, bandwidth=None):
    """Run MeanShift on ``X``, print the cluster centres and return them.

    Parameters
    ----------
    X : array
        Data to cluster; ``X.shape[0]`` is used as the sample count.
    bandwidth : float, optional
        When ``None``, estimated with ``quantile=0.2`` over all
        ``X.shape[0]`` samples.

    Returns
    -------
    cluster_centers : array
        ``cluster_centers_`` of the fitted model.
    """
    if bandwidth is None:
        n_samples = X.shape[0]
        bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=n_samples)

    # Apply the mean shift algorithm from the sklearn library
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)

    # collect from the meanshift algorithm the labels and the centers of the clusters
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_
    n_clusters_ = len(np.unique(labels))  # Number of clusters

    # Print section
    print("The number of clusters is: %d" % n_clusters_)
    print("The centers are:")
    for i in range(n_clusters_):
        # Index and centre on one line, as the Python 2 ``print i,`` did.
        print("%s %s" % (i, cluster_centers[i]))
    return cluster_centers
开发者ID:PFAWeb2Control,项目名称:combined_results,代码行数:26,代码来源:meanshift.py
示例14: clustering
def clustering(matrix,lst,blst):
    """Combine the ``select``-ed results of four clusterers on ``matrix``.

    DBSCAN, KMeans, MeanShift and Birch each label ``matrix``; every label
    array is passed through the external helper ``select``. Entry ``lst[i]``
    goes into the first returned list when all four selected values at ``i``
    are truthy, and into the second when at least one is. ``blst`` is not
    used.

    Returns ``(intersection, suspects)``.
    """
    dbscan_sel = select(cluster.DBSCAN(eps=8E-4).fit_predict(matrix))
    print("DBScan finished.")

    kmeans_sel = select(cluster.KMeans(n_clusters=300).fit_predict(matrix))
    print("KMeans finished.")

    bandwidth = cluster.estimate_bandwidth(matrix,quantile=0.01,n_samples=1000)
    meanshift_sel = select(cluster.MeanShift(bandwidth=bandwidth).fit_predict(matrix))
    print("MeanShift finished.")

    # Birch is given the data as nested lists rather than the array itself.
    birch_sel = select(cluster.Birch(threshold=0.01).fit_predict(matrix.tolist()))
    print("Birch finished.")

    in_all, in_any = [], []
    counter = 0  # never incremented; still printed below
    for row in range(len(matrix)):
        if dbscan_sel[row] and kmeans_sel[row] and meanshift_sel[row] and birch_sel[row]:
            in_all.append(lst[row])
        if dbscan_sel[row] or kmeans_sel[row] or meanshift_sel[row] or birch_sel[row]:
            in_any.append(lst[row])
    print(str(counter))
    return in_all,in_any
开发者ID:yszhong,项目名称:PrimRepo,代码行数:30,代码来源:clst.py
示例15: train
def train(trainingData, pklFile, clusteringAll, numberOfClusters=None):
    """Fit a clustering model, dump it with joblib and return a summary.

    Parameters
    ----------
    trainingData : array-like
        Data passed to ``fit``.
    pklFile : str
        Output path of the pickled model. An empty string means
        ``'learntModel/learntModel.pkl'`` inside a freshly recreated
        ``learntModel`` directory.
    clusteringAll : bool
        Forwarded as ``cluster_all`` to MeanShift.
    numberOfClusters : int, optional
        ``None`` selects MeanShift with an estimated bandwidth; any other
        value selects K-Means with that many clusters.

    Returns
    -------
    dict with the keys ``"numberOfClusters"``, ``"labels"`` and
    ``"clusterCenters"``.
    """
    # ========================================================================= #
    # =============== STEP 1. DEFINE OUTPUT LEARNT MODEL FILE ================= #
    # ========================================================================= #
    if pklFile == '':
        import shutil
        # Remove and recreate sequentially. The former shell line
        # 'rm -rf learntModel & mkdir learntModel' ran rm in the background
        # and raced with mkdir.
        shutil.rmtree('learntModel', ignore_errors=True)
        if not os.path.isdir('learntModel'):
            os.mkdir('learntModel')
        pklFile = 'learntModel/learntModel.pkl'

    # ========================================================================= #
    # =============== STEP 2. PERFORM CLUSTERING TO THE DATA ================== #
    # ========================================================================= #
    if numberOfClusters is None:
        print("Running MeanShift Model...")
        bandwidth = estimate_bandwidth(trainingData)
        model = MeanShift(bandwidth=bandwidth, bin_seeding=False,
                          cluster_all=clusteringAll)
    else:
        print("Running K-Means Model...")
        model = KMeans(init='k-means++', n_clusters=numberOfClusters)

    model.fit(trainingData)
    joblib.dump(model, pklFile)
    return {"numberOfClusters": len(model.cluster_centers_),
            "labels": model.labels_,
            "clusterCenters": model.cluster_centers_}
开发者ID:ZAZAZakari,项目名称:ML-Algorithm,代码行数:25,代码来源:clustering.py
示例16: test_estimate_bandwidth
def test_estimate_bandwidth(self):
    """The ModelFrame accessor must return the same bandwidth as sklearn does on the iris data."""
    iris = datasets.load_iris()
    frame = pdml.ModelFrame(iris)

    via_accessor = frame.cluster.estimate_bandwidth(
        random_state=self.random_state)
    via_sklearn = cluster.estimate_bandwidth(
        iris.data, random_state=self.random_state)

    self.assertEqual(via_accessor, via_sklearn)
开发者ID:Sandy4321,项目名称:pandas-ml,代码行数:7,代码来源:test_cluster.py
示例17: mean
def mean(X, save_fig=False, params_labels=None, prefix='clusters'):
'''
Compute clustering with MeanShift and return the label of every row of X.

X is converted with np.array, the bandwidth is estimated with quantile=0.2
and a MeanShift model with bin_seeding=True is fitted. When save_fig is
true, plotClusters is called with the data, the fitted model, prefix and
params_labels. The number of distinct labels is logged at debug level.
'''
# Logged before the conversion below, so X must already support X[0].
logger.debug('Calculating MeanShift clusters using %d parameters'%len(X[0]))
X = np.array( X )
# NOTE(review): indentation was lost in this copy, so it is unclear which of
# the following statements belong to the warnings-suppressing "with" block --
# confirm against the original project before re-indenting.
with warnings.catch_warnings():
warnings.simplefilter("ignore")
bandwidth = estimate_bandwidth(X, quantile=0.2)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
# Optional plot of the clustering.
if save_fig:
plotClusters(X, ms, method='mean', prefix=prefix,
params=params_labels)
# The number of distinct labels is only used for the log message.
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
logger.debug('Found %d clusters with MeanShift algorithm'%n_clusters_)
return labels
开发者ID:EBosi,项目名称:Skaffolder,代码行数:27,代码来源:clustering.py
示例18: simplify_data1
def simplify_data1(x):
    """Replace each value of ``x`` by the mean of its MeanShift cluster.

    ``x`` is treated as a 1-D sequence of numbers. It is paired with a zero
    second coordinate so that it can be clustered as 2-D data (bandwidth
    estimated with ``quantile=0.2``). Debug information is printed for every
    cluster and every replaced value.

    Returns
    -------
    array
        The first column of the clustered data, with each entry set to the
        average of its cluster.
    """
    # column_stack works on Python 3, where zip() is lazy and np.array(zip())
    # does not build a 2-D array. The builtin float replaces the removed
    # np.float alias.
    X = np.column_stack((x, np.zeros(len(x)))).astype(float)
    bandwidth = estimate_bandwidth(X, quantile=0.2)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    n_clusters_ = len(np.unique(labels))

    print(x)
    for k in range(n_clusters_):
        my_members = labels == k
        members = X[my_members, 0]  # boolean indexing returns a copy
        value = np.average(members)
        print("cluster %s: %s %s" % (k, members, value))

        running_sum = 0
        # Write the mean to the rows that actually belong to cluster k.
        # The previous code overwrote a contiguous index range sized like
        # the cluster, which is only right when members are stored
        # contiguously in label order.
        for i in np.flatnonzero(my_members):
            running_sum += X[i][0]
            print("%s %s %s" % (running_sum, X[i][0], i))
            X[i][0] = value
        print("FINAL %s" % (running_sum / len(members)))
    return X[:, 0]
开发者ID:leaguilar,项目名称:playground,代码行数:26,代码来源:plot_data.py
示例19: cluster_data
def cluster_data(df_story, algo='kmeans', params='{}'):
    """Build the model selected by ``algo``, fit it and return it.

    Parameters
    ----------
    df_story : mapping / DataFrame
        For 'gac' and 'gactemporal' it is passed to ``fit`` unchanged. For
        every other algorithm ``recon_vsm(df_story['vsm'])`` is fitted.
    algo : str
        Algorithm name understood by ``algo_select``.
    params : str
        Python literal, parsed with ``ast.literal_eval``, that is passed to
        ``algo_select``. For 'meanshift' a missing or ``None`` ``'bandwidth'``
        entry is filled in with ``estimate_bandwidth(vsm, n_samples=200)``.
        An explicit bandwidth is respected.

    Returns
    -------
    The fitted model.
    """
    # The double space matches the output of the Python 2 print statement
    # this line replaces.
    print("[EDEN I/O -- cluster_data] algo:  %s" % (algo,))
    start = time.time()
    params = ast.literal_eval(params)

    if algo in ('gac', 'gactemporal'):
        features = df_story
    else:
        features = recon_vsm(df_story['vsm'])
        if algo == 'meanshift' and params.get('bandwidth') is None:
            params['bandwidth'] = estimate_bandwidth(features, n_samples=200)

    model = algo_select(algo, params)
    model.fit(features)

    end = time.time()
    print("[EDEN I/O -- cluster_data.py] Total elapsed time:  %s" % (end - start,))
    return model
开发者ID:jonathanmanfield,项目名称:EDEN,代码行数:29,代码来源:edenutil.py
示例20: call_kmean
def call_kmean(num_cluster, data, update_flag):
    """Standardise ``data``, cluster it with MiniBatchKMeans and return the labels.

    Parameters
    ----------
    num_cluster : int
        Number of clusters.
    data : array-like
        Passed to ``StandardScaler().fit_transform``.
    update_flag : bool
        When true (user-uploaded files) the integer label array is returned
        directly.

    Returns
    -------
    labels : array
        When ``update_flag`` is true.
    (label_dict, unique_dict) : tuple of dict
        Otherwise. ``label_dict`` maps ``str(position)`` to the label as a
        float, and ``unique_dict`` does the same for the sorted unique labels.
        Both dicts and every unique label are printed.
    """
    X = StandardScaler().fit_transform(data)
    # The former estimate_bandwidth(X, quantile=0.3) call was removed: its
    # result was never used and it computes pairwise distances for nothing.
    two_means = MiniBatchKMeans(n_clusters=num_cluster)
    # Builtin int replaces the removed np.int alias.
    labels = two_means.fit(X).labels_.astype(int)

    # if user upload files
    if update_flag:
        return labels

    label_dict = {}
    for position, label in enumerate(labels):
        label_dict[str(position)] = float(label)
    print(label_dict)

    unique_dict = {}
    for position, uniq in enumerate(np.unique(labels)):
        print(uniq)
        unique_dict[str(position)] = float(uniq)
    print(unique_dict)
    return label_dict, unique_dict
开发者ID:benaneesh,项目名称:cluster,代码行数:27,代码来源:algorithm_manager.py
注:本文中的sklearn.cluster.estimate_bandwidth函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论