本文整理汇总了Python中sklearn.metrics.silhouette_score函数的典型用法代码示例。如果您正苦于以下问题:Python silhouette_score函数的具体用法?Python silhouette_score怎么用?Python silhouette_score使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了silhouette_score函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_sihouette
def test_sihouette(self):
    """Print the project's Silhouette.score next to sklearn's
    silhouette_score for two small labelled datasets (last column
    is the cluster label) so the two implementations can be compared."""
    samples = [
        np.array([[1, 2, 1], [1, 3, 1], [7, 8, 2], [7, 9, 2], [13, 19, 3]]),
        np.array([[1, 2, 1], [1, 3, 2], [7, 8, 2], [7, 9, 1], [13, 19, 3]]),
    ]
    for data in samples:
        print(Silhouette.score(data))
        print(silhouette_score(data, data[:, -1]))
开发者ID:brenden17,项目名称:infinity,代码行数:7,代码来源:silhouette.py
示例2: compute_silhouette_score
def compute_silhouette_score(clusters):
    """
    Compute the euclidean silhouette score and the cosine silhouette score. Return the scores.
    :param clusters: clusters assignment for each tweet
    :type clusters: list
    :return: the silhouette scores (euclidean, cosine)
    :rtype: tuple
    """
    # Load the TF-IDF matrix. FIX: use a context manager so the pickle file
    # handle is closed even if unpickling fails (the original leaked it).
    # NOTE(review): n_data and txt_file are read from module scope — confirm.
    with open('TF-IDF Matrix - ' + str(n_data) + ' Tweets.p', 'rb') as pickle_file:
        tfidf_matrix = pickle.load(pickle_file)
    # Compute the Silhouette Score
    start = timer()
    # Cosine distance = 1 - cosine similarity; metric='precomputed' below
    # expects a distance (not similarity) matrix.
    distance = 1 - cosine_similarity(tfidf_matrix)
    euclidean_silhouette_score = silhouette_score(tfidf_matrix, clusters, metric='euclidean')
    cosine_silhouette_score = silhouette_score(distance, clusters, metric='precomputed')
    end = timer()
    print('Silhouette Score (Euclidean): %.4f' % euclidean_silhouette_score)
    print('Silhouette Score (Cosine): %.4f' % cosine_silhouette_score)
    print('Obtained the Silhouette Score in %.2f seconds' % (end - start))
    txt_file.write('Silhouette Score (Euclidean): %.4f. \n' % euclidean_silhouette_score)
    txt_file.write('Silhouette Score (Cosine): %.4f. \n' % cosine_silhouette_score)
    txt_file.write('Obtained the Silhouette Score in %.2f seconds. \n' % (end - start))
    return euclidean_silhouette_score, cosine_silhouette_score
开发者ID:enrsr,项目名称:CommunityDetection,代码行数:28,代码来源:Clustering.py
示例3: get_constant_height_labels
def get_constant_height_labels(clustering, n_clusters=None):
    """
    Use silhouette analysis to select the best height to cut a linkage matrix.

    :param clustering: dict with keys 'reorder_vec', 'linkage', 'distance_df'
        (precomputed distance matrix) and 'labels' (reference labeling used
        for the mutual-information comparison)
    :param n_clusters: int (optional). If given, cut at exactly this many
        clusters instead of searching for the best k.
    :return: (labels, scores, MI) where scores is a list of (k, silhouette)
        tuples and MI is the adjusted mutual information vs. clustering['labels']
    """
    N_variables = len(clustering['reorder_vec'])
    scores = []
    if n_clusters is None:
        # Search k in [2, N/3); silhouette is undefined for degenerate cuts,
        # so those k are skipped.
        for k_clusters in range(2, N_variables//3):
            labels = cut_tree(clustering['linkage'], n_clusters=k_clusters)
            try:
                score = silhouette_score(clustering['distance_df'],
                                         labels.ravel(), metric='precomputed')
            except ValueError:
                continue
            scores.append((k_clusters, score))
        best_k = max(scores, key=lambda x: x[1])[0]
        labels = cut_tree(clustering['linkage'], n_clusters=best_k)
    else:
        labels = cut_tree(clustering['linkage'], n_clusters=n_clusters)
        # BUG FIX: cut_tree returns an (n, 1) array; silhouette_score needs a
        # 1-D label vector (the search branch above already raveled it).
        score = silhouette_score(clustering['distance_df'],
                                 labels.ravel(), metric='precomputed')
        scores.append((n_clusters, score))
    labels = reorder_labels(labels.flatten(), clustering['linkage'])
    # comparison against the reference labeling
    MI = adjusted_mutual_info_score(labels, clustering['labels'])
    return labels, scores, MI
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:30,代码来源:utils.py
示例4: bench_k_means
def bench_k_means(estimator, data, labels):
    """Fit a clustering estimator on `data` and print/return the standard
    external metrics against the true `labels` plus the euclidean silhouette.

    :param estimator: unfitted clusterer exposing .fit and .labels_
    :param data: feature matrix
    :param labels: ground-truth labels for the external metrics
    :return: [homogeneity, completeness, v_measure, ARI, AMI, silhouette]
    """
    t0 = time()
    estimator.fit(data)
    print("time to fit: {:.5}".format(time() - t0))
    homogenity = metrics.homogeneity_score(labels, estimator.labels_)
    completeness = metrics.completeness_score(labels, estimator.labels_)
    v_measure = metrics.v_measure_score(labels, estimator.labels_)
    print("homogenity {:.5}, completeness {:.5}, v_measure_score {:.5}".format(
        homogenity, completeness, v_measure)
    )
    adj_rand_score = metrics.adjusted_rand_score(
        labels, estimator.labels_
    )
    print("adjusted_rand_score {:.5}".format(adj_rand_score))
    adj_mutual_info_score = metrics.adjusted_mutual_info_score(
        labels, estimator.labels_
    )
    print("adjusted_mutual_info_score {:.5}".format(
        adj_mutual_info_score)
    )
    # PERF FIX: compute the silhouette once and reuse it — the original
    # recomputed it for the print, doubling an O(n^2) pairwise-distance pass.
    silhouette_score = metrics.silhouette_score(
        data, estimator.labels_, metric='euclidean'
    )
    print("silhouette_score {:.5}".format(silhouette_score))
    return [
        homogenity, completeness, v_measure, adj_rand_score,
        adj_mutual_info_score, silhouette_score
    ]
开发者ID:all3fox,项目名称:term_paper,代码行数:35,代码来源:rules_cluster.py
示例5: clustering_drawing
def clustering_drawing():
X,Tag = getData()
n = 3
kmeans_model = KMeans(n_clusters = n).fit(X)
labels = kmeans_model.labels_
score = metrics.silhouette_score(X, labels, metric='euclidean')
scoreList = [score]
nList = [3,4,5,6,7,8,9]
for i in range(4,10):# 聚类4-10类循环
# print i
kmeans_model_temp = KMeans(n_clusters=i).fit(X)
labels_temp = kmeans_model_temp.labels_
score_temp = metrics.silhouette_score(X, labels_temp, metric='euclidean')
print i,score_temp
scoreList.append(float(score_temp))
if float(score_temp) > score:
kmeans_model = kmeans_model_temp
labels = labels_temp
n = i
print n,labels
plt.axis([3,9,0.8,1.0])
plt.plot(nList, scoreList, 'r--')
plt.show()
开发者ID:yinruyi,项目名称:Project,代码行数:26,代码来源:cluster.py
示例6: print_cluster
def print_cluster(clusterTrainClass, labels, clusterTestStory):
    """Print external clustering metrics comparing `labels` to the reference
    classes, then the silhouette coefficient of the clustering itself.

    :param clusterTrainClass: ground-truth class assignments
    :param labels: predicted cluster labels
    :param clusterTestStory: feature matrix used for the silhouette score
    (Note: mixes py3-style print() with Python 2 print statements.)
    """
    # External (label-vs-label) agreement metrics
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(clusterTrainClass, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(clusterTrainClass, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(clusterTrainClass, labels))
    print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(clusterTrainClass, labels))
    print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(clusterTrainClass, labels))
    # Internal metric: silhouette on the data itself
    print "Silhouette Coefficient:"
    print metrics.silhouette_score(clusterTestStory, labels, metric='euclidean')
开发者ID:SebCompski,项目名称:DataMining,代码行数:8,代码来源:classifier.py
示例7: benchmark
def benchmark(estimator, name, data):
    """Fit `estimator` on `data`, print a one-line summary and return
    [fit_time, inertia, silhouette].

    :param estimator: unfitted clusterer exposing .fit, .inertia_, .labels_
    :param name: label used in the printed row
    :param data: feature matrix
    """
    t0 = time()
    estimator.fit(data)
    # FIX: capture the fit time once. The original evaluated time() - t0 again
    # for the return value, so the printed and returned times disagreed (the
    # latter also included the silhouette computation).
    elapsed = time() - t0
    # FIX: compute the silhouette once — the original ran the O(n^2) metric
    # twice, once for the print and once for the return.
    sil = metrics.silhouette_score(data, estimator.labels_,
                                   metric='euclidean', sample_size=None)
    print('% 9s %.2fs %i %.3f'
          % (name, elapsed, estimator.inertia_, sil))
    return [elapsed, estimator.inertia_, sil]
开发者ID:warrenonedge,项目名称:Python-Code,代码行数:9,代码来源:affinity_analysis.py
示例8: drawwDBSCAN
def drawwDBSCAN(newarray,comparearray,cityname):
    """Run DBSCAN on a standardised feature array, plot the clusters, save
    the figure, and append the scores to a summary CSV.

    :param newarray: raw feature array for one city
    :param comparearray: reference data forwarded to drawlableCLuster
    :param cityname: used for the image filename and the CSV rows; assumed to
        contain at least 4 '_'-separated fields — TODO confirm against callers
    (Python 2: print statements.)
    """
    # Zero-mean / unit-variance scaling before density-based clustering
    X = StandardScaler().fit_transform(newarray)
    # print newarray
    # print "#########"
    # print X
    # X = newarray
    ##############################################################################
    # Compute DBSCAN
    db = DBSCAN(eps=0.3, min_samples=10).fit(X)
    # Boolean mask flagging the core samples found by DBSCAN
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    labels = db.labels_
    # Number of clusters in labels, ignoring noise (label -1) if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, labels))
    ##############################################################################
    # Plot result
    matplotlib.style.use('ggplot')
    # Black removed and is used for noise instead.
    unique_labels = set(labels)
    colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = 'k'
        class_member_mask = (labels == k)
        # Core samples of this cluster: large markers
        xy = X[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
        # Border (non-core) samples: small markers
        xy = X[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=6)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    imgname = "./clusterimage/hourcondimention/" +cityname+'.png'
    fig = plt.gcf()
    fig.set_size_inches(16.5, 12.5)
    fig.savefig(imgname)
    # Compare the clustering against the reference array; ScandARI appears to
    # hold (SC, ARI, ...) values — TODO confirm drawlableCLuster's contract
    ScandARI = drawlableCLuster(comparearray,labels,cityname.split('_')[3])
    print ScandARI
    with open('summary_hour_total_dimention.csv','a') as f:
        write = csv.writer(f)
        # write.writerow(['name','clusters','SC'])
        write.writerow([cityname,n_clusters_,metrics.silhouette_score(X, labels, metric='sqeuclidean')])
        write.writerow(["hour_dimention_twitterinfo"+cityname.split('_')[3],ScandARI[0],ScandARI[1],ScandARI[2]])
开发者ID:evanslight,项目名称:Exploring-Twitter-Sentiment-Analysis-and-the-Weather,代码行数:56,代码来源:hourlevel_cluster_mds.py
示例9: cluster_kmeans1
def cluster_kmeans1():
    """Demo: cluster the iris dataset with KMeans(k=4) and print the data,
    the cluster centers, the labels and the euclidean silhouette score.
    (Python 2: print statements.)"""
    dataset = datasets.load_iris()
    X = dataset.data
    # random_state fixed for reproducible centroid initialisation
    kmeans_model = KMeans(n_clusters=4,random_state=1).fit(X)
    labels = kmeans_model.labels_
    print X
    print kmeans_model.cluster_centers_
    print labels
    print metrics.silhouette_score(X,labels,metric="euclidean")
开发者ID:swenker,项目名称:bigdata,代码行数:10,代码来源:scikit_lab.py
示例10: eval_perf
def eval_perf(self):
    """Compare the silhouette of the DBN encoding of the test set against
    the silhouette of the raw inputs, both scored with the true test labels.
    (Python 2: print statements.)"""
    X_tst, y_tst = self.data.get_test_set()
    # Encode the test set with the trained DBN
    code = self.dbn.f_code(X_tst)
    from sklearn import metrics
    sil_c = metrics.silhouette_score(code, y_tst)
    sil_X = metrics.silhouette_score(X_tst, y_tst)
    print 'Silhouette code y', sil_c
    print 'Silhouette X y', sil_X
开发者ID:tomMoral,项目名称:RBM,代码行数:10,代码来源:Experience.py
示例11: kmeans
def kmeans(input_file, n_clusters, Output):
    """Run k-means++ on a comma-separated data file, print and write the
    usual clustering metrics, dump per-sample assignments, and save a
    scatter plot of the clusters.

    :param input_file: CSV file; column 0 is the true label y, the remaining
        columns are features — TODO confirm against tools.file_col_coma
    :param n_clusters: number of clusters for KMeans
    :param Output: directory prefix where the scores file and plot are written
    (Python 2: print statements and xrange.)
    """
    lvltrace.lvltrace("LVLEntree dans kmeans unsupervised")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]  # feature columns
    y = data[:,0]   # true labels (first column)
    sample_size, n_features = X.shape
    k_means=cluster.KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    k_means.fit(X)
    # NOTE(review): reduced_data and values are computed but never used below
    reduced_data = k_means.transform(X)
    values = k_means.cluster_centers_.squeeze()
    labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    print "#########################################################################################################\n"
    #print y
    #print labels
    print "K-MEANS\n"
    # External metrics vs. the true labels, plus the euclidean silhouette
    print('homogeneity_score: %f'%metrics.homogeneity_score(y, labels))
    print('completeness_score: %f'%metrics.completeness_score(y, labels))
    print('v_measure_score: %f'%metrics.v_measure_score(y, labels))
    print('adjusted_rand_score: %f'%metrics.adjusted_rand_score(y, labels))
    print('adjusted_mutual_info_score: %f'%metrics.adjusted_mutual_info_score(y, labels))
    print('silhouette_score: %f'%metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    print('\n')
    print "#########################################################################################################\n"
    # Write the same scores plus per-sample assignments to a text file.
    # NOTE(review): `file` shadows the builtin and is closed manually, not
    # with a context manager.
    results = Output+"kmeans_scores.txt"
    file = open(results, "w")
    file.write("K-Means Scores\n")
    file.write("Homogeneity Score: %f\n"%metrics.homogeneity_score(y, labels))
    file.write("Completeness Score: %f\n"%metrics.completeness_score(y, labels))
    file.write("V-Measure: %f\n"%metrics.v_measure_score(y, labels))
    file.write("The adjusted Rand index: %f\n"%metrics.adjusted_rand_score(y, labels))
    file.write("Adjusted Mutual Information: %f\n"%metrics.adjusted_mutual_info_score(y, labels))
    file.write("Silhouette Score: %f\n"%metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    file.write("\n")
    file.write("True Value, Cluster numbers, Iteration\n")
    for n in xrange(len(y)):
        file.write("%f, %f, %i\n"%(y[n],labels[n],(n+1)))
    file.close()
    import pylab as pl
    from itertools import cycle
    # plot the results along with the labels
    k_means_cluster_centers = k_means.cluster_centers_
    fig, ax = plt.subplots()
    # Scatter the first two feature dimensions coloured by cluster label
    im=ax.scatter(X[:, 0], X[:, 1], c=labels, marker='.')
    for k in xrange(n_clusters):
        my_members = labels == k
        cluster_center = k_means_cluster_centers[k]
        ax.plot(cluster_center[0], cluster_center[1], 'w', color='b',
                marker='x', markersize=6)
    fig.colorbar(im)
    plt.title("Number of clusters: %i"%n_clusters)
    save = Output + "kmeans.png"
    plt.savefig(save)
    lvltrace.lvltrace("LVLsortie dans kmeans unsupervised")
开发者ID:xaviervasques,项目名称:Neuron_Morpho_Classification_ML,代码行数:55,代码来源:unsupervised.py
示例12: acc_silhouette
def acc_silhouette(X, lbls_true, lbls_pred, reject, strat_lbl_inds, use_strat=False, metric='euclidean'):
    """Silhouette score of a predicted clustering on a precomputed distance
    matrix, optionally restricted to a stratified subset of columns.

    :param X: data matrix (samples x genes); columns are selected when
        use_strat is True
    :param lbls_true: true labels — only used for the subset-percentage text
    :param lbls_pred: predicted cluster labels
    :param reject: unused here (kept for a shared evaluation-function signature)
    :param strat_lbl_inds: column indices of the stratified subset
    :param use_strat: score only the stratified subset when True
    :param metric: distance metric passed to sc.distances
    :return: (silhouette, (long_description, short_description))
    """
    if use_strat:
        dists = sc.distances(X[:, strat_lbl_inds], gene_ids=np.arange(strat_lbl_inds.size), metric=metric )
        sil = metrics.silhouette_score(dists, lbls_pred[strat_lbl_inds], metric='precomputed')
        # FIX: np.int / np.float were deprecated in NumPy 1.20 and removed
        # later; the Python builtins behave identically here.
        perc = int(float(len(strat_lbl_inds)) / float(lbls_true.size) * 100.0)
        desc = ('Silhouette (strat={0},{1})'.format(perc, metric), 'Silhouette ({0})'.format(metric))
    else:
        dists = sc.distances(X, gene_ids=np.arange(X.shape[1]), metric=metric )
        sil = metrics.silhouette_score(dists, lbls_pred, metric='precomputed')
        desc = ('Silhouette ({0})'.format(metric), 'Silhouette ({0})'.format(metric))
    return sil, desc
开发者ID:nicococo,项目名称:scRNA,代码行数:11,代码来源:experiments_utils.py
示例13: fit
def fit(self, X, Y=None):
    """Fit KMeans on X (an Orange Table or an array-like) and return a
    KMeansModel whose projection is annotated with the silhouette score,
    per-sample inertia, and the mean inter-cluster-center distance.

    :param X: Orange Table (clustered on its underlying matrix X.X) or array
    :param Y: passed through to KMeans.fit — TODO confirm it is ever used
    """
    proj = skl_cluster.KMeans(**self.params)
    if isinstance(X, Table):
        # Orange Table: cluster on the raw numpy matrix X.X
        proj = proj.fit(X.X, Y)
        proj.silhouette = silhouette_score(X.X, proj.labels_)
    else:
        proj = proj.fit(X, Y)
        proj.silhouette = silhouette_score(X, proj.labels_)
    # Inertia normalised by the number of samples
    proj.inertia = proj.inertia_ / len(X)
    # Mean pairwise euclidean distance between cluster centers
    # (upper triangle only, to count each pair once)
    cluster_dist = Euclidean(proj.cluster_centers_)
    proj.inter_cluster = np.mean(cluster_dist[np.triu_indices_from(cluster_dist, 1)])
    return KMeansModel(proj, self.preprocessors)
开发者ID:Coding4Sec,项目名称:orange3,代码行数:12,代码来源:kmeans.py
示例14: bench_k_means
def bench_k_means(estimator, name, data, silhouette_results):
    """Fit `estimator` on `data`, print a metrics table row, and return the
    euclidean silhouette score as a string.

    :param estimator: unfitted clusterer exposing .fit, .inertia_, .labels_
    :param name: label printed in the table row
    :param data: feature matrix
    :param silhouette_results: unused (kept for interface compatibility)
    NOTE(review): `labels` (the ground truth) is read from an enclosing or
    global scope — confirm against the calling module.
    """
    t0 = time()
    estimator.fit(data)
    elapsed = time() - t0
    # PERF FIX: compute the silhouette once and reuse it — the original ran
    # the O(n^2) metric twice (print and return).
    sil = metrics.silhouette_score(data, estimator.labels_, metric='euclidean')
    print('init\t\ttime\tinertia\thomo\tcompl\tv-meas\tARI\tAMI\tsilhouette')
    print('% 9s\t %.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' % \
        (name, elapsed, \
        estimator.inertia_, \
        metrics.homogeneity_score(labels, estimator.labels_), \
        metrics.completeness_score(labels, estimator.labels_), \
        metrics.v_measure_score(labels, estimator.labels_), \
        metrics.adjusted_rand_score(labels, estimator.labels_), \
        metrics.adjusted_mutual_info_score(labels, estimator.labels_), \
        sil))
    return str(sil)
开发者ID:UncleEricB,项目名称:GA_homework,代码行数:14,代码来源:hw4.py
示例15: find_best
def find_best(df, cls, norm):
'''
INPUTS: Pandas DataFrame, String of which dataset is being used,
Boolean if data is normalized
OUTPUTS: Prints score to screen. Saves model to RESULTS_DIR
'''
if norm == True:
df = StandardScaler(copy=False).fit_transform(df)
files = [f for f in os.listdir(RESULTS_DIR)
if f.endswith('{}_{}.pkl'.format(cls, norm)) and
if f.startswith('k')]]
scores = []
for f in files:
model = pickle.load(open(RESULTS_DIR + f, 'rb'))
labels = model.predict(df)
score = silhouette_score(df.values, labels, sample_size=10000)
name = f.split('_')[1]
scores.append((score, name))
print "{} {} {} {}".format(f.split('_')[1], float(score), cls, norm)
del labels
del model
ranked_scores = sorted(scores, reverse=True)
ranked_scores = [(item[1], item[0]) for item in ranked_scores]
with open('{}_{}.pkl'.format(cls, norm), 'wb') as f:
pickle.dump(ranked_scores, f)
for item in ranked_scores: print item
开发者ID:real-limoges,项目名称:match-terpiece,代码行数:32,代码来源:loop_k_means.py
示例16: save_cluster_metrics
def save_cluster_metrics(self, points, predictions, key, level2_mode = False):
    """Compute and store silhouette (global and weighted) and Dunn-index
    metrics for one clustering result, keyed by `key`.

    :param points: feature matrix that was clustered
    :param predictions: cluster labels for `points`
    :param key: dictionary key under which the scores are stored
    :param level2_mode: when True, also append the scores to the level-2 lists
    """
    try:
        # silhouette_score raises ValueError for degenerate labelings
        # (e.g. a single cluster); in that case both scores are skipped.
        silhoutte_global = metrics.silhouette_score(points, predictions, metric='euclidean')
        silhoutte_weighted = utils.silhoutte_weighted(points, predictions)
        self.silhouette_scores_global[key] = silhoutte_global
        self.silhouette_scores_weighted[key] = silhoutte_weighted
        if level2_mode:
            self.level2_silhoutte_global.append(silhoutte_global)
            self.level2_silhoutte_weighted.append(silhoutte_weighted)
    except ValueError as e:
        pass
    # dunn_scores = cluster_evaluation.dunn_index(points, predictions, means)
    # NOTE(review): the real Dunn computation above is disabled and replaced
    # by placeholder zeros — confirm whether this is intentional.
    dunn_scores = [0, 0, 0]
    if (dunn_scores[0] is not None) and (dunn_scores[1] is not None) and (dunn_scores[2] is not None):
        self.dunn_scores_1[key] = dunn_scores[0]
        self.dunn_scores_2[key] = dunn_scores[1]
        self.dunn_scores_3[key] = dunn_scores[2]
        if level2_mode:
            self.level2_dunn_1.append(dunn_scores[0])
            self.level2_dunn_2.append(dunn_scores[1])
            self.level2_dunn_3.append(dunn_scores[2])
开发者ID:BerkeleyAutomation,项目名称:tsc-dl,代码行数:30,代码来源:clustering.py
示例17: optimal_cutoff
def optimal_cutoff(Y,dist_mat,min_size):
    """Find the dendrogram cut height that maximises the silhouette score.

    :param Y: scipy linkage matrix; a flat clustering is produced at each
        merge height Y[:,2]
    :param dist_mat: precomputed distance matrix fed to silhouette_score
    :param min_size: the last `min_size` merge heights (coarsest cuts) are
        excluded from the search
    :return: optimal cut height found by fmin on an interpolation of -score
    """
    # One flat clustering per merge height in the linkage matrix
    labels = np.array([sch.fcluster(Y,c,criterion='distance') for c in Y[:,2]])
    scores = np.array([metrics.silhouette_score(dist_mat,l) for l in labels[:-min_size]])
    score = np.array([metrics.silhouette_score(dist_mat,l) for l in labels[:-min_size]])
    c = Y[:-min_size,2]
    # Interpolate -score so that a minimiser locates the silhouette maximum
    f = interp(c,-score,kind='linear')
    # Start the search away from the finest cuts
    opt_c = opt.fmin(f,x0=c[2*min_size])
    return opt_c
开发者ID:sherpaman,项目名称:MolToolPy,代码行数:7,代码来源:matrix_dendro.py
示例18: ap
def ap(data):
    """Cluster `data` with Affinity Propagation and print the estimated
    cluster count plus the squared-euclidean silhouette coefficient."""
    X = data
    model = AffinityPropagation(
        damping=0.8,
        max_iter=200,
        convergence_iter=15,
        preference=None,
        affinity='euclidean',
        verbose=True,
    )
    model.fit(X)
    labels = model.labels_
    # Each exemplar index corresponds to one cluster
    n_clusters_ = len(model.cluster_centers_indices_)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(
        X, labels, metric='sqeuclidean'))
开发者ID:chenhang,项目名称:chenhang.github.io,代码行数:26,代码来源:sklearn_kmeans.py
示例19: calculateNumberOfIdealClusters
def calculateNumberOfIdealClusters(maxAmount, corpus):
print "Initializing silhouette analysis"
range_n_clusters = range(2, maxAmount) # max amount of clusters equal to amount of jobs
silhouette_high = 0;
silhouette_high_n_clusters = 2;
for n_clusters in range_n_clusters:
# Initialize the clusterer with n_clusters value
cluster = AgglomerativeClustering(n_clusters=n_clusters, linkage="ward", affinity="euclidean")
cluster_labels = cluster.fit_predict(corpus)
# The silhouette_score gives the average value for all the samples.
# This gives a perspective into the density and separation of the formed clusters
silhouette_avg = silhouette_score(corpus, cluster_labels)
print "For n_clusters = %d, the average silhouette_score is: %.5f" % (n_clusters, silhouette_avg)
if (silhouette_avg > silhouette_high):
silhouette_high = silhouette_avg
silhouette_high_n_clusters = n_clusters
# Compute the silhouette scores for each sample
sample_silhouette_values = silhouette_samples(corpus, cluster_labels)
print ("Highest score = %f for n_clusters = %d" % (silhouette_high, silhouette_high_n_clusters))
return silhouette_high_n_clusters
开发者ID:edwardmp,项目名称:clustering-job-offers-and-assessing-job-similarity,代码行数:27,代码来源:clustering.py
示例20: _cluster
def _cluster(params):
    """Cluster the shared precomputed distance matrix with the configured
    method and return (labels, perf), where perf holds the silhouette and
    Calinski-Harabasz scores (None when the labeling is degenerate).

    :param params: dict of method hyper-parameters (e.g. 'eps', 'min_samples')
    """
    cls = None
    # Method name and the distance matrix come from the shared-constant store
    method = sh.getConst('method')
    if method=='kmedoid':
        # kmedoid support is currently disabled
        assert False
        # from kmedoid import kmedsoid
        # cls = kmedoid
    elif method=='dbscan':
        from sklearn.cluster import DBSCAN
        # metric='precomputed': `mat` is a distance matrix, not raw features
        cls = DBSCAN(eps=params['eps'],min_samples=params['min_samples'],
                     metric='precomputed')
    else:
        assert False, 'FATAL: unknown cluster method'
    ##
    mat = sh.getConst('mat')
    labels = cls.fit_predict(mat)
    nLabels = len(set(labels))
    ##
    sil = None; cal = None
    # Both scores are only defined for 2 .. n_samples-1 distinct labels
    if (nLabels >= 2)and(nLabels <= len(labels)-1):
        sil = met.silhouette_score(mat,labels,'precomputed')
        # NOTE(review): calinski_harabaz_score is the pre-0.23 sklearn
        # spelling (later renamed calinski_harabasz_score) — confirm the
        # pinned sklearn version supports it.
        cal = met.calinski_harabaz_score(mat,labels)
    perf = dict(silhouette_score=sil,calinski_harabaz_score=cal)
    return (labels,perf)
开发者ID:tttor,项目名称:csipb-jamu-prj,代码行数:27,代码来源:cluster.py
注:本文中的sklearn.metrics.silhouette_score函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论