This article collects typical usage examples of the Python function sklearn.metrics.v_measure_score. If you have been wondering what exactly v_measure_score does and how to use it, the hand-picked code examples below should help.
The 20 code examples of v_measure_score shown below are sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system surface better Python code samples.
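Before the collected examples, here is a minimal, self-contained sketch of the call itself (the label vectors below are made up for illustration): v_measure_score(labels_true, labels_pred) compares a ground-truth labeling with a predicted clustering and returns a score between 0.0 and 1.0 that is invariant to permutations of the cluster ids.

from sklearn.metrics import v_measure_score

# Ground-truth classes and predicted cluster ids; only the induced
# partitions matter, not the integer values themselves.
labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [1, 1, 0, 0, 2, 2]  # same partition, ids permuted

print(v_measure_score(labels_true, labels_pred))          # 1.0: identical partitions
print(v_measure_score(labels_true, [0, 0, 0, 0, 0, 0]))   # 0.0: everything in one cluster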
Example 1: kmeans

def kmeans(input_file, n_clusters, Output):
    lvltrace.lvltrace("LVLEntree dans kmeans unsupervised")
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol - 1))
    X = data[:, 1:]
    y = data[:, 0]
    sample_size, n_features = X.shape
    k_means = cluster.KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
    k_means.fit(X)
    reduced_data = k_means.transform(X)
    values = k_means.cluster_centers_.squeeze()
    labels = k_means.labels_
    k_means_cluster_centers = k_means.cluster_centers_
    print("#########################################################################################################\n")
    print("K-MEANS\n")
    print('homogeneity_score: %f' % metrics.homogeneity_score(y, labels))
    print('completeness_score: %f' % metrics.completeness_score(y, labels))
    print('v_measure_score: %f' % metrics.v_measure_score(y, labels))
    print('adjusted_rand_score: %f' % metrics.adjusted_rand_score(y, labels))
    print('adjusted_mutual_info_score: %f' % metrics.adjusted_mutual_info_score(y, labels))
    print('silhouette_score: %f' % metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    print('\n')
    print("#########################################################################################################\n")
    results = Output + "kmeans_scores.txt"
    file = open(results, "w")
    file.write("K-Means Scores\n")
    file.write("Homogeneity Score: %f\n" % metrics.homogeneity_score(y, labels))
    file.write("Completeness Score: %f\n" % metrics.completeness_score(y, labels))
    file.write("V-Measure: %f\n" % metrics.v_measure_score(y, labels))
    file.write("The adjusted Rand index: %f\n" % metrics.adjusted_rand_score(y, labels))
    file.write("Adjusted Mutual Information: %f\n" % metrics.adjusted_mutual_info_score(y, labels))
    file.write("Silhouette Score: %f\n" % metrics.silhouette_score(X, labels, metric='euclidean', sample_size=sample_size))
    file.write("\n")
    file.write("True Value, Cluster numbers, Iteration\n")
    for n in range(len(y)):
        file.write("%f, %f, %i\n" % (y[n], labels[n], n + 1))
    file.close()
    # plot the results along with the labels
    # (matplotlib.pyplot is assumed imported at module level as plt; the
    # original also imported pylab and itertools.cycle here but never used them)
    fig, ax = plt.subplots()
    im = ax.scatter(X[:, 0], X[:, 1], c=labels, marker='.')
    for k in range(n_clusters):
        my_members = labels == k
        cluster_center = k_means_cluster_centers[k]
        ax.plot(cluster_center[0], cluster_center[1],
                color='b', marker='x', markersize=6)
    fig.colorbar(im)
    plt.title("Number of clusters: %i" % n_clusters)
    save = Output + "kmeans.png"
    plt.savefig(save)
    lvltrace.lvltrace("LVLsortie dans kmeans unsupervised")

Author: xaviervasques | Project: Neuron_Morpho_Classification_ML | Lines: 55 | Source: unsupervised.py
Example 2: main

def main():
    '''docstring for main'''
    args = parse_args()
    setup_logging(verbose=args.verbose)
    records = consume_fasta(args.fasta_file)
    # set up hasher, vectorizer and classifier
    hasher = HashingVectorizer(analyzer='char',
                               n_features=2 ** 18,
                               ngram_range=(args.ngram_min, args.ngram_max),
                               )
    logging.info(hasher)
    encoder, classes = get_classes(records, args.tax_level)
    n_clusters = len(classes)
    logging.info('using taxonomic level %s' % args.tax_level)
    logging.info('using %s clusters' % n_clusters)
    classifier = MiniBatchKMeans(n_clusters=n_clusters)
    records = records[0:args.n_iters]
    chunk_generator = iter_chunk(records, args.chunk_size, args.tax_level)
    logging.info('ngram range: [%s-%s]' % (args.ngram_min, args.ngram_max))
    for labels, features in chunk_generator:
        logging.info('transforming training chunk')
        labels = encoder.transform(labels)
        vectors = hasher.transform(features)
        logging.info('fitting training chunk')
        classifier.partial_fit(vectors)
        pred_labels = classifier.predict(vectors)
        score = v_measure_score(labels, pred_labels)
        # compare against a shuffled baseline (random.sample shuffles here)
        shuffled_score = v_measure_score(labels, sample(pred_labels, len(pred_labels)))
        logging.info('score: %.2f' % score)
        logging.info('shuffled score: %.2f' % shuffled_score)

Author: audy | Project: bfc | Lines: 48 | Source: bfc.py
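A side note on Example 2's shuffled baseline: V-measure is not adjusted for chance, so a random labeling can still score well above zero, especially when the number of clusters is large relative to the number of samples. A minimal sketch of that effect (the data below is made up for illustration):

import numpy as np
from sklearn.metrics import v_measure_score

rng = np.random.default_rng(0)
labels_true = rng.integers(0, 20, size=100)  # 100 points, 20 classes
labels_rand = rng.integers(0, 20, size=100)  # a completely random clustering
print(v_measure_score(labels_true, labels_rand))  # noticeably above 0.0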
Example 3: bench_k_means

def bench_k_means(estimator, name, data, target_labels, sample_size):
    """For benchmarking K-Means estimators. Prints various clustering metrics and train accuracy.
    ARGS
        estimator: K-Means clustering algorithm <sklearn.cluster.KMeans>
        name: estimator name <str>
        data: array-like or sparse matrix, shape=(n_samples, n_features)
        target_labels: labels of data points <number array>
        sample_size: size of the sample to use when computing the Silhouette Coefficient <int>
    """
    t0 = time()
    estimator.fit(data)
    _, _, train_accuracy = compute_residuals_and_rsquared(estimator.labels_, target_labels)
    print('% 9s\t%.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(target_labels, estimator.labels_),
             metrics.completeness_score(target_labels, estimator.labels_),
             metrics.v_measure_score(target_labels, estimator.labels_),
             metrics.adjusted_rand_score(target_labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(target_labels, estimator.labels_),
             metrics.silhouette_score(data, estimator.labels_, metric='euclidean', sample_size=sample_size),
             train_accuracy
             )
          )

Author: rafaelvalle | Project: machine_listening | Lines: 25 | Source: knn_engine.py
Example 4: main

def main(argv):
    file_vectors, clust_type, clusters, distance, cluster_param, std = get_arguments(argv)
    fname = '.'.join(map(str, [file_vectors.split('/')[-1], clust_type, clusters, distance, cluster_param, std]))
    writer = open(fname, 'w')  # better to put in EX1, EX2, ... folders
    print('clustering:', clust_type)
    print('clusters:', clusters)
    print('cluster_param:', cluster_param)
    print('std:', std)
    X, words, truth = load_data(file_vectors, True)
    X = np.array(X)
    if clust_type == 'affin':
        labels = affin_sclustering(X, n_clust=int(clusters), distance=distance, gamma=float(cluster_param), std=bool(std))
    else:
        labels = knn_sclustering(X, n_clust=int(clusters), k=int(cluster_param))
    writer.write('\nVMeas:' + str(v_measure_score(truth, labels)))
    writer.write('\nRand:' + str(adjusted_rand_score(truth, labels)))
    writer.write('\nHomogen:' + str(homogeneity_score(truth, labels)) + '\n')
    for i, word in enumerate(words):
        writer.write(word + ' : ' + str(labels[i]) + '\n')
    writer.close()

Author: ivri | Project: DiffVec | Lines: 26 | Source: cluster.py
Example 5: my_clustering

def my_clustering(X, y, n_clusters, pca):
    # =======================================
    # Complete the code here.
    # return scores like this: return [score, score, score, score]
    # =======================================
    from sklearn.cluster import KMeans
    clf = KMeans(n_clusters)
    clf.fit(X)
    from sklearn import metrics
    ari = metrics.adjusted_rand_score(y, clf.labels_)
    ami = metrics.adjusted_mutual_info_score(y, clf.labels_)
    v_measure = metrics.v_measure_score(y, clf.labels_)
    # silhouette_score can also be subsampled, e.g.
    # metrics.silhouette_score(X, clf.labels_, metric='euclidean', sample_size=300)
    silhouette_coeff = metrics.silhouette_score(X, clf.labels_)
    show_images(n_clusters, clf, pca)
    return [ami, ari, v_measure, silhouette_coeff][0:0] or [ari, ami, v_measure, silhouette_coeff]

Author: kevguy | Project: CSCI3320Asg3 | Lines: 26 | Source: ex2.py
Example 6: bench_k_means

def bench_k_means(estimator, data, labels):
    t0 = time()
    estimator.fit(data)
    print("time to fit: {:.5}".format(time() - t0))
    homogeneity = metrics.homogeneity_score(labels, estimator.labels_)
    completeness = metrics.completeness_score(labels, estimator.labels_)
    v_measure = metrics.v_measure_score(labels, estimator.labels_)
    print("homogeneity {:.5}, completeness {:.5}, v_measure_score {:.5}".format(
        homogeneity, completeness, v_measure)
    )
    adj_rand_score = metrics.adjusted_rand_score(
        labels, estimator.labels_
    )
    print("adjusted_rand_score {:.5}".format(adj_rand_score))
    adj_mutual_info_score = metrics.adjusted_mutual_info_score(
        labels, estimator.labels_
    )
    print("adjusted_mutual_info_score {:.5}".format(
        adj_mutual_info_score)
    )
    # compute the silhouette once and reuse it (the original recomputed it
    # a second time when printing)
    silhouette_score = metrics.silhouette_score(
        data, estimator.labels_, metric='euclidean'
    )
    print("silhouette_score {:.5}".format(silhouette_score))
    return [
        homogeneity, completeness, v_measure, adj_rand_score,
        adj_mutual_info_score, silhouette_score
    ]

Author: all3fox | Project: term_paper | Lines: 35 | Source: rules_cluster.py
Example 7: cluster

def cluster(Z, K=4, algo='kmeans', labels_true=None):
    # NOTE: the original referenced an undefined `labels_true`; it is made an
    # explicit argument here so the supervised metrics below can be computed.
    descr = Z.columns
    X = Imputer().fit_transform(Z)
    ##############################################################################
    if algo == 'dbscan':
        # Compute DBSCAN
        db = DBSCAN(eps=0.3, min_samples=10).fit(X)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        labels = db.labels_
        # Number of clusters in labels, ignoring noise if present.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        print('Estimated number of clusters: %d' % n_clusters_)
        print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
        print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
        print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
        print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
        print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
        print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))
    elif algo == 'kmeans':
        km = KMeans(n_clusters=K)
        km.fit(X)
        print(km.labels_)
        return km

Author: orichardson | Project: mcm2016 | Lines: 28 | Source: cluster_vi.py
Example 8: bench_k_means

def bench_k_means(estimator, name, data, sample_size, labels, postIds):
    data = sparse.csr_matrix(data)
    print("Performing dimensionality reduction using LSA")
    t0 = time()
    lsa = TruncatedSVD(500)
    data = lsa.fit_transform(data)
    data = Normalizer(copy=False).fit_transform(data)
    print("done in %fs" % (time() - t0))
    print()
    val = estimator.fit(data)
    print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f '
          % (name, (time() - t0), estimator.inertia_,
             metrics.homogeneity_score(labels, estimator.labels_),
             metrics.completeness_score(labels, estimator.labels_),
             metrics.v_measure_score(labels, estimator.labels_),
             metrics.adjusted_rand_score(labels, estimator.labels_),
             metrics.adjusted_mutual_info_score(labels, estimator.labels_)))
    print("Parsing user file:")
    parseUserFile()
    print("Extracting user file:")
    clusterDict = extractCluster(postIds, estimator.labels_)
    print("Writing cluster data to file")
    writeCluterToFile(clusterDict)

Author: ashwini-mnnit | Project: UnstackExperts | Lines: 29 | Source: tag_body_clustring.py
Example 9: clustering_by_kmeans

def clustering_by_kmeans(vectorizer, X, true_k):
    print("Clustering in " + str(true_k) + " groups by K-means...")
    km = KMeans(n_clusters=true_k, init='k-means++', max_iter=500, n_init=1)
    km.fit_predict(X)
    print("Measuring...")
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(documents, km.labels_))
    print("Completeness: %0.3f" % metrics.completeness_score(documents, km.labels_))
    # V-measure is an entropy-based measure which explicitly measures how
    # successfully the criteria of homogeneity and completeness have been satisfied.
    print("V-measure: %0.3f" % metrics.v_measure_score(documents, km.labels_))
    print("Adjusted Rand-Index: %.3f" % metrics.adjusted_rand_score(documents, km.labels_))
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, km.labels_, sample_size=1000))
    clusters = km.labels_.tolist()  # cluster id per document: 0 if in cluster 0, 1 if in cluster 1, ...
    print("Total of " + str(len(km.labels_)) + " documents")
    # Example: to get all documents in cluster 0
    #   cluster_0 = np.where(np.array(clusters) == 0)  # don't forget import numpy as np
    #   X_cluster_0 = documents[cluster_0]
    terms = vectorizer.get_feature_names()
    measuring_kmeans(true_k, clusters)

Author: eubr-bigsea | Project: Tweets-cluster | Lines: 27 | Source: tf_idf.py
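The comment in Example 9 is worth unpacking: V-measure is the harmonic mean of homogeneity and completeness. A small sketch verifying that identity with sklearn (the label vectors are arbitrary):

from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 0, 1, 2, 2, 2]

h = homogeneity_score(y_true, y_pred)
c = completeness_score(y_true, y_pred)
v = v_measure_score(y_true, y_pred)
assert abs(v - 2 * h * c / (h + c)) < 1e-9  # harmonic mean identity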
Example 10: bestClassify

def bestClassify(X, Y):
    """Best classifier function"""
    tfidf = True
    if tfidf:
        vec = TfidfVectorizer(preprocessor=identity,
                              tokenizer=identity, sublinear_tf=True)
    else:
        vec = CountVectorizer(preprocessor=identity,
                              tokenizer=identity)
    km = KMeans(n_clusters=2, n_init=100, verbose=1)
    clusterer = Pipeline([('vec', vec),
                          ('cls', km)])
    prediction = clusterer.fit_predict(X, Y)
    # map each cluster id to the majority gold label inside that cluster
    checker = defaultdict(list)
    for pred, truth in zip(prediction, Y):
        checker[pred].append(truth)
    labeldict = {}
    for pred, label in checker.items():
        labeldict[pred] = Counter(label).most_common(1)[0][0]
    prediction = [labeldict[p] for p in prediction]
    labels = list(labeldict.values())
    print(labels)
    print(confusion_matrix(Y, prediction, labels=labels))
    print("Homogeneity:", homogeneity_score(Y, prediction))
    print("Completeness:", completeness_score(Y, prediction))
    print("V-measure:", v_measure_score(Y, prediction))
    print("Rand-Index:", adjusted_rand_score(Y, prediction))

Author: Martbov | Project: LearningFromData | Lines: 35 | Source: LFDassignment5_KMBinary_Mart.py
Example 11: compute_metrics

def compute_metrics(answers, predictions):
    aris = []
    vscores = []
    fscores = []
    weights = []
    for k in answers.keys():
        idx = np.argsort(np.array(answers[k][0]))
        true = np.array(answers[k][1])[idx]
        pred = np.array(predictions[k][1])
        weights.append(pred.shape[0])
        # ARI is only defined for words with more than one gold sense;
        # vscore and fscore are computed for every word
        if len(np.unique(true)) > 1:
            aris.append(adjusted_rand_score(true, pred))
        vscores.append(v_measure_score(true, pred))
        fscores.append(compute_fscore(true, pred))
    aris = np.array(aris)
    vscores = np.array(vscores)
    fscores = np.array(fscores)
    weights = np.array(weights)
    print('number of one-sense words: %d' % (len(vscores) - len(aris)))
    print('mean ari: %f' % np.mean(aris))
    print('mean vscore: %f' % np.mean(vscores))
    print('weighted vscore: %f' % np.sum(vscores * (weights / float(np.sum(weights)))))
    print('mean fscore: %f' % np.mean(fscores))
    print('weighted fscore: %f' % np.sum(fscores * (weights / float(np.sum(weights)))))
    return np.mean(aris), np.mean(vscores)

Author: FedericoV | Project: AdaGram.jl | Lines: 26 | Source: test-all.py
Example 12: cluster

def cluster(model, uids):
    ##############################################################################
    # Generate sample data
    X = []
    for uid in uids:
        X.append(model.docvecs[uid])
    labels_true = uids
    ##############################################################################
    # Compute Affinity Propagation
    af = AffinityPropagation(preference=-50).fit(X)
    pickle.dump(af, open('data/af.pick', 'wb'))  # pickle files need binary mode
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f"
          % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f"
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, labels, metric='sqeuclidean'))

Author: wtgme | Project: ohsn | Lines: 27 | Source: profile_cluster.py
Example 13: predictAffinityPropagation

def predictAffinityPropagation(X, labels_true):
    #ranX, ranY = shuffle(X, y, random_state=0)
    af = AffinityPropagation(preference=-50).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f"
          % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f"
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, labels, metric='sqeuclidean'))
    plt.close('all')
    plt.figure(1)
    plt.clf()
    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
        plt.plot(X[class_members, 0], X[class_members, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=14)
        for x in X[class_members]:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()

Author: BIDS-collaborative | Project: EDAM | Lines: 35 | Source: meanshift.py
Example 14: test_KMeans_scores

def test_KMeans_scores(self):
    digits = datasets.load_digits()
    df = pdml.ModelFrame(digits)
    scaled = pp.scale(digits.data)
    df.data = df.data.pp.scale()
    self.assert_numpy_array_almost_equal(df.data.values, scaled)
    clf1 = cluster.KMeans(init='k-means++', n_clusters=10,
                          n_init=10, random_state=self.random_state)
    clf2 = df.cluster.KMeans(init='k-means++', n_clusters=10,
                             n_init=10, random_state=self.random_state)
    clf1.fit(scaled)
    df.fit_predict(clf2)
    expected = m.homogeneity_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.homogeneity_score(), expected)
    expected = m.completeness_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.completeness_score(), expected)
    expected = m.v_measure_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.v_measure_score(), expected)
    expected = m.adjusted_rand_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.adjusted_rand_score(), expected)
    expected = m.homogeneity_score(digits.target, clf1.labels_)
    self.assertEqual(df.metrics.homogeneity_score(), expected)
    expected = m.silhouette_score(scaled, clf1.labels_, metric='euclidean',
                                  sample_size=300, random_state=self.random_state)
    result = df.metrics.silhouette_score(metric='euclidean', sample_size=300,
                                         random_state=self.random_state)
    self.assertAlmostEqual(result, expected)

Author: Sandy4321 | Project: pandas-ml | Lines: 35 | Source: test_cluster.py
Example 15: cluster

def cluster(algorithm, data, topics, make_silhouette=False):
    print(str(algorithm))
    clusters = algorithm.fit_predict(data)
    labels = algorithm.labels_
    print('Homogeneity: %0.3f' % metrics.homogeneity_score(topics, labels))
    print('Completeness: %0.3f' % metrics.completeness_score(topics, labels))
    print('V-measure: %0.3f' % metrics.v_measure_score(topics, labels))
    print('Adjusted Rand index: %0.3f' % metrics.adjusted_rand_score(topics, labels))
    print('Silhouette test: %0.3f' % metrics.silhouette_score(data, labels))
    print(' ***************** ')
    silhouettes = metrics.silhouette_samples(data, labels)
    num_clusters = len(set(clusters))
    print('num clusters: %d' % num_clusters)
    print('num fitted: %d' % len(clusters))
    # Make a silhouette plot if the flag is set
    if make_silhouette:
        order = numpy.lexsort((-silhouettes, clusters))
        # one index array per cluster (the original compared against
        # num_clusters here, which looks like a typo for k)
        indices = [numpy.flatnonzero(clusters[order] == k) for k in range(num_clusters)]
        ytick = [(numpy.max(ind) + numpy.min(ind)) / 2 for ind in indices]
        ytickLabels = ["%d" % x for x in range(num_clusters)]
        cmap = cm.jet(numpy.linspace(0, 1, num_clusters)).tolist()
        clr = [cmap[i] for i in clusters[order]]
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.barh(range(data.shape[0]), silhouettes[order], height=1.0,
                edgecolor='none', color=clr)
        ax.set_ylim(ax.get_ylim()[::-1])
        plt.yticks(ytick, ytickLabels)
        plt.xlabel('Silhouette Value')
        plt.ylabel('Cluster')
        plt.savefig('cluster.png')

Author: RuthRainbow | Project: DataMining | Lines: 34 | Source: scilearn.py
Example 16: clustering

def clustering(dataset):
    vectorizer = dataset.vectorizer
    X = dataset.X
    true_k = dataset.n_classes
    labels = dataset.target
    km = cluster.KMeans(n_clusters=true_k, max_iter=100, n_init=1)
    print("Clustering sparse data with %s" % km)
    t0 = time()
    km.fit(X)
    print("done in %0.3fs" % (time() - t0))
    print()
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
    print("Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
    print("Adjusted Rand-Index: %.3f"
          % metrics.adjusted_rand_score(labels, km.labels_))
    # silhouette should evaluate the predicted clustering (the original
    # passed the gold labels here)
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, km.labels_, sample_size=1000))
    print()
    print("Top terms per cluster:")
    order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    terms = vectorizer.get_feature_names()
    sizes = np.sum(km.labels_[:, np.newaxis] == np.arange(true_k), axis=0)
    for i in range(true_k):
        print("Cluster %d (%d):" % (i, sizes[i]), end='')
        for ind in order_centroids[i, :10]:
            print(' %s' % terms[ind], end='')
        print()

Author: humem | Project: dotfiles | Lines: 32 | Source: analyzer.py
Example 17: cluster_evaluation

def cluster_evaluation(D, y_true, n_clusters, eps=0.8, min_samples=10):
    ##############################################################################
    # Extract Y true
    labels_true = y_true
    ##############################################################################
    # transform distance matrix into a similarity matrix
    S = 1 - D
    ##############################################################################
    # compute Ward clustering (DBSCAN left commented out)
    #db = DBSCAN(eps=eps, min_samples=min_samples).fit(S)
    db = Ward(n_clusters=n_clusters).fit(S)
    #core_samples = db.core_sample_indices_
    labels = db.labels_
    # number of clusters in labels, ignoring noise if present
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    print('Number of clusters: %d' % n_clusters_)
    print('Homogeneity: %0.3f' % metrics.homogeneity_score(labels_true, labels))
    print('Completeness: %0.3f' % metrics.completeness_score(labels_true, labels))
    print('V-measure: %0.3f' % metrics.v_measure_score(labels_true, labels))
    print('Adjusted Rand Index: %0.3f' % metrics.adjusted_rand_score(labels_true, labels))
    print('Adjusted Mutual Information: %0.3f' % metrics.adjusted_mutual_info_score(labels_true, labels))
    print('Silhouette Coefficient: %0.3f' % metrics.silhouette_score(D, labels, metric='precomputed'))

Author: fbkarsdorp | Project: MotifRetrieval | Lines: 26 | Source: cluster_evaluation.py
Example 18: kmeans_setup

def kmeans_setup(data):
    if pca_f == 1:
        pca = PCA(n_components=num_clusters).fit(data)
        initializer = pca.components_
        name = 'PCA'
    else:
        initializer = 'k-means++'
        name = 'k-means++'
    t0 = time()
    estimator = KMeans(init=initializer, n_clusters=num_clusters, n_init=num_init, max_iter=num_iterations)
    estimator.fit(data)
    if debug:
        sample_size = 300
        print('% 9s %.2fs %i %.3f %.3f %.3f %.3f %.3f %.3f'
              % (name, (time() - t0), estimator.inertia_,
                 metrics.homogeneity_score(labels, estimator.labels_),
                 metrics.completeness_score(labels, estimator.labels_),
                 metrics.v_measure_score(labels, estimator.labels_),
                 metrics.adjusted_rand_score(labels, estimator.labels_),
                 metrics.adjusted_mutual_info_score(labels, estimator.labels_),
                 metrics.silhouette_score(data, estimator.labels_,
                                          metric='euclidean',
                                          sample_size=sample_size)))
    return estimator

Author: jklapacz | Project: cs498 | Lines: 29 | Source: main.py
Example 19: affin_test

def affin_test():
    # the original used cPickle (Python 2); plain pickle works under Python 3
    savefile = open('traindata.pkl', 'rb')
    (x_train, y_train, t1) = pickle.load(savefile)
    savefile.close()
    # on modern sklearn, train_test_split lives in sklearn.model_selection
    x_train, X_valid, y_train, y_valid = cross_validation.train_test_split(
        x_train, y_train, test_size=0.9, random_state=42)
    labels_true = y_train
    x_train = StandardScaler().fit_transform(x_train)
    af = AffinityPropagation(preference=-50).fit(x_train)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print('Estimated number of clusters: %d' % n_clusters_)
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
    print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
    print("Adjusted Rand Index: %0.3f"
          % metrics.adjusted_rand_score(labels_true, labels))
    print("Adjusted Mutual Information: %0.3f"
          % metrics.adjusted_mutual_info_score(labels_true, labels))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(x_train, labels, metric='sqeuclidean'))

Author: coreyhahn | Project: kaggle_otto | Lines: 29 | Source: clustering.py
Example 20: run_clustering

def run_clustering(clusterer, data, labels):
    """
    Cluster: using a predefined and parameterized clustering algorithm, fit
    some dataset and compute metrics given a set of ground-truth labels.

    clusterer: the clustering algorithm, from sklearn
    data: array-like dataset input
    labels: vector of ground-truth labels
    """
    # Time the operation
    t0 = time()
    clusterer.fit(data)
    t1 = time()
    # Perform metrics
    runtime = (t1 - t0)
    homogeneity = metrics.homogeneity_score(labels, clusterer.labels_)
    completeness = metrics.completeness_score(labels, clusterer.labels_)
    v_measure = metrics.v_measure_score(labels, clusterer.labels_)
    adjusted_rand = metrics.adjusted_rand_score(labels, clusterer.labels_)
    adjusted_mutual = metrics.adjusted_mutual_info_score(labels,
                                                         clusterer.labels_)
    # Output to logs
    logging.info(" |- Execution time: %fs" % runtime)
    logging.info(" |- Homogeneity: %0.3f" % homogeneity)
    logging.info(" |- Completeness: %0.3f" % completeness)
    logging.info(" |- V-measure: %0.3f" % v_measure)
    logging.info(" |- Adjusted Rand-Index: %.3f" % adjusted_rand)
    logging.info(" |- Adjusted Mutual Info: %.3f" % adjusted_mutual)

Author: jrouly | Project: stackmining | Lines: 32 | Source: cluster.py
Note: the sklearn.metrics.v_measure_score examples in this article were compiled by 纯净天空 from source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets are drawn from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution and use are governed by each project's license. Please do not repost without permission.