本文整理汇总了Python中scipy.cluster.hierarchy.inconsistent函数的典型用法代码示例。如果您正苦于以下问题:Python inconsistent函数的具体用法?Python inconsistent怎么用?Python inconsistent使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了inconsistent函数的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: flatcluster
def flatcluster(
    dRow, runLogs, interClusterDistance="complete", plotDendrogram=True, cMethod="inconsistent", cValue=2.5
):
    """Cluster the runs hierarchically and tag every run log with its cluster.

    Args:
        dRow: distance input handed unchanged to ``linkage``
            (presumably a condensed distance vector -- confirm with caller).
        runLogs: sequence with one entry per clustered observation; element
            ``[0]`` of each entry must support item assignment and receives
            a ``"Cluster"`` entry holding the label as a string.
        interClusterDistance: linkage method name passed to ``linkage``.
        plotDendrogram: when true, the tree is drawn with ``plotdendrogram``.
        cMethod: ``criterion`` argument for ``fcluster``.
        cValue: threshold argument for ``fcluster``.

    Returns:
        Tuple ``(z, clusters, runLogs)``: the linkage matrix, the flat
        cluster label of every observation, and the (mutated) run logs.

    Side effects:
        Prints the number of clusters, the size of each cluster and the
        lengths of ``clusters`` and ``runLogs``; stores the number of
        clusters in the module-level global ``clusterCount``.
    """
    global clusterCount

    z = linkage(dRow, interClusterDistance)
    if plotDendrogram:
        plotdendrogram(z)

    clusters = fcluster(z, cValue, cMethod)
    noClusters = max(clusters)
    print("Total number of clusters:", noClusters)

    # Labels are 1-based, so label L is tallied in slot L - 1. A single pass
    # over the labels replaces one full scan per cluster.
    sizes = [0] * int(noClusters)
    for label in clusters:
        sizes[label - 1] += 1
    for number, size in enumerate(sizes, start=1):
        print("Cluster", str(number), ":", str(size))

    clusterCount = noClusters
    print(len(clusters))
    print(len(runLogs))

    for i, log in enumerate(runLogs):
        log[0]["Cluster"] = str(clusters[i])
    return z, clusters, runLogs
开发者ID:adrivsh,项目名称:EMAworkbench,代码行数:34,代码来源:clusterer.py
示例2: _run_hier_clust_on_centroids
def _run_hier_clust_on_centroids(self,method='average'):
    '''
    Run hierarchical clustering (scipy) on the per-label centroids of the
    template data and rank candidate cuts of the resulting tree.

    method -- linkage method name handed to hierarchy.linkage

    Reads self.templateLabels, self.templateMat, self.templateData and
    self.noiseSample.  Stores on self:
      y                -- pdist() output for the centroids
      z                -- linkage matrix
      bestModeLabels   -- mode labels for (at most) the 25 best-ranked cuts
      modeSilValues    -- mean silhouette value for each entry of bestModeLabels
      modeSizes        -- number of distinct labels in each entry of bestModeLabels
      clusterSilValues -- mean silhouette value for the template labels
    '''
    uniqueLabels = np.sort(np.unique(self.templateLabels))
    centroids = np.array([self.templateMat[np.where(self.templateLabels == i)[0],:].mean(axis=0)
                          for i in uniqueLabels])
    self.y = pdist(centroids)
    self.z = hierarchy.linkage(self.y,method)
    r2 = hierarchy.inconsistent(self.z,2)

    ## rank the mean link heights by their standard deviation (largest first)
    meanHeights = r2[:,0]
    stdHeights = r2[:,1]
    rankedInds = np.argsort(stdHeights)[::-1]
    bestCutPoints = meanHeights[rankedInds]

    ## save centroid labels for every distinct cluster count found when
    ## cutting the dendrogram; the first cut producing a given k wins
    allCentroidLabels = {}
    rankedK = []
    for cp in bestCutPoints:
        centroidLabels = hierarchy.fcluster(self.z,t=cp,criterion='distance')
        k = len(np.unique(centroidLabels))
        # 'in' replaces dict.has_key(), which only exists on Python 2
        if str(k) in allCentroidLabels:
            continue
        allCentroidLabels[str(k)] = centroidLabels
        rankedK.append(k)

    ## save the top 25 modes
    self.bestModeLabels = []
    print('doing ranking...')
    for rk in rankedK[:25]:
        centroidLabels = allCentroidLabels[str(rk)]
        modeLabels = self._get_mode_labels(self.templateLabels,centroidLabels,uniqueLabels)
        self.bestModeLabels.append(modeLabels)

    ## provide silhouette values in case we wish to reorder the top modes by them
    self.modeSilValues = []
    self.modeSizes = []
    allEvents = [self.templateData]
    for count in range(len(self.bestModeLabels)):
        numClusters = np.unique(self.bestModeLabels[count]).size
        silValues = get_silhouette_values(allEvents,[self.bestModeLabels[count]],subsample=self.noiseSample,
                                          minNumEvents=5000,resultsType='raw')
        silMean = silValues['0'].mean()
        self.modeSilValues.append(silMean)
        self.modeSizes.append(numClusters)

    silValues = get_silhouette_values(allEvents,[self.templateLabels],subsample=self.noiseSample,
                                      minNumEvents=5000,resultsType='raw')
    self.clusterSilValues = silValues['0'].mean()
    self.modeSilValues = np.array(self.modeSilValues)
    self.modeSizes = np.array(self.modeSizes)
开发者ID:ajrichards,项目名称:cytostream,代码行数:60,代码来源:TemplateFileCreator.py
示例3: check_maxRstat_Q_linkage
def check_maxRstat_Q_linkage(self, method, i):
    """Check maxRstat(Z, R, i) against the reference helper on the Q data set.

    method -- linkage method used to build the tree from Q_X
    i      -- column of the inconsistency matrix R whose subtree maximum
              is being verified
    """
    X = hierarchy_test_data.Q_X
    Z = linkage(X, method)
    R = inconsistent(Z)
    # Exercise the column named by the caller, as the test description
    # states, rather than always column 1.
    MD = maxRstat(Z, R, i)
    expectedMD = calculate_maximum_inconsistencies(Z, R, i)
    assert_allclose(MD, expectedMD, atol=1e-15)
开发者ID:abudulemusa,项目名称:scipy,代码行数:8,代码来源:test_hierarchy.py
示例4: test_is_valid_im_4_and_up
def test_is_valid_im_4_and_up(self):
# Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
# (step size 3).
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)//2)
Z = linkage(y)
R = inconsistent(Z)
assert_(is_valid_im(R) == True)
开发者ID:abudulemusa,项目名称:scipy,代码行数:8,代码来源:test_hierarchy.py
示例5: check_maxRstat_Q_linkage
def check_maxRstat_Q_linkage(self, method, i):
    """Check maxRstat(Z, R, i) against the reference helper on the Q data set.

    method -- linkage method used to build the tree from eo['Q-X']
    i      -- column of the inconsistency matrix R whose subtree maximum
              is being verified
    """
    X = eo['Q-X']
    Z = linkage(X, method)
    R = inconsistent(Z)
    # Exercise the column named by the caller, as the test description
    # states, rather than always column 1.
    MD = maxRstat(Z, R, i)
    expectedMD = calculate_maximum_inconsistencies(Z, R, i)
    assert_allclose(MD, expectedMD, atol=1e-15)
开发者ID:FrankZhao66,项目名称:scipy,代码行数:9,代码来源:test_hierarchy.py
示例6: test_is_valid_im_4_and_up_neg_dist
def test_is_valid_im_4_and_up_neg_dist(self):
# Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
# (step size 3) with negative link counts.
for i in xrange(4, 15, 3):
y = np.random.rand(i*(i-1)//2)
Z = linkage(y)
R = inconsistent(Z)
R[i//2,2] = -0.5
assert_(is_valid_im(R) == False)
assert_raises(ValueError, is_valid_im, R, throw=True)
开发者ID:abudulemusa,项目名称:scipy,代码行数:10,代码来源:test_hierarchy.py
示例7: flatcluster
def flatcluster(dMatrix, clusterSetup):
    """Cluster the runs hierarchically and record each run's cluster label.

    Args:
        dMatrix: distance data; converted with ``prepareDRow`` before being
            handed to ``linkage``.
        clusterSetup: mapping of optional settings.  Recognised keys and the
            defaults used when a key is absent:
              'inter-cluster distance' -- linkage method ('complete')
              'plotDendrogram?'        -- draw the dendrogram (True)
              'cutoff criteria'        -- fcluster criterion ('inconsistent')
              'cutoff criteria value'  -- fcluster threshold (2.5)

    Returns:
        The flat cluster label of every observation, as produced by
        ``fcluster``.

    Side effects:
        Prints the prepared distances, the inconsistency matrix, the number
        of clusters and the size of each cluster.  Writes the label (as a
        string) into ``runLogs[index][0]['Cluster']`` of the module-level
        ``runLogs`` and raises the module-level ``clusterCount`` to the
        largest label seen.
    """
    global runLogs, clusterCount

    dRow = prepareDRow(dMatrix)
    print(dRow)

    # User-specified options win; otherwise the documented defaults apply.
    method = clusterSetup.get('inter-cluster distance', 'complete')
    z = linkage(dRow, method)
    inc = inconsistent(z)
    print(inc)

    if clusterSetup.get('plotDendrogram?', True):
        plotdendrogram(z)

    cmethod = clusterSetup.get('cutoff criteria', 'inconsistent')
    cvalue = clusterSetup.get('cutoff criteria value', 2.5)
    clusters = fcluster(z, cvalue, cmethod)
    noClusters = max(clusters)

    # Single pre-formatted strings print identically on Python 2 and 3.
    print('Total number of clusters: %s' % noClusters)

    # Labels are 1-based, so label L is tallied in slot L - 1. A single pass
    # over the labels replaces one full scan per cluster.
    sizes = [0] * int(noClusters)
    for label in clusters:
        sizes[label - 1] += 1
    for number, size in enumerate(sizes, 1):
        print('Cluster %s : %s' % (number, size))

    for runIndex, label in enumerate(clusters):
        runLogs[runIndex][0]['Cluster'] = str(label)
        if label > clusterCount:
            clusterCount = label
    return clusters
开发者ID:canerhamarat,项目名称:EMAworkbench,代码行数:50,代码来源:x_clustererV1.py
示例8: Hierarchy
def Hierarchy(V, **kwargs):
    """Performs hierarchical clustering on *V*. The function essentially uses two scipy functions: ``linkage`` and
    ``fcluster``. See :func:`scipy.cluster.hierarchy.linkage` and :func:`scipy.cluster.hierarchy.fcluster` for the
    explanation of the arguments. Listed here are the arguments that differ from those of scipy.

    :arg V: row-normalized eigenvectors for the purpose of clustering.
    :type V: :class:`numpy.ndarray`

    :arg inconsistent_percentile: if the clustering *criterion* for :func:`scipy.cluster.hierarchy.fcluster`
        is ``inconsistent`` and threshold *t* is not given (default), then the function will use the percentile
        specified by this argument as the threshold. Default is 99.9.
    :type inconsistent_percentile: double

    :arg n_clusters: specifies the maximal number of clusters. If this argument is given, then the function will
        automatically set *criterion* to ``maxclust`` and *t* equal to *n_clusters*.
    :type n_clusters: int

    :returns: flattened array of cluster labels, one per row of *V*.
    :raises ValueError: if no threshold *t* is available, i.e. *t* and *n_clusters* are both omitted while
        *criterion* is not ``inconsistent``.
    """

    from scipy.cluster.hierarchy import linkage, fcluster, inconsistent

    method = kwargs.pop('method', 'single')
    metric = kwargs.pop('metric', 'euclidean')
    Z = linkage(V, method=method, metric=metric)

    criterion = kwargs.pop('criterion', 'inconsistent')
    t = kwargs.pop('t', None)
    ip = kwargs.pop('inconsistent_percentile', 99.9)
    depth = kwargs.pop('depth', 2)
    R = kwargs.pop('R', None)
    monocrit = kwargs.pop('monocrit', None)
    n_clusters = kwargs.pop('n_clusters', None)

    if n_clusters is not None:
        # An explicit cluster count overrides criterion and threshold.
        criterion = 'maxclust'
        t = n_clusters
    elif t is None:
        if criterion != 'inconsistent':
            raise ValueError('threshold t must be given when criterion is %r' % (criterion,))
        # The percentile threshold is derived only when it is needed, so a
        # caller-supplied t no longer depends on an unset local.
        I = inconsistent(Z)
        t = np.percentile(I[:,3], ip)

    labels = fcluster(Z, t, criterion=criterion, depth=depth, R=R, monocrit=monocrit)
    return labels.flatten()
开发者ID:fongchun,项目名称:ProDy,代码行数:42,代码来源:cluster.py
示例9: silhouette_score
def silhouette_score(dendroMatrix, distance_metric, linkage_method, labels):
    """
    Generate a silhouette score based on hierarchical clustering.

    The clustering criterion and the threshold are not arguments: they are
    read from ``request.form['criterion']`` and ``request.form['threshold']``.

    Args:
        dendroMatrix: list, occurrence of words in different files
        distance_metric: string, style of distance metric in the dendrogram
        linkage_method: string, style of linkage method in the dendrogram
        labels: list, file names

    Returns:
        silhouetteScore: string, containing the result of silhouette score
        silhouetteAnnotation: string, annotation of the silhouette score
        score: float, silhouette score
        inconsistentMax: float, upper bound of threshold to calculate silhouette score if using Inconsistent criterion
        maxclustMax: integer, upper bound of threshold to calculate silhouette score if using Maxclust criterion
        distanceMax: float, upper bound of threshold to calculate silhouette score if using Distance criterion
        distanceMin: float, lower bound of threshold to calculate silhouette score if using Distance criterion
        monocritMax: float, upper bound of threshold to calculate silhouette score if using Monocrit criterion
        monocritMin: float, lower bound of threshold to calculate silhouette score if using Monocrit criterion
        threshold: float/integer/string, threshold (t) value that users entered, equals to 'N/A' if users leave the field blank

    NOTE(review): the return statement is not part of the visible code, so
    the exact return shape should be confirmed against the full function.
    """
    activeFiles = len(labels) - 1
    if (activeFiles > 2): # since "number of labels should be more than 2 and less than n_samples - 1"
        Y = metrics.pairwise.pairwise_distances(dendroMatrix, metric=distance_metric)
        Z = hierarchy.linkage(Y, method=linkage_method)
        monocrit = None
        # Each bound below is shortened with the same idiom: format the value
        # with two decimals, then keep that many leading characters of
        # str(value) -- in effect truncating it to about two decimals.
        # 'maxclust' range
        maxclustMax = len(labels) - 1
        # 'inconsistent' range: last column of the last row of the inconsistency matrix
        R = hierarchy.inconsistent(Z, 2)
        inconsistentMax = R[-1][-1]
        slen = len('%.*f' % (2, inconsistentMax))
        inconsistentMax = float(str(inconsistentMax)[:slen])
        # 'distance' range: taken from the cophenetic distances of Z
        d = hierarchy.cophenet(Z)
        distanceMax = d.max()
        slen = len('%.*f' % (2, distanceMax))
        distanceMax = float(str(distanceMax)[:slen])
        distanceMin = d.min() + 0.01
        slen = len('%.*f' % (2, distanceMin))
        distanceMin = float(str(distanceMin)[:slen])
        # 'monocrit' range: taken from maxRstat over column 0 of R
        MR = hierarchy.maxRstat(Z, R, 0)
        monocritMax = MR.max()
        slen = len('%.*f' % (2, monocritMax))
        monocritMax = float(str(monocritMax)[:slen])
        monocritMin = MR.min() + 0.01
        slen = len('%.*f' % (2, monocritMin))
        monocritMin = float(str(monocritMin)[:slen])
        # A blank threshold stays the empty string; anything else must parse as a float.
        threshold = request.form['threshold']
        if threshold == '':
            threshold = str(threshold)
        else:
            threshold = float(threshold)
        # For every criterion, a blank or out-of-range threshold falls back
        # to that criterion's upper bound.
        if request.form['criterion'] == 'maxclust':
            criterion = 'maxclust'
            if (threshold == '') or (threshold > maxclustMax):
                threshold = len(labels) - 1
            else:
                threshold = round(float(threshold))
        elif request.form['criterion'] == 'distance':
            criterion = 'distance'
            if (threshold == '') or (threshold > distanceMax) or (threshold < distanceMin):
                threshold = distanceMax
        elif request.form['criterion'] == 'inconsistent':
            criterion = 'inconsistent'
            if (threshold == '') or (threshold > inconsistentMax):
                threshold = inconsistentMax
        elif request.form['criterion'] == 'monocrit':
            criterion = 'monocrit'
            monocrit = MR
            if (threshold == '') or (threshold > monocritMax) or (threshold < monocritMin):
                threshold = monocritMax
        # NOTE(review): 'criterion' is only bound inside the branches above;
        # any other form value would leave it undefined here -- confirm the
        # form restricts the choices.
        scoreLabel = hierarchy.fcluster(Z, t=threshold, criterion=criterion, monocrit=monocrit)
        if len(set(scoreLabel)) <= 1: # this means all the files are divided into only 1 or less cluster
            silhouetteScore = "Silhouette Score: invalid for only 1 cluster."
            silhouetteAnnotation = "because your file are too similar to each other, program classify all of them in the same cluster"
            score = 'invalid for only 1 cluster'
            inconsistentMax = maxclustMax = distanceMax = distanceMin = monocritMax = monocritMin = threshold = 'N/A'
        else:
            # Y is already a distance matrix, hence metric='precomputed'.
            score = metrics.silhouette_score(Y, labels=scoreLabel, metric='precomputed')
            score = round(score, constants.ROUND_DIGIT)
            inequality = '≤'.decode('utf-8')
            silhouetteScore = "Silhouette Score: " + str(
                score) + "\n(-1 " + inequality + " Silhouette Score " + inequality + " 1)"
            silhouetteAnnotation = "The best value is 1 and the worst value is -1. Values near 0 indicate overlapping clusters. Negative values generally indicate that a sample has been assigned to the wrong cluster, as a different cluster is more similar."
    else:
        # Too few files: no clustering is attempted and only the messages are set.
        silhouetteScore = "Silhouette Score: invalid for less than or equal to 2 files."
        silhouetteAnnotation = ""
        score = 'invalid for less than or equal to 2 files.'
#.........这里部分代码省略.........
开发者ID:chantisnake,项目名称:Lexos,代码行数:101,代码来源:dendrogrammer.py
示例10: check_inconsistent_tdist
def check_inconsistent_tdist(self, depth):
    """Compare inconsistent() on the stored single-linkage ytdist tree with
    the reference matrix recorded for the given depth."""
    tree = hierarchy_test_data.linkage_ytdist_single
    computed = inconsistent(tree, depth)
    reference = hierarchy_test_data.inconsistent_ytdist[depth]
    assert_allclose(computed, reference)
开发者ID:abudulemusa,项目名称:scipy,代码行数:4,代码来源:test_hierarchy.py
示例11: check_inconsistent_q_single
def check_inconsistent_q_single(self, depth):
    """Compare inconsistent() for single/euclidean linkage of the Q data set
    with the stored reference matrix for the given depth."""
    tree = linkage(eo['Q-X'], 'single', 'euclidean')
    computed = inconsistent(tree, depth)
    reference_key = 'inconsistent-Q-single-%d' % depth
    assert_allclose(computed, eo[reference_key], atol=1e-05)
开发者ID:FrankZhao66,项目名称:scipy,代码行数:6,代码来源:test_hierarchy.py
示例12: check_inconsistent_tdist
def check_inconsistent_tdist(self, method, depth, atol):
    """Compare inconsistent() for the given linkage method and depth on the
    tdist data with the stored reference matrix, within tolerance atol."""
    condensed = squareform(_tdist)
    tree = linkage(condensed, method)
    computed = inconsistent(tree, depth)
    reference_key = 'inconsistent-%s-tdist-depth-%d' % (method, depth)
    assert_allclose(computed, eo[reference_key], atol=atol)
开发者ID:FrankZhao66,项目名称:scipy,代码行数:6,代码来源:test_hierarchy.py
示例13: pdist
# Exploratory script: cluster the per-label centroids of case1 hierarchically
# and print diagnostics about where the merge heights jump the most.
# One centroid per label: the mean over the rows of case1 carrying that label.
centroids = np.array([case1[np.where(case1Labels == i)[0],:].mean(axis=0) for i in uniqueLabels])
fig = plt.figure()
ax = fig.add_subplot(111)
ncluster = 27
y = pdist(centroids)
# NOTE(review): 'method' is assigned but not used below; the linkage call
# hard-codes 'average' -- confirm which method is intended.
method = 'centroid'#'average'
z = hierarchy.linkage(y,'average')
#t = hierarchy.fcluster(27,criterion='maxclust')
## computes the max distance between any cluster and each non-singleton cluster
print 'max dists', hierarchy.maxdists(z)
## inconsistency
r = hierarchy.inconsistent(z)
print 'r',r
#print 'max inconsts', hierarchy.maxinconsts(z,r,i)
print 'z',z
#print 'blah', z[:,2] - np.array(z[1:,2].tolist()+[0])
print z[:,2]
print np.hstack([z[1:,2],[0]])
# Absolute difference between each value of column 2 of z and the next one
# (a 0 is appended so the last row is compared against 0).
levelDiffs = np.abs(z[:,2] - np.hstack([z[1:,2],[0]]))
# Currently just column 2 of z; the alternative formulas are commented out.
levelDiffMeans = z[:,2]# - 0.001 #np.hstack([z[1:,2],[0]]) / 2.0#z[:,2] + np.hstack([z[1:,2],[0]]) / 2.0
print 'diffs',levelDiffs*100
# Row indices ordered from the largest difference to the smallest.
diffInds = np.argsort(levelDiffs)
diffInds = diffInds[::-1]
print 'a',levelDiffMeans[diffInds[:6]] #,levelDiffMeans[diffInds[1]],levelDiffMeans[diffInds[2]],levelDiffMeans[diffInds[3]]
print 'b', diffInds[:5]
hierarchy.dendrogram(z)
开发者ID:ajrichards,项目名称:cytostream,代码行数:31,代码来源:SimulatedData3.py
示例14: linkage
# Tutorial script: build two synthetic 2-D clusters, link them with Ward's
# method, print inconsistency statistics and choose a cluster count from the
# largest second difference of the last merge distances.
# generate two clusters: a with 100 points, b with 50:
np.random.seed(4711) # for repeatability of this tutorial
a = np.random.multivariate_normal([10, 0], [[3, 1], [1, 4]], size=[100,])
b = np.random.multivariate_normal([0, 20], [[3, 1], [1, 4]], size=[50,])
X = np.concatenate((a, b),)
print X.shape # 150 samples with 2 dimensions
#plt.scatter(X[:,0], X[:,1])
#plt.show()
# generate the linkage matrix
Z = linkage(X, 'ward')
print Z.shape
# inconsistency statistics computed with depth 5; show the last 10 rows
depth = 5
incons = inconsistent(Z, depth)
print incons[-10:]
# column 2 of the last 10 rows of Z, plotted in reverse order
last = Z[-10:, 2]
last_rev = last[::-1]
idxs = np.arange(1, len(last) + 1)
plt.plot(idxs, last_rev)
acceleration = np.diff(last, 2) # 2nd derivative of the distances
acceleration_rev = acceleration[::-1]
# the second difference has two fewer points; shift x by one so the curves line up
plt.plot(idxs[:-2] + 1, acceleration_rev)
plt.show()
k = acceleration_rev.argmax() + 2 # if idx 0 is the max of this we want 2 clusters
print "clusters:", k
开发者ID:ronanki,项目名称:Hybrid_prosody_model,代码行数:29,代码来源:hierarchical_clustering.py
注:本文中的scipy.cluster.hierarchy.inconsistent函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论