This article collects typical usage examples of the whiten function from scipy.cluster.vq in Python. If you are wondering what whiten does, how to call it, or what it looks like in real code, the curated examples here should help.
Below are 20 code examples of the whiten function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps our system recommend better Python code examples.
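Before the examples, here is a minimal, self-contained sketch of what whiten itself does (the array values below are made up purely for illustration): it rescales each column of an observation matrix by that column's standard deviation, so every feature ends up with unit variance, which is the usual preprocessing step before passing data to scipy's kmeans/vq routines.

import numpy as np
from scipy.cluster.vq import whiten

# 4 observations x 2 features with very different scales (illustrative values)
features = np.array([[1.9, 120.0],
                     [1.5, 200.0],
                     [0.8,  60.0],
                     [0.4, 180.0]])

# whiten() divides each column by its standard deviation
whitened = whiten(features)
print(whitened.std(axis=0))  # -> roughly [1. 1.]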
Example 1: sent_integrate
def sent_integrate(sim_matrix, n_class):
    # make the variance of each dimension uniform
    # whiten() returns the scaled array; assign it so the normalisation takes effect
    sim_matrix = whiten(sim_matrix)
    centroid, distortion = kmeans(sim_matrix, n_class, iter=100, thresh=1e-05)
    labels, dist = vq(sim_matrix, centroid)
    return labels
Developer: NuitNoir, Project: MachineLearning, Lines: 7, Source: qiita_doc_sim.py
Example 2: parse
def parse(data_file_name, predict_index, ignore_indices, **options):
    data_file = open(data_file_name, 'r')
    lines = data_file.read().splitlines()
    x = []
    y = []
    for i, line in enumerate(lines):
        if i == 0 or i == 1:
            continue
        datas = line.split()
        x_category = []
        for i, data in enumerate(datas):
            if ignore_indices.has_key(i):
                continue
            if i == predict_index:
                if data == 'T':
                    y.append(1.0)
                elif data == 'F':
                    y.append(0.0)
                else:
                    y.append(float(data))
                continue
            x_category.append(float(data))
        x.append(x_category)
    x = whiten(np.array(x)) if options.get('whiten_x') else np.array(x)
    y = whiten(np.array(y)) if options.get('whiten_y') else np.array(y)
    x = x - x.mean() if options.get('mean_center_x') else x
    y = y - y.mean() if options.get('mean_center_y') else y
    return (x, y)
Developer: AustinStoneProjects, Project: statsHw2, Lines: 28, Source: regression.py
Example 3: clust_scatter
def clust_scatter(samples, clusters, allocation_table, n):
    c = len(allocation_table[0])  # Columns
    r = len(allocation_table)     # Rows
    time_scat_square = 0
    mat_scatter = 0
    for j in range(0, c):  # clusters
        for t in range(0, 10):  # maturities
            for p in range(0, r):  # samples within a cluster
                index = allocation_table[p, j]
                if index != 0:
                    time_scat_square += samples.samples[index-1].scatter_maturity[t].scatter
            mat_scatter += time_scat_square**2
            time_scat_square = 0
        clusters.clusters[j].scatter = np.sqrt(mat_scatter - 10 * clusters.clusters[j].mean**2)
        mat_scatter = 0
        if n == 0 or n == 4999:
            print('clust scatter : ' + str(clusters.clusters[j].scatter))
    # Normalize clusters' scatter
    vec = np.zeros(4)
    for j in range(0, c):
        vec[j] = clusters.clusters[j].scatter
    # whiten() returns the scaled vector; assign it so the normalisation is applied
    vec = whiten(vec)
    for j in range(0, c):
        clusters.clusters[j].scatter = vec[j]
    return clusters
Developer: ArtMgn, Project: k-means-pca, Lines: 31, Source: distances_lib.py
Example 4: test1
def test1(self):
    print "TEST 1:----------------------------------------------------------------"
    features = np.array([[1.9, 2.3],
                         [1.5, 2.5],
                         [0.8, 0.6],
                         [0.4, 1.8],
                         [0.1, 0.1],
                         [0.2, 1.8],
                         [2.0, 0.5],
                         [0.3, 1.5],
                         [1.0, 1.0]])
    whitened = whiten(features)
    book = np.array((whitened[0], whitened[2]))
    numpy_result = kmeans(whitened, book)[0]
    print numpy_result
    print ""
    features2 = np.array([[1.9, 2.3, 0],
                          [1.5, 2.5, 0],
                          [0.8, 0.6, 0],
                          [0.4, 1.8, 0],
                          [0.1, 0.1, 0],
                          [0.2, 1.8, 0],
                          [2.0, 0.5, 0],
                          [0.3, 1.5, 0],
                          [1.0, 1.0, 0]])
    whitened2 = whiten(features2)
    book2 = [whitened[0], whitened[2]]
    our_result = np.array(KMeans.k_means2(whitened2.tolist(), 2, book2).centroids)[:, :-1]
    print our_result
Developer: jwallp, Project: 151-Assignments, Lines: 30, Source: test_kmeans.py
Example 5: LexicalFeatures
def LexicalFeatures():
    """
    Compute feature vectors for word and punctuation features
    """
    num_chapters = len(chapters)
    fvs_lexical = np.zeros((len(chapters), 3), np.float64)
    fvs_punct = np.zeros((len(chapters), 3), np.float64)
    for e, ch_text in enumerate(chapters):
        # note: the nltk.word_tokenize includes punctuation
        tokens = nltk.word_tokenize(ch_text.lower())
        words = word_tokenizer.tokenize(ch_text.lower())
        sentences = sentence_tokenizer.tokenize(ch_text)
        vocab = set(words)
        words_per_sentence = np.array([len(word_tokenizer.tokenize(s))
                                       for s in sentences])
        # average number of words per sentence
        fvs_lexical[e, 0] = words_per_sentence.mean()
        # sentence length variation
        fvs_lexical[e, 1] = words_per_sentence.std()
        # lexical diversity
        fvs_lexical[e, 2] = len(vocab) / float(len(words))
        # commas per sentence
        fvs_punct[e, 0] = tokens.count(',') / float(len(sentences))
        # semicolons per sentence
        fvs_punct[e, 1] = tokens.count(';') / float(len(sentences))
        # colons per sentence
        fvs_punct[e, 2] = tokens.count(':') / float(len(sentences))
    # apply whitening (scale each feature to unit variance)
    fvs_lexical = whiten(fvs_lexical)
    fvs_punct = whiten(fvs_punct)
    return fvs_lexical, fvs_punct
Developer: sophie-greene, Project: repo, Lines: 35, Source: author.py
Example 6: kmeansCluster
def kmeansCluster(self, layer, distance, number):
    import scipy
    import scipy.cluster.hierarchy as sch
    from scipy.cluster.vq import vq, kmeans, whiten
    import numpy as np
    count = layer.featureCount()
    self.setProgressRange(count)
    points = []
    for f in layer.getFeatures():
        geom = f.geometry()
        x = geom.asPoint().x()
        y = geom.asPoint().y()
        point = []
        point.append(x)
        point.append(y)
        points.append(point)
        self.updateProgress()
    distances = {0: 'euclidean', 1: 'cityblock', 2: 'hamming'}
    disMat = sch.distance.pdist(points, distances.get(distance))  # 'euclidean', 'cityblock', 'hamming', 'cosine'
    Z = sch.linkage(disMat, method='average')
    P = sch.dendrogram(Z)
    cluster = sch.fcluster(Z, t=1, criterion='inconsistent')
    data = whiten(points)
    centroid = kmeans(data, number)[0]
    label = vq(data, centroid)[0]
    return centroid, label
Developer: GerrardYNWA, Project: KmeansClustering, Lines: 28, Source: kmeans_dialog.py
Example 7: sparse_run
def sparse_run(g, pos1):
    g2 = sparse_graph(g)
    # pos1 = nx.spring_layout(g)
    pos2 = nx.spring_layout(g2)
    features = []
    for u in g2.nodes_iter():
        # print type(u)
        # print u
        # print pos[u]
        features.append(pos2[u])
    print "features:", len(features)
    features = ny.array(features)
    method = 2
    if method == 1:
        whitened = whiten(features)
        book = ny.array((whitened[0], whitened[2]))
        km = kmeans(whitened, book)
        print km
    elif method == 2:
        n_digits = 4
        km = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
        res = km.fit(features)
        print len(km.labels_), km.labels_
        print res
    return km.labels_, g2
Developer: WeiliangXing, Project: Facebook-Data-Mining, Lines: 30, Source: analyze.py
Example 8: bow
def bow(images, codebook, clusters):
    out = images
    temp = []
    print "-"*60
    print "Creating the pseudo database."
    for im in images:
        c = Counter()
        bag, dist = vq(whiten(im[1]), codebook)
        for word in bag:
            c[word] += 1
        # creating histograms
        for i in range(clusters):
            if i in c.iterkeys():
                c[i] = c[i]/sum(c.values())
            if i not in c.iterkeys():
                c[i] = 0
        temp.append(c)
    for i in range(len(temp)):
        out[i].append(temp[i])
    print "Done.\n"
    return out
Developer: MariaBarrett, Project: VIPExam2, Lines: 27, Source: main.py
Example 9: kmeans
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create.
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''
    # assign to A the result of getting the data given the headers
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    codebook = kmeans_init(W, K, categories)
    # assign to codebook, codes, errors the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)
    # return the codebook, codes, and representation error
    return codebook, codes, errors
Developer: ymohanty, Project: data-analysis, Lines: 25, Source: analysis.py
Example 10: normalize
def normalize(data, mode="pca", n=10):
    """ normalize and reduce data by PCA"""
    if mode == "whiten":
        res = whiten(data)
    elif mode == "pca":
        v, P, res = pca_train(data, n, 0, 1)
        print v
        print "eigen ratio is ", v[n-1] / v[0]
    elif mode == "pca_whiten":
        v, P, proj = pca_train(data, n, 0, 1)
        res = whiten(proj)
    else:
        res = np.array(data)
    return res
Developer: lucidfrontier45, Project: PyNumPDB, Lines: 16, Source: clustering.py
Example 11: kmeans
def kmeans(dataset, n_cluster=625):
    from scipy.cluster.vq import kmeans2, whiten
    feature_matrix = numpy.asarray(dataset)
    whitened = whiten(feature_matrix)
    cluster_num = n_cluster  # use the requested cluster count rather than a hard-coded 625
    _, cluster_labels = kmeans2(whitened, cluster_num, iter=100)
    return cluster_labels
Developer: persistforever, Project: sentenceEmbedding, Lines: 7, Source: cluster.py
Example 12: clustering_scipy_kmeans
def clustering_scipy_kmeans(features, n_clust=8):
    """
    """
    whitened = whiten(features)
    print whitened.shape
    initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
    plt.plot([var for (cent, var) in initial])
    plt.show()
    #cent, var = initial[3]
    ##use vq() to get an assignment for each obs.
    #assignment, cdist = vq(whitened, cent)
    #plt.scatter(whitened[:,0], whitened[:,1], c=assignment)
    #plt.show()
    codebook, distortion = kmeans(whitened, n_clust)
    print codebook, distortion
    assigned_label, dist = vq(whitened, codebook)
    for ii in range(8):
        plt.subplot(4, 2, ii+1)
        plt.plot(codebook[ii])
    plt.show()
    centroid, label = kmeans2(whitened, n_clust, minit='points')
    print centroid, label
    for ii in range(8):
        plt.subplot(4, 2, ii+1)  # subplot indices start at 1
        plt.plot(centroid[ii])
    plt.show()
Developer: kaustuvkanti, Project: Experiments, Lines: 30, Source: dump_transition_for_clustering.py
Example 13: _get_cluster
def _get_cluster(self, feat_array, k):
    # normalise the feature array
    whitened = whiten(feat_array)
    codebook, _ = kmeans(whitened, k, iter=self.iter)
    code, _ = vq(whitened, codebook)
    return code
Developer: realstraw, Project: abathur, Lines: 7, Source: cluster.py
Example 14: do_cluster
def do_cluster(cluster_count, filename):
    """Use the scipy k-means clustering algorithms to cluster data.
    Return the item names for the smallest cluster.
    """
    input = Data(filename, -1)
    d = vq.whiten(input.data.transpose())
    codebook, avg_distortion = vq.kmeans(d, cluster_count, 150)
    codes, distortions = vq.vq(d, codebook)
    # codes is now a vector of cluster assignments
    # it is ordered the same as data elements in input
    c_sizes = {}
    small_i = 0
    if DEBUG: print "Cluster Sizes: ",
    for i in range(cluster_count):
        c_sizes[i] = count(codes, i)
        if DEBUG: print c_sizes[i],
    if DEBUG: print
    for i in range(cluster_count):
        if c_sizes[i] < c_sizes[small_i]:
            small_i = i
    if DEBUG: print "Smallest cluster size: " + str(c_sizes[small_i])
    return [input._names[i] for i in findall(codes, small_i)]
Developer: jrbl, Project: wikilytics, Lines: 27, Source: jkm.py
Example 15: cluster
def cluster(self, graph):
    """
    Take a graph and cluster using the method in "On spectral clustering: analysis
    and an algorithm" by Ng et al., 2001.

    :param graph: the graph to cluster
    :type graph: :class:`apgl.graph.AbstractMatrixGraph`

    :returns: An array of size graph.getNumVertices() of cluster membership
    """
    L = graph.normalisedLaplacianSym()
    omega, Q = numpy.linalg.eig(L)
    inds = numpy.argsort(omega)
    # first normalise rows, then columns
    standardiser = Standardiser()
    V = standardiser.normaliseArray(Q[:, inds[0:self.k]].T).T
    V = vq.whiten(V)
    # Using kmeans2 here seems to result in a high variance
    # in the quality of clustering. Therefore stick to kmeans
    centroids, clusters = vq.kmeans(V, self.k, iter=self.numIterKmeans)
    clusters, distortion = vq.vq(V, centroids)
    return clusters
Developer: charanpald, Project: sandbox, Lines: 25, Source: SpectralClusterer.py
Example 16: recognize
def recognize(wavfn):
    samplerate, w = wavfile.read(open(wavfn))
    mfcc = run_mfcc(samplerate, w, FRAME_SIZE, STEP, NUM_COEFFICIENTS)
    sample_length = mfcc.shape[0]
    whitened = vq.whiten(mfcc)
    def getfile(x):
        return os.path.join(DATADIR, x)
    sq_sum_candidates = []
    cos_sim_candidates = []
    for dirname in os.listdir(DATADIR):
        codebook_fn = os.path.join(DATADIR, dirname, CODEBOOK_FN)
        if not os.path.isfile(codebook_fn):
            continue
        codebook, dist_1 = numpy.load(open(codebook_fn, 'rb'))
        code, dist = vq.vq(whitened, codebook)
        sq_sum_candidates.append((sum(dist*dist)/sample_length, dirname))
        cos_dist = []
        for c, d, w in zip(code, dist, whitened):
            cdist = cosine_distance(codebook[c], w)
            cos_dist.append(cdist)
        cdista = numpy.array(cos_dist)
        cos_sim_candidates.append((sum(cdista)/sample_length, dirname))
    #print 'Order by square-sum error ascending:'
    #for score, person in sorted(sq_sum_candidates):
    #    print '\t', score, person
    print 'Cosine similarity'
    for score, person in sorted(cos_sim_candidates, reverse=True):
        print '\t', score, person
Developer: zejn, Project: govorec, Lines: 32, Source: govorec.py
Example 17: perform
def perform(self):
    print "Start KMeans"
    data = whiten(self.seeds)  # normalize the data
    self.centro, self.sens = kmeans(data, self.k)
    self.matrix, _ = vq(data, self.centro)
    self.resp = self.centro[self.matrix]
    print "Sensibilidade: " + str(self.sens)
Developer: tuka04, Project: mc906, Lines: 7, Source: cluster.py
Example 18: process_options
def process_options(args):
    options = argparser().parse_args(args)
    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')
    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)
    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()
    words, vectors = wv.words(), wv.vectors()
    if options.whiten:
        # whitening should be implemented in wvlib to support use together with
        # approximate similarity
        if options.approximate:
            raise NotImplementedError
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)
    return words, vectors, wv, options
Developer: billy322, Project: BioNLP-2016, Lines: 31, Source: pairdist.py
Example 19: _get_jump
def _get_jump(feat_array, max_cluster):
    if max_cluster < 2:
        max_cluster = self._determine_max_k(feat_array)
    whitened = whiten(feat_array)
    # first obtain the covariance matrix of the feature array
    gamma = np.cov(whitened.T)
    num_dim = whitened.shape[1]
    jump = {}
    distortions_dict = {0: 1}
    power_fact = -num_dim / 2.0
    # run k-means for every candidate number of clusters
    for k in xrange(1, max_cluster + 1):
        codebook, _ = kmeans(whitened, k, iter=self.iter)
        code, _ = vq(whitened, codebook)
        clusters_dict = self._segment_to_clusters(whitened, code)
        mahalanobis_dist_list = []
        for cid, cvals in clusters_dict.iteritems():
            centroid = codebook[cid]
            cluster_mahalanobis_dist = map(
                lambda x: self._sq_mahalanobis(x, centroid, gamma),
                clusters_dict[cid].values)
            mahalanobis_dist_list.extend(cluster_mahalanobis_dist)
        this_distortion = np.mean(mahalanobis_dist_list) / num_dim
        distortions_dict[k] = this_distortion ** power_fact
    for k in xrange(1, max_cluster + 1):
        jump[k] = distortions_dict[k] - distortions_dict[k - 1]
    return jump
Developer: realstraw, Project: abathur, Lines: 30, Source: cluster.py
Example 20: new_labelled_page
def new_labelled_page(no_of_samples: int, window_size: int, page_scale: int or tuple, labelled_centroids: [tuple], page_paths: [str]):
    ### Duplication from above
    weighter = gaussian_weighter(window_size)
    windower = f.partial(win_centred_on, window=window_size)
    shifter = f.partial(point_shift, window=window_size)
    scaler = img_scaler(page_scale)
    make_observations = compose(prepare_features, real_fft, weighter, std_dev_contrast_stretch)
    img, label = open_image_label(*page_paths)
    img, label = scaler(img, label)
    f_img = prepare_fft_image(img, window_size)
    access_img = img_accessor(img, identity)
    access_label = img_accessor(label, identity)
    access_f_img = img_accessor(f_img, compose(windower, shifter))
    ### End of duplication
    labels = [a[0] for a in labelled_centroids]
    centroids = np.asarray([a[1] for a in labelled_centroids])
    new_label = np.zeros_like(label)
    for s in img_slices(new_label.shape, 80):
        unlabelled_samples = sample_all_in_area(s, applier(identity, compose(make_observations, access_f_img)))
        coords = [a[0] for a in unlabelled_samples]
        observations = vq.whiten(np.asarray([a[1] for a in unlabelled_samples]))
        codes, dist = vq.vq(observations, centroids)
        for i, code in zip(coords, codes):
            new_label[i] = labels[code]
    return new_label
Developer: fmcc, Project: mss_layout_analysis, Lines: 25, Source: G_run.py
Note: The scipy.cluster.vq.whiten examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. For redistribution and use, please refer to the License of the corresponding project; do not republish without permission.