
Python vq.whiten Function Code Examples


This article collects typical usage examples of the Python function scipy.cluster.vq.whiten. If you have been asking yourself how whiten is called in practice, what it is for, or where to find working examples, the curated code samples below should help.



The following presents 20 code examples of the whiten function, drawn from open-source projects and ordered roughly by popularity.
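
Before the project snippets, here is a minimal self-contained sketch (not taken from any project below; the `features` array is purely illustrative) of what whiten itself does: it divides each column of an observation matrix by that column's standard deviation, so every feature enters subsequent distance computations with unit variance.

import numpy as np
from scipy.cluster.vq import whiten

# Rows are observations, columns are features.
features = np.array([[1.9, 2.3],
                     [1.5, 2.5],
                     [0.8, 0.6],
                     [0.4, 1.8]])

# whiten() divides each column by its standard deviation and
# returns a NEW array; the input itself is left unchanged.
whitened = whiten(features)
print(whitened.std(axis=0))  # approximately [1. 1.]

Note that whiten returns a new array and leaves its input unchanged, which is why several examples below (re)assign its result; snippets that call it for side effects alone are bugs in the original projects and are flagged in comments.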

Example 1: sent_integrate

from scipy.cluster.vq import whiten, kmeans, vq

def sent_integrate(sim_matrix, n_class):
    # Equalize the variance of each dimension.
    # whiten() returns a new array; the original code discarded the result.
    sim_matrix = whiten(sim_matrix)

    centroid, distortion = kmeans(sim_matrix, n_class, iter=100, thresh=1e-05)
    labels, dist = vq(sim_matrix, centroid)
    return labels
Developer: NuitNoir | Project: MachineLearning | Source: qiita_doc_sim.py


Example 2: parse

import numpy as np
from scipy.cluster.vq import whiten

def parse(data_file_name, predict_index, ignore_indices, **options):
    with open(data_file_name, 'r') as data_file:
        lines = data_file.read().splitlines()
    x = []
    y = []
    for i, line in enumerate(lines):
        # Skip the two header lines.
        if i == 0 or i == 1:
            continue
        datas = line.split()
        x_category = []
        for i, data in enumerate(datas):
            if i in ignore_indices:  # dict.has_key() no longer exists in Python 3
                continue
            if i == predict_index:
                if data == 'T':
                    y.append(1.0)
                elif data == 'F':
                    y.append(0.0)
                else:
                    y.append(float(data))
                continue
            x_category.append(float(data))
        x.append(x_category)
    x = whiten(np.array(x)) if options.get('whiten_x') else np.array(x)
    y = whiten(np.array(y)) if options.get('whiten_y') else np.array(y)
    x = x - x.mean() if options.get('mean_center_x') else x
    y = y - y.mean() if options.get('mean_center_y') else y
    return (x, y)
Developer: AustinStoneProjects | Project: statsHw2 | Source: regression.py


Example 3: clust_scatter

import numpy as np
from scipy.cluster.vq import whiten

def clust_scatter(samples, clusters, allocation_table, n):

    c = len(allocation_table[0])  # Columns
    r = len(allocation_table)     # Rows

    time_scat_square = 0
    mat_scatter = 0

    for j in range(0, c):  # clusters
        for t in range(0, 10):  # maturities
            for p in range(0, r):  # samples within a cluster
                index = allocation_table[p, j]
                if index != 0:
                    time_scat_square += samples.samples[index - 1].scatter_maturity[t].scatter
            mat_scatter += time_scat_square ** 2
            time_scat_square = 0
        clusters.clusters[j].scatter = np.sqrt(mat_scatter - 10 * clusters.clusters[j].mean ** 2)
        mat_scatter = 0
        if n == 0 or n == 4999:
            print('clust scatter : ' + str(clusters.clusters[j].scatter))

    # Normalize the clusters' scatter.
    vec = np.zeros(c)  # one entry per cluster; the original hard-coded 4
    for j in range(0, c):
        vec[j] = clusters.clusters[j].scatter

    # whiten() returns a new array; the original discarded the result.
    vec = whiten(vec)
    for j in range(0, c):
        clusters.clusters[j].scatter = vec[j]

    return clusters
Developer: ArtMgn | Project: k-means-pca | Source: distances_lib.py


Example 4: test1

    def test1(self):
        print("TEST 1:----------------------------------------------------------------")
        features = np.array([[1.9, 2.3],
                             [1.5, 2.5],
                             [0.8, 0.6],
                             [0.4, 1.8],
                             [0.1, 0.1],
                             [0.2, 1.8],
                             [2.0, 0.5],
                             [0.3, 1.5],
                             [1.0, 1.0]])
        whitened = whiten(features)
        book = np.array((whitened[0], whitened[2]))
        numpy_result = kmeans(whitened, book)[0]
        print(numpy_result)
        print("")

        features2 = np.array([[1.9, 2.3, 0],
                              [1.5, 2.5, 0],
                              [0.8, 0.6, 0],
                              [0.4, 1.8, 0],
                              [0.1, 0.1, 0],
                              [0.2, 1.8, 0],
                              [2.0, 0.5, 0],
                              [0.3, 1.5, 0],
                              [1.0, 1.0, 0]])
        whitened2 = whiten(features2)
        # The original indexed `whitened` here; `whitened2` appears intended,
        # since the initial centroids must match the 3-column data.
        book2 = [whitened2[0], whitened2[2]]
        our_result = np.array(KMeans.k_means2(whitened2.tolist(), 2, book2).centroids)[:, :-1]
        print(our_result)
Developer: jwallp | Project: 151-Assignments | Source: test_kmeans.py


Example 5: LexicalFeatures

def LexicalFeatures():
    """
    Compute feature vectors for word and punctuation features
    """
    num_chapters = len(chapters)
    fvs_lexical = np.zeros((len(chapters), 3), np.float64)
    fvs_punct = np.zeros((len(chapters), 3), np.float64)
    for e, ch_text in enumerate(chapters):
        # note: the nltk.word_tokenize includes punctuation
        tokens = nltk.word_tokenize(ch_text.lower())
        words = word_tokenizer.tokenize(ch_text.lower())
        sentences = sentence_tokenizer.tokenize(ch_text)
        vocab = set(words)
        words_per_sentence = np.array([len(word_tokenizer.tokenize(s))
                                       for s in sentences])

        # average number of words per sentence
        fvs_lexical[e, 0] = words_per_sentence.mean()
        # sentence length variation
        fvs_lexical[e, 1] = words_per_sentence.std()
        # Lexical diversity
        fvs_lexical[e, 2] = len(vocab) / float(len(words))

        # Commas per sentence
        fvs_punct[e, 0] = tokens.count(',') / float(len(sentences))
        # Semicolons per sentence
        fvs_punct[e, 1] = tokens.count(';') / float(len(sentences))
        # Colons per sentence
        fvs_punct[e, 2] = tokens.count(':') / float(len(sentences))

    # apply whitening to decorrelate the features
    fvs_lexical = whiten(fvs_lexical)
    fvs_punct = whiten(fvs_punct)

    return fvs_lexical, fvs_punct
Developer: sophie-greene | Project: repo | Source: author.py


Example 6: kmeansCluster

    def kmeansCluster(self, layer, distance, number):
        import scipy.cluster.hierarchy as sch
        from scipy.cluster.vq import vq, kmeans, whiten
        import numpy as np

        count = layer.featureCount()
        self.setProgressRange(count)
        points = []
        for f in layer.getFeatures():
            geom = f.geometry()
            x = geom.asPoint().x()
            y = geom.asPoint().y()
            points.append([x, y])
            self.updateProgress()

        # pdist also accepts 'cosine' in addition to the metrics mapped here.
        distances = {0: 'euclidean', 1: 'cityblock', 2: 'hamming'}
        disMat = sch.distance.pdist(points, distances.get(distance))
        Z = sch.linkage(disMat, method='average')
        P = sch.dendrogram(Z)
        cluster = sch.fcluster(Z, t=1, criterion='inconsistent')
        data = whiten(points)
        centroid = kmeans(data, number)[0]
        label = vq(data, centroid)[0]
        return centroid, label
Developer: GerrardYNWA | Project: KmeansClustering | Source: kmeans_dialog.py


Example 7: sparse_run

import numpy as ny
import networkx as nx
from scipy.cluster.vq import whiten, kmeans
from sklearn.cluster import KMeans

def sparse_run(g, pos1):

    g2 = sparse_graph(g)  # sparse_graph is defined elsewhere in this project

    # pos1 = nx.spring_layout(g)
    pos2 = nx.spring_layout(g2)

    features = []
    for u in g2.nodes_iter():  # networkx 1.x API; use g2.nodes() on 2.x
        features.append(pos2[u])
    print("features:", len(features))
    features = ny.array(features)

    method = 2
    if method == 1:
        whitened = whiten(features)
        book = ny.array((whitened[0], whitened[2]))
        km = kmeans(whitened, book)

        print(km)
    elif method == 2:
        n_digits = 4
        km = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
        res = km.fit(features)
        print(len(km.labels_), km.labels_)
        print(res)
    return km.labels_, g2
Developer: WeiliangXing | Project: Facebook-Data-Mining | Source: analyze.py


Example 8: bow

from collections import Counter
from scipy.cluster.vq import vq, whiten

def bow(images, codebook, clusters):
    out = images
    temp = []

    print("-" * 60)
    print("Creating the pseudo database.")
    for im in images:
        c = Counter()
        bag, dist = vq(whiten(im[1]), codebook)

        for word in bag:
            c[word] += 1

        # Build a normalized histogram over the cluster vocabulary.
        total = sum(c.values())  # take the total before mutating the counter
        for i in range(clusters):
            if i in c:
                c[i] = c[i] / total
            else:
                c[i] = 0

        temp.append(c)

    for i in range(len(temp)):
        out[i].append(temp[i])

    print("Done.\n")
    return out
Developer: MariaBarrett | Project: VIPExam2 | Source: main.py


Example 9: kmeans

from scipy.cluster import vq

def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''

    # assign to A the result getting the data given the headers
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d

    if whiten:
        W = vq.whiten(A)
    else:
        W = A

    codebook = kmeans_init(W, K, categories)

    # assign to codebook, codes, errors, the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)

    # return the codebook, codes, and representation error
    return codebook, codes, errors
Developer: ymohanty | Project: data-analysis | Source: analysis.py


Example 10: normalize

import numpy as np
from scipy.cluster.vq import whiten

def normalize(data, mode="pca", n=10):
    """Normalize and reduce data by PCA."""

    if mode == "whiten":
        res = whiten(data)
    elif mode == "pca":
        v, P, res = pca_train(data, n, 0, 1)  # pca_train is a project helper
        print(v)
        print("eigen ratio is", v[n - 1] / v[0])
    elif mode == "pca_whiten":
        v, P, proj = pca_train(data, n, 0, 1)
        res = whiten(proj)
    else:
        res = np.array(data)

    return res
Developer: lucidfrontier45 | Project: PyNumPDB | Source: clustering.py


Example 11: kmeans

import numpy
from scipy.cluster.vq import kmeans2, whiten

def kmeans(dataset, n_cluster=625):
    feature_matrix = numpy.asarray(dataset)
    whitened = whiten(feature_matrix)
    # The original hard-coded cluster_num = 625 and ignored the parameter.
    _, cluster_labels = kmeans2(whitened, n_cluster, iter=100)
    return cluster_labels
Developer: persistforever | Project: sentenceEmbedding | Source: cluster.py


Example 12: clustering_scipy_kmeans

import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.vq import vq, kmeans, kmeans2, whiten

def clustering_scipy_kmeans(features, n_clust=8):
  """Cluster whitened features with k-means and plot the codebooks."""
  whitened = whiten(features)
  print(whitened.shape)

  # Distortions for k = 1..11, useful as an elbow plot.
  initial = [kmeans(whitened, i) for i in np.arange(1, 12)]
  plt.plot([var for (cent, var) in initial])
  plt.show()

  # cent, var = initial[3]
  # use vq() to get an assignment for each observation
  # assignment, cdist = vq(whitened, cent)
  # plt.scatter(whitened[:, 0], whitened[:, 1], c=assignment)
  # plt.show()

  codebook, distortion = kmeans(whitened, n_clust)
  print(codebook, distortion)
  assigned_label, dist = vq(whitened, codebook)
  for ii in range(8):
    plt.subplot(4, 2, ii + 1)
    plt.plot(codebook[ii])
  plt.show()

  centroid, label = kmeans2(whitened, n_clust, minit='points')
  print(centroid, label)
  for ii in range(8):
    plt.subplot(4, 2, ii + 1)  # subplot indices start at 1; the original used ii
    plt.plot(centroid[ii])
  plt.show()
Developer: kaustuvkanti | Project: Experiments | Source: dump_transition_for_clustering.py


Example 13: _get_cluster

    def _get_cluster(self, feat_array, k):
        # Normalise the feature array
        whitened = whiten(feat_array)

        codebook, _ = kmeans(whitened, k, iter=self.iter)
        code, _ = vq(whitened, codebook)
        return code
Developer: realstraw | Project: abathur | Source: cluster.py


Example 14: do_cluster

from scipy.cluster import vq

def do_cluster(cluster_count, filename):
    """Use the scipy k-means clustering algorithms to cluster data.

    Return the item names for the smallest cluster.
    """
    # Data, count, findall and DEBUG are helpers defined elsewhere in the project.
    input = Data(filename, -1)
    d = vq.whiten(input.data.transpose())
    codebook, avg_distortion = vq.kmeans(d, cluster_count, 150)
    codes, distortions = vq.vq(d, codebook)

    # codes is now a vector of cluster assignments,
    # ordered the same as the data elements in input.

    c_sizes = {}
    small_i = 0
    if DEBUG: print("Cluster Sizes: ", end="")
    for i in range(cluster_count):
        c_sizes[i] = count(codes, i)
        if DEBUG: print(c_sizes[i], end=" ")
    if DEBUG: print()
    for i in range(cluster_count):
        if c_sizes[i] < c_sizes[small_i]:
            small_i = i

    if DEBUG: print("Smallest cluster size: " + str(c_sizes[small_i]))

    return [input._names[i] for i in findall(codes, small_i)]
Developer: jrbl | Project: wikilytics | Source: jkm.py


Example 15: cluster

    def cluster(self, graph):
        """
        Take a graph and cluster using the method in "On spectral clustering: analysis
        and an algorithm" by Ng et al., 2001.

        :param graph: the graph to cluster
        :type graph: :class:`apgl.graph.AbstractMatrixGraph`

        :returns:  An array of size graph.getNumVertices() of cluster membership 
        """
        L = graph.normalisedLaplacianSym()

        omega, Q = numpy.linalg.eig(L)
        inds = numpy.argsort(omega)

        #First normalise rows, then columns
        standardiser = Standardiser()
        V = standardiser.normaliseArray(Q[:, inds[0:self.k]].T).T
        V = vq.whiten(V)
        #Using kmeans2 here seems to result in a high variance
        #in the quality of clustering. Therefore stick to kmeans
        centroids, clusters = vq.kmeans(V, self.k, iter=self.numIterKmeans)
        clusters, distortion = vq.vq(V, centroids)

        return clusters
Developer: charanpald | Project: sandbox | Source: SpectralClusterer.py


Example 16: recognize

import os
import numpy
from scipy.io import wavfile
from scipy.cluster import vq

def recognize(wavfn):
    samplerate, w = wavfile.read(open(wavfn))
    # run_mfcc, cosine_distance and the constants are project-level helpers.
    mfcc = run_mfcc(samplerate, w, FRAME_SIZE, STEP, NUM_COEFFICIENTS)
    sample_length = mfcc.shape[0]
    whitened = vq.whiten(mfcc)

    def getfile(x):
        return os.path.join(DATADIR, x)

    sq_sum_candidates = []
    cos_sim_candidates = []
    for dirname in os.listdir(DATADIR):
        codebook_fn = os.path.join(DATADIR, dirname, CODEBOOK_FN)
        if not os.path.isfile(codebook_fn):
            continue
        codebook, dist_1 = numpy.load(open(codebook_fn, 'rb'))
        code, dist = vq.vq(whitened, codebook)
        sq_sum_candidates.append((sum(dist * dist) / sample_length, dirname))
        cos_dist = []
        for c, d, w in zip(code, dist, whitened):
            cdist = cosine_distance(codebook[c], w)
            cos_dist.append(cdist)
        cdista = numpy.array(cos_dist)
        cos_sim_candidates.append((sum(cdista) / sample_length, dirname))

    # print('Order by square-sum error ascending:')
    # for score, person in sorted(sq_sum_candidates):
    #     print('\t', score, person)

    print('Cosine similarity')
    for score, person in sorted(cos_sim_candidates, reverse=True):
        print('\t', score, person)
Developer: zejn | Project: govorec | Source: govorec.py


Example 17: perform

    def perform(self):
        print("Start KMeans")
        data = whiten(self.seeds)  # normalize the data
        self.centro, self.sens = kmeans(data, self.k)
        self.matrix, _ = vq(data, self.centro)
        self.resp = self.centro[self.matrix]
        print("Sensibilidade: " + str(self.sens))  # i.e. the k-means distortion
Developer: tuka04 | Project: mc906 | Source: cluster.py


Example 18: process_options

import logging
from scipy.cluster.vq import whiten

def process_options(args):
    # argparser() and wvlib come from the surrounding project.
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        # whitening should be implemented in wvlib to support together with
        # approximate similarity
        if options.approximate:
            raise NotImplementedError
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options
Developer: billy322 | Project: BioNLP-2016 | Source: pairdist.py


Example 19: _get_jump

        def _get_jump(feat_array, max_cluster):
            if max_cluster < 2:
                max_cluster = self._determine_max_k(feat_array)
            whitened = whiten(feat_array)
            # First obtain the covariance matrix of the feature array.
            gamma = np.cov(whitened.T)
            num_dim = whitened.shape[1]
            jump = {}
            distortions_dict = {0: 1}
            power_fact = -num_dim / 2.0
            # Run k-means for every candidate number of clusters.
            for k in range(1, max_cluster + 1):
                codebook, _ = kmeans(whitened, k, iter=self.iter)
                code, _ = vq(whitened, codebook)

                clusters_dict = self._segment_to_clusters(whitened, code)
                mahalanobis_dist_list = []
                for cid, cvals in clusters_dict.items():
                    centroid = codebook[cid]
                    cluster_mahalanobis_dist = map(
                        lambda x: self._sq_mahalanobis(x, centroid, gamma),
                        clusters_dict[cid].values)
                    mahalanobis_dist_list.extend(cluster_mahalanobis_dist)
                this_distortion = np.mean(mahalanobis_dist_list) / num_dim
                distortions_dict[k] = this_distortion ** power_fact

            for k in range(1, max_cluster + 1):
                jump[k] = distortions_dict[k] - distortions_dict[k - 1]

            return jump
Developer: realstraw | Project: abathur | Source: cluster.py


Example 20: new_labelled_page

def new_labelled_page(no_of_samples:int, window_size:int, page_scale:int or tuple, labelled_centroids:[tuple], page_paths:[str]):
    ### Duplication from above
    weighter = gaussian_weighter(window_size)
    windower = f.partial(win_centred_on, window=window_size)
    shifter = f.partial(point_shift, window=window_size)
    scaler = img_scaler(page_scale)
    make_observations = compose(prepare_features, real_fft, weighter, std_dev_contrast_stretch)
    img, label = open_image_label(*page_paths)
    img, label = scaler(img, label)
    f_img = prepare_fft_image(img, window_size)
    access_img = img_accessor(img, identity)
    access_label = img_accessor(label, identity)
    access_f_img = img_accessor(f_img, compose(windower, shifter))
    ### End of duplication
    labels = [a[0] for a in labelled_centroids]
    centroids = np.asarray([a[1] for a in labelled_centroids])
    new_label = np.zeros_like(label)
    for s in img_slices(new_label.shape, 80):
        unlabelled_samples = sample_all_in_area(s, applier(identity, compose(make_observations, access_f_img)))   
        coords = [a[0] for a in unlabelled_samples]
        observations = vq.whiten(np.asarray([a[1] for a in unlabelled_samples]))
        codes, dist = vq.vq(observations, centroids)
        for i, code in zip(coords, codes):
            new_label[i] = labels[code]
    return new_label
Developer: fmcc | Project: mss_layout_analysis | Source: G_run.py



Note: The scipy.cluster.vq.whiten examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are taken from open-source projects contributed by their original authors; copyright remains with those authors, and any further use should follow the corresponding project's license. Please do not repost without permission.

