
Python cluster.affinity_propagation Function Code Examples


This article collects and summarizes typical usage examples of the Python function sklearn.cluster.affinity_propagation. If you have been wondering what exactly affinity_propagation does, how to call it, or what it looks like in real code, the hand-picked examples below should help.



The following presents 20 code examples of the affinity_propagation function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
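Before the excerpts, here is a minimal, self-contained sketch of the function's basic contract. The toy data and the similarity construction are illustrative assumptions, not taken from any of the examples: affinity_propagation consumes a precomputed similarity matrix and returns a pair (cluster_centers_indices, labels).

import numpy as np
from sklearn.cluster import affinity_propagation

# Made-up toy data: four 2-D points forming two obvious groups
X = np.array([[1.0, 1.1], [0.9, 1.0], [5.0, 5.2], [5.1, 4.9]])

# Affinity propagation works on similarities; negative squared
# euclidean distance is the conventional choice
S = -((X[:, None, :] - X[None, :, :]) ** 2).sum(axis=-1)

centers, labels = affinity_propagation(S)
print("exemplar indices:", centers)  # one representative sample per cluster
print("labels:", labels)             # cluster id for each sample

Many of the examples below follow the same pattern, substituting a correlation or covariance matrix for S.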

Example 1: learn_number_clusters

def learn_number_clusters(ds, method='ap'):
    if method=='ap':
        from sklearn import cluster, covariance
#         _, labels = cluster.affinity_propagation(ds.M)
#         return labels
#     elif method=='ap2':
        edge_model = covariance.GraphLassoCV(verbose=True)
        # standardize the time series: using correlations rather than covariance
        # is more efficient for structure recovery
        X = ds.M.values.copy().T
        X /= X.std(axis=0)
        print('--- B')
        edge_model.fit(X)
        _, labels = cluster.affinity_propagation(edge_model.covariance_)
        return labels

    elif method=='rbm':
        from sklearn import neural_network
        model = neural_network.BernoulliRBM(n_components=100,
                                            #random_state=0, 
                                            #n_iter=npasses,
                                            verbose=True
                                            )
        X = model.fit_transform(ds.M)
        return X
Developer: jjsnlee, Project: Kaggle_LearningSocialNetworks, Lines of code: 25, Source file: kaggle_work.py


Example 2: cluster_keywords

def cluster_keywords(data,min_size=2, cluster_preference=True, verbose=True):
    start_cluster_idx = 0
    clusters = np.zeros(len(data['labels']), dtype=int)

    # create graph
    label_co_occ = data['co_occ']
    g = nx.from_numpy_matrix(label_co_occ)

    if(cluster_preference):
        # define preferences
        net_sizes = np.array([np.sqrt(1+m) for m in data['label_expenses']])
        M = np.percentile(label_co_occ,75.)
        m = np.min(label_co_occ)
        sizeM = np.percentile(net_sizes,75.)
        preference = m + np.asarray([min(s,sizeM) for s in net_sizes])*((M-m)/sizeM)

    for comp in sorted(nx.connected_components(g), key=len, reverse=True):
        comp = sorted(comp)  # newer networkx yields sets; make the component indexable
        l = len(comp)
        if(l >= min_size):
            temp_co_occ = (label_co_occ[:,comp])[comp,:]
            if(cluster_preference):
                # note: affinity_propagation returns (cluster_center_indices, labels),
                # so n_clusters here is actually the array of exemplar indices
                [n_clusters, temp_clusters] = affinity_propagation(temp_co_occ,
                                                                    preference=preference[comp],
                                                                    max_iter=500,
                                                                    convergence_iter=40)
            else:
                [n_clusters, temp_clusters] = affinity_propagation(temp_co_occ,
                                                                    max_iter=500,
                                                                    convergence_iter=40)
            for i in range(l):
                clusters[comp[i]] = temp_clusters[i] + start_cluster_idx
            start_cluster_idx += len(n_clusters)
            if(verbose):
                print('Found component of size ' + str(l) + ' and added ' \
                        + str(len(n_clusters)) + ' clusters')
        else:
            # only one cluster for this component
            if(verbose):
                print('Found component of size ' + str(l) + ' so do not run affinity_propagation')
            for n in comp:
                clusters[n] = start_cluster_idx            
            start_cluster_idx += 1

    return g, clusters
Developer: sds-dubois, Project: ExpenseAnalyzer, Lines of code: 44, Source file: graph_tools.py


Example 3: clust

def clust(vectorfile,matrixfile,clusted):

    fid2fname = {}
    for line in open(vectorfile) :
        line = line.strip().split('\t')
        fid2fname.setdefault(int(line[0]), line[1:])

    N = len(fid2fname)
    rowlist = []
    collist = []
    datalist = []
    for line in open(matrixfile) :
        line = line.strip().split('\t')
        if len(line) < 3 : continue
        f1, f2, sim = line[:3]
        rowlist.append(int(f1))
        collist.append(int(f2))
        datalist.append(float(sim))

    for id in fid2fname :
        rowlist.append(int(id))
        collist.append(int(id))
        datalist.append(1.0)

    row = np.array(rowlist)
    col = np.array(collist)
    data = np.array(datalist)
    graph = coo_matrix((data, (row, col)), shape=(N, N))

    ###############################################################################

    # Force the solver to be arpack, since amg is numerically
    # unstable on this example
    # labels = spectral_clustering(graph, n_clusters=160, eigen_solver='arpack')

    _, labels = cluster.affinity_propagation(graph)
    n_labels = labels.max()
    print ("nlabels:",n_labels)

    # for i in range(n_labels + 1):
    #     print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    cluster2fid = {}
    for index, lab in enumerate(labels) :
        cluster2fid.setdefault(lab, [])
        cluster2fid[lab].append(index)

    normal_data = open("normal-data.txt", 'w')
    easy_data = open("easy-data-500.txt", 'w')
    for index, lab in enumerate(cluster2fid) :
        for fid in cluster2fid[lab] :
            strx = ""
            for i in range(0, len(fid2fname[fid])):
                strx += str(fid2fname[fid][i]) + "\t"
            print(strx + '\t' + str(index), file=normal_data)
            print(strx + '\t' + str(fid) + '\t' + str(index), file=easy_data)
Developer: DvHuang, Project: Wifi_Clust, Lines of code: 56, Source file: b.py


Example 4: affinityprop

def affinityprop(correlations,names):
    n_clusters=0
    a, labels = cluster.affinity_propagation(correlations)
    #print(labels)
    print("Affinity Propagation Clusters")
    for i in range(labels.max()+1):
        print('Cluster %i: %s' % ((i+1),
                              ', '.join(names[labels==i])))
        if len(names[labels==i]) > 1:
            n_clusters += 1
    print("Number of Clusters with more than 1 element: " + str(n_clusters))
    return n_clusters
Developer: winteram, Project: FashionPins, Lines of code: 12, Source file: spectralcluster.py


Example 5: test_affinity_propagation

    def test_affinity_propagation(self):
        iris = datasets.load_iris()
        similarity = np.cov(iris.data)
        df = pdml.ModelFrame(similarity)

        result = df.cluster.affinity_propagation()
        expected = cluster.affinity_propagation(similarity)

        self.assertEqual(len(result), 2)
        self.assert_numpy_array_almost_equal(result[0], expected[0])

        self.assertTrue(isinstance(result[1], pdml.ModelSeries))
        self.assert_index_equal(result[1].index, df.index)
        self.assert_numpy_array_equal(result[1].values, expected[1])
Developer: Sandy4321, Project: pandas-ml, Lines of code: 14, Source file: test_cluster.py


Example 6: discover_clusters

def discover_clusters(var):
    from sklearn import cluster, covariance
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    edge_model.fit(var)
    
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % (i,
            ', '.join(var.columns[labels == i])))
    del cluster, covariance
    
    return labels, edge_model.precision_.copy()
Developer: xiahongze, Project: au_finance, Lines of code: 15, Source file: market_structure.py


Example 7: makeClusters

 def makeClusters(self, toks, cnts, rootName = 'ROOT',saveSimMat=None):
     '''
     '''
     S = self.w2v.getSimMat(toks)
     
     if(saveSimMat):
         file_iter=open(saveSimMat,"w")  # text mode: write() receives str, not bytes
         logger=file_iter.write
         logger("word|%s\n"%("|".join(toks)))
         for ind,tok in enumerate(toks): 
             list_toks=[str(round(k,3)) for k in S[ind]]
             str_join="|".join((tok,"|".join((list_toks))))
             logger("%s\n"%str_join)
         file_iter.close()
     
     ctable = []
     n = len(toks)
     x = range(n)
     ntoks = np.array(toks)
     ncnts = np.array(cnts, dtype='float')
     ncnts = ncnts/ncnts.sum()    
     k = 0
     while (True):
         Sk = S[np.ix_(x,x)]
         ntoks = ntoks[x]
         ncnts = ncnts[x]    
         xk, labels = cluster.affinity_propagation(Sk) 
         n_labels = labels.max()
         for i in range(n_labels + 1):
             cidx = labels == i
             ctoks = ntoks[cidx]
             ccnts = ncnts[cidx]
             pidx = ccnts.argsort()[::-1][0]
             cname = ntoks[xk[i]] #cluster center
             clname = ctoks[pidx] #most frequent node in cluster
             #temp = {'LEVEL':k, 'CLUSTER': (i+1), 'CENTER': cname, 'NAME': ' '.join(clname[:-2].split('_NG_')), 'MEMBERS': ctoks}
             temp = {'LEVEL':k, 'CLUSTER': (i+1), 'CENTER': cname, 'NAME': ' '.join(cname[:-2].split('_NG_')), 'MEMBERS': ctoks}
             ctable.append(temp)
         k+=1
         #break
         x = xk
         if len(xk) <= 3:
             break
         
     self.ctable = ctable
     self.G = self.ctable2G_()
             
     return ctable
Developer: saurabh-singh-17, Project: secgov, Lines of code: 48, Source file: mcsa_clusters.py


Example 8: cluster_measurement_points

def cluster_measurement_points(m_matrix, m_name, corr_bnd = [0.1,0.9],alg='aff'):
    exemplars_dict = dict()

    if m_matrix.shape[1] == 0:
        return [], exemplars_dict, [], []

    elif m_matrix.shape[1] == 1:
        exemplars_ = [0]
        labels_= [0]
        exemplars_name = m_name

    else:
        distmat_input = find_norm_dist_matrix(m_matrix)

        # Find representative set of sensor measurements 
        min_dist_ = np.sqrt(2*(1-(corr_bnd[1])))
        max_dist_ = np.sqrt(2*(1-(corr_bnd[0])))

        if alg == 'pack':
            log.info('use pack clustering algorithm')
            exemplars_, labels_ = max_pack_cluster(distmat_input, min_dist=min_dist_, max_dist=max_dist_)
        else:
            log.info('use affinity clustering algorithm')
            SIMM_MAT = 2 - distmat_input
            exemplars_, labels_ = cluster.affinity_propagation(SIMM_MAT, damping=0.5)

        num_clusters = int(labels_.max()+1)
        log.info('-' * 40)
        log.info(str(num_clusters) + ' clusters out of ' + str(len(labels_)) + ' measurements')
        log.info('-' * 40)

        validity, intra_dist, inter_dist = compute_cluster_err(distmat_input, labels_)

        log.info('validity: ' + str(round(validity,2)) + ', intra_dist: ' +
                 str(np.round(intra_dist,2)) + ', inter_dist: ' +
                 str(np.round(inter_dist,2)))
        log.info('-' * 40)
        exemplars_name = list(np.array(m_name)[exemplars_])
    
    for label_id, (m_idx,exemplar_label) in enumerate(zip(exemplars_, exemplars_name)):
        log.info(str(exemplar_label))
        children_set = list(set(np.nonzero(labels_ == label_id)[0]) - set([m_idx]))
        log.info('Label ' + str(label_id) + ' : ' + str(m_idx) + '<--' + str(children_set) )
        exemplars_dict.update({exemplar_label : list(np.array(m_name)[children_set])})

    return m_matrix[:, exemplars_], exemplars_dict, exemplars_, labels_
Developer: TinyOS-Camp, Project: DDEA-DEV, Lines of code: 46, Source file: pack_cluster.py


Example 9: corr_cluster

def corr_cluster(corr_file, matrix_file, cluster_dir):
    # DataFrame.from_csv was removed from pandas; read_csv with index_col=0
    # is the documented replacement here
    corr_frame = pa.read_csv(corr_file, index_col=0).abs()
    #use the abs value of corr to cluster
    freq_matrix = pa.read_csv(matrix_file, index_col=0)

    _, labels = cluster.affinity_propagation(corr_frame)
    n_label = labels.max()
    names = corr_frame.index
    #output cluster file
    if corr_file[-1] == "/":
        cluster_file = corr_file.split("/")[-2] + "_cluster"
    else:
        cluster_file = corr_file.split("/")[-1] + "_cluster"

    cluster_file = codecs.open(os.path.join(cluster_dir, cluster_file),
                               "w",
                               encoding="utf-8")

    for i in range(n_label + 1):
        #compute the average correlation between cluster and out-cluster
        clus = np.array(names[labels == i])
        in_cluster = corr_frame[clus].loc[clus]
        up_index = np.triu_indices(len(clus), 1)
        aver_corr = np.array(in_cluster)[up_index].mean()

        out_clus = np.array(names[labels != i])
        out_cluster = corr_frame[clus].loc[out_clus]
        out_aver_corr = np.array(out_cluster).mean()

        #compute the variance of the entity
        in_aver_var = freq_matrix[clus].var(axis=1).mean()

        obj = {"cluster": map(str, clus), "in_aver_corr": aver_corr,
               "out_aver_corr": out_aver_corr,
               "in_aver_var": in_aver_var}
        cluster_file.write(json.dumps(obj, ensure_ascii=False) + "\n")

    cluster_file.flush()
    cluster_file.close()
Developer: Tskatom, Project: twitter_finance, Lines of code: 39, Source file: corr_cluster.py


Example 10: cluster_measurement_points

def cluster_measurement_points(m_matrix,m_name,corr_bnd=[0.1,0.9],alg='aff'):
    exemplars_dict={}    
    if m_matrix.shape[1]==0:
        return [],exemplars_dict,[],[]
    elif m_matrix.shape[1]==1:
        exemplars_=[0]
        labels_=[0]
        exemplars_name=m_name
    else:
        distmat_input=find_norm_dist_matrix(m_matrix)
        # Find representative set of sensor measurements 
        min_dist_=np.sqrt(2*(1-(corr_bnd[1])))
        max_dist_=np.sqrt(2*(1-(corr_bnd[0])))
        if alg=='pack':
            print('use pack clustering algorithm')
            exemplars_,labels_=max_pack_cluster(distmat_input,min_dist=min_dist_,max_dist=max_dist_)
        else:
            print('use affinity clustering algorithm')
            SIMM_MAT=2-distmat_input
            exemplars_,labels_=cluster.affinity_propagation(SIMM_MAT,damping=0.5)

        
        num_clusters=int(labels_.max()+1)
        print('-------------------------------------------------------------------------')
        print(num_clusters, 'clusters out of', len(labels_), 'measurements')
        print('-------------------------------------------------------------------------')
        validity,intra_dist,inter_dist=compute_cluster_err(distmat_input,labels_)
        print('validity:',round(validity,2),', intra_dist: ',np.round(intra_dist,2),', inter_dist: ',np.round(inter_dist,2))
        print('-------------------------------------------------------------------------')
        exemplars_name=list(np.array(m_name)[exemplars_])
    
    for label_id,(m_idx,exemplar_label) in enumerate(zip(exemplars_,exemplars_name)):
        print(exemplar_label)
        children_set=list(set(np.nonzero(labels_==label_id)[0])-set([m_idx]))
        print('Label ', label_id, ': ', m_idx, '<--', children_set)
        exemplars_dict.update({exemplar_label:list(np.array(m_name)[children_set])})
    return m_matrix[:,exemplars_], exemplars_dict,exemplars_,labels_
Developer: TinyOS-Camp, Project: DDEA-DEV, Lines of code: 37, Source file: pack_cluster.py


Example 11: affProp

def affProp(instance_path, res_folder, strategy = 2):
    instances = instance_path.rsplit('/', 1)[0] + '/'
    file = instance_path.rsplit('/', 1)[1]
    input_type  = '.' + file.rsplit('.', 1)[1]
    file = file.rsplit('.', 1)[0]
    data, row_names = parse.read(instances + file + input_type)
    print('Size of data matrix: ', data.shape)
    if len(data) != len(row_names):
        print('Af prop error: data and row_names have diff. lens', len(data), len(row_names))
    #save_matrix_fig(data, res_folder, file+'_in')
    sim_matrix = []
    #
    try:
        sim_matrix = np.load(res_folder+file+'_sim'+str(strategy)+'.npy')
        print('Sim matrix %s found.' % (res_folder+file+'_sim'+str(strategy)+'.npy'))
    except:
        print('Sim matrix %s NOT found!!!!' % (res_folder+file+'_sim'+str(strategy)+'.npy'))
        sim_matrix = pp.strategy(data, 'sim', strategy)
        np.save(res_folder+file+'_sim'+str(strategy), sim_matrix)

    old_n_clusters = 0
    old_non_clustered = 0

    # list to save labels from all iterations, so we can later pick the best clustering
    res_from_diff_params = {}
    nr_clusters_from_diff_params = {}
    non_clustered_from_diff_params = {}
    distribution_from_diff_params = {}
    best_iteration = -1
    sec_best_iteration = -1
    n = sim_matrix.shape[0]
    min_non_clusterd = n
    s_min_non_clusterd = n
    max_std_dev = n
    sec_threshold = 0.0001
    n_iterations = 20  # must be an odd number

    sim_matrix[sim_matrix == 0] = -1e10
    #min_preferance = 0
    #min_preferance *= np.max(sim_matrix[sim_matrix > 0])
    min_preferance = np.min(sim_matrix[sim_matrix > 0]) - 10
    max_preferance = np.median(sim_matrix[sim_matrix > 0])
    print('min_preferance, ', min_preferance)
    print('max_preferance, ', max_preferance)

    if min_preferance > max_preferance:
        raise Exception('Something is wrong with preferance setting: %d %d' %
            (min_preferance, max_preferance))
    elif min_preferance == max_preferance:
        n_iterations = 1
        pref_list = [min_preferance]

    pref_step = (max_preferance-min_preferance) / n_iterations

    # cluster the data with affinity propagation ---------------------------------------------
    for iteration in range(n_iterations):

        if iteration == 0:
            preference = min_preferance
        else:
            preference += pref_step
        labels = []
        print('_______________________________________________________')
        print('Aff. Prop. with preferance =', preference)

        _, labels = affinity_propagation(sim_matrix, preference=preference)
        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

        num_per_cluster = {}
        for i in range(n_clusters):
            num_per_cluster[i] = 0

        for label in labels:
            for i in range(n_clusters):
                if label == i:
                    num_per_cluster[i] += 1


        # TODO: criteria for skipping or breaking the loop ---------------------------------------------
        # skip the iteration if the number of clusters is as before
        if iteration == 0:
            old_n_clusters = n_clusters
        #elif n_clusters >= old_n_clusters:
        #    break
        old_n_clusters = n_clusters
        # increase the preferance
        if n_clusters == 1:
            print('DEBUG: Aff prop. n_clusters == 1, going to next iteration')
            min_preferance = preference
            max_preferance += (max_preferance - min_preferance) / 2
            pref_step = (max_preferance-min_preferance) / (n_iterations-iteration)
            print('min = %f, max = %f, step = %f' % (min_preferance, max_preferance, pref_step))
            continue
        # lower the preferance
        if n_clusters >= 0.1*n:
            print('DEBUG: Aff prop. n_clusters = %i, TOO HIGH!!!' % n_clusters)
            max_preferance = preference
            min_preferance = preference - pref_step
            pref_step = (max_preferance-min_preferance) / (n_iterations-iteration)
            print('min = %f, max = %f, step = %f' % (min_preferance, max_preferance, pref_step))
#......... part of the code omitted here .........
Developer: igor-93, Project: strucutreDet, Lines of code: 101, Source file: cluster_affPropagation.py


Example 12: range

    gl_prec = graph.precision_
    gl_alphas = graph.cv_alphas_
    gl_scores = np.mean(graph.grid_scores, axis=1)

    plt.figure()        
    sns.heatmap(gl_prec)
    
    plt.figure()    
    plt.plot(gl_alphas, gl_scores, marker='o', color='b', lw=2.0, label='GraphLassoCV')
    plt.title("Graph Lasso Alpha Selection")
    plt.xlabel("alpha")
    plt.ylabel("score")
    plt.legend()
    
    #cluster using affinity propagation
    _, labels = cluster.affinity_propagation(gl_cov)
    num_labels = np.max(labels)
    
    for i in range(num_labels+1):
        print("Cluster %i: %s" %((i+1), ', '.join(names[labels==i])))
    
    #find a low dim embedding for visualization
    node_model = manifold.LocallyLinearEmbedding(n_components=2, n_neighbors=6, eigen_solver='dense')
    embedding = node_model.fit_transform(X.T).T
    
    #generate plots
    plt.figure()
    plt.clf()
    ax = plt.axes([0.,0.,1.,1.])
    plt.axis('off')
    
Developer: vsmolyakov, Project: fin, Lines of code: 30, Source file: inv_cov.py


Example 13: showCovariances

def showCovariances(names,variation):

    
    ###############################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    
    ###############################################################################
    # Cluster using affinity propagation
    
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
    
    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane
    
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    
    embedding = node_position_model.fit_transform(X.T).T
    
    ###############################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')
    
    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)
    
    # Plot the nodes using the coordinates of our embedding
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)  # plt.cm.spectral was removed in newer matplotlib
    
    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    #a sequence of (*line0*, *line1*, *line2*), where::
    #            linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)
    
    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
    
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / float(n_labels)),
                           alpha=.6))
    
    plt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),
             embedding[0].max() + .10 * embedding[0].ptp(),)
    plt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
             embedding[1].max() + .03 * embedding[1].ptp())
#......... part of the code omitted here .........
Developer: ravenshooter, Project: BA_Analysis, Lines of code: 101, Source file: TimeSeriesAnalysis.py


Example 14: dailyStockClusters


#......... part of the code omitted here .........
        except:
            print "Cant find ", ticker
    
    symbols_edit = []
    names_edit = []
    for i, ticker in enumerate( symbols ):
        if True in np.isnan(np.array(qclose[ticker])).tolist():
            print ticker, " nans found, ticker removed"
            del qclose[ticker]
            del qopen[ticker]
        else:
            symbols_edit.append(ticker)
            names_edit.append( names[i] )
    
    # The daily variations of the quotes are what carry most information
    variation = qclose - qopen
    variation[ np.isnan(variation) ] = 0.
    
    
    ###############################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy()
    #X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    
    ###############################################################################
    # Cluster using affinity propagation
    
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    
    for i in range(n_labels + 1):
        print("Cluster "+str(i)+":")
        for j in range(len(labels)):
            if labels[j] == i:
                print(" ... "+names_edit[j])
        #print('Cluster %i: %s' % ((i + 1), ', '.join(names_edit[labels == i])))

    for i in range(n_labels + 1):
        print("Cluster "+str(i)+":")
        for j in range(len(labels)):
            if labels[j] == i:
                print(" ... "+names_edit[j])
                
    figure7path = 'Clustered_companyNames.png'  # re-set to name without full path
    figure7_htmlText = "\n<br><h3>Daily stock clustering analyis. Based on one year performance correlations.</h3>\n"
    figure7_htmlText = figure7_htmlText + "\nClustering based on daily variation in Nasdaq 100 quotes.\n"
    figure7_htmlText = figure7_htmlText + '''<br><img src="'''+figure7path+'''" alt="PyTAAA by DonaldPG" width="850" height="500"><br>\n'''

        
    ###############################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane
    
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    
    embedding = node_position_model.fit_transform(X.T).T
Developer: DonaldPG, Project: PyTAAA, Lines of code: 67, Source file: stock_cluster.py


Example 15: clusterSymbol


#......... part of the code omitted here .........
        # for q in quotes2:
        #     npquotesClose.append(q['Close'].values)
        # npquotesOpen = np.array([q['Open'].values for q in quotes2])
        # open2 =  npquotesOpen
        # npquotesClose = np.array([q['Close'].values for q in quotes2])
        # close2 =  npquotesClose
        # print npquotesOpen
        # print npquotesClose
        
        variation = (close2 - open2)
        
        symbol_dict = dict(zip(codearrs,nametitles))

        symbols, names = np.array(list(symbol_dict.items())).T

        edge_model = covariance.GraphLassoCV()

        # standardize the time series: using correlations rather than covariance
        # is more efficient for structure recovery
        tempX = variation.T
        # print tempX,'tempX len',len(tempX)
        X = variation.copy().T
        # print 'open len',len(open2),'close len',len(close2),'variation len',len(variation),'X len',len(X)
        print('type open',type(open2),'type close',type(close2),'type variation',type(variation),'type X',type(X))
        print('shape open',open2.shape,'shape close',close2.shape,'shape variation',variation.shape,'shape X',X.shape)

        
        X /= X.std(axis=0)
        edge_model.fit(X)

        # ###############################################################################
        # # Cluster using affinity propagation

        _, labels = cluster.affinity_propagation(edge_model.covariance_)
        n_labels = labels.max()

        # print names
        # print 'type symbols',type(symbols),'type names',type(names)
        # for name in names:
        #     print 'name',name
        # print names[0],names[1],names[2],names[3]
        # print 'lables',labels,'n_labels',n_labels,'type labels',type(labels)

        randomtitles = pd.DataFrame()
        for i in range(n_labels+1):
            # print labels == i
            print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
            if 1 < len(names[labels==i]) <= 3:
                # print 'random cluster ',np.random.choice(names[labels==i],3)
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],1)})
                randomtitles = pd.concat([tmpdf, randomtitles])
            elif 3 < len(names[labels==i]) <= 5:
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],2)})
                randomtitles = pd.concat([tmpdf, randomtitles])
            elif 5 < len(names[labels==i]) <= 7:
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],4)})
                randomtitles = pd.concat([tmpdf, randomtitles])    
            elif 7 < len(names[labels==i]) :
                tmpdf = pd.DataFrame({'title':np.random.choice(names[labels==i],5)})
                randomtitles = pd.concat([tmpdf, randomtitles])        
                # print randomtitles

        # for i in range(n_labels + 1):
        #     print 'Cluster '+str(i + 1)+', '+ names[labels == i]
        
        # ###############################################################################
Developer: HGboda, Project: AlgorithmTrading, Lines of code: 67, Source file: cluster_ver2_0.py


Example 16: range

    "WMT": "Wal-Mart",
    "WAG": "Walgreen",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}

symbols, names = np.array(list(symbol_dict.items())).T

quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True) for symbol in symbols]

# volumes = np.array([q.volume for q in quotes]).astype(np.float)
open = np.array([q.open for q in quotes]).astype(float)
close = np.array([q.close for q in quotes]).astype(float)
variation = close - open
correlations = np.corrcoef(variation)

_, labels = cluster.affinity_propagation(correlations)

for i in range(labels.max() + 1):
    print("Cluster %i: %s" % ((i + 1), ", ".join(names[labels == i])))
Developer: yikuizhai, Project: scikit-learn, Lines of code: 30, Source file: stock_market.py


Example 17: range

            DIST_MAT[i,j]=sqrt(norm(sample1-sample2))
    cov_mat=COV_MAT
    
corr_mat=(np.diag(cov_mat)**(-0.5))*cov_mat*(np.diag(cov_mat)**(-0.5))


################################################################################
# Unsupervised clustering for sensors given the measurement correlation 
# Find only a few representative sensors out of many sensors
################################################################################
# exemplars are a set of representative signals for each cluster
# Smaller damping input will generate more clusters, default is 0.5
# 0.5 <= damping <= 0.99
################################################################################
#exemplars, labels = cluster.affinity_propagation(cov_mat,damping=0.5)
exemplars, labels = cluster.affinity_propagation(cov_mat)
n_labels = labels.max()

for i in range(n_labels + 1):
    print('Cluster %i: %s' % ((i + 1), ', '.join(input_names[labels == i])))


###############################################################################
# Find a low-dimension embedding for visualization: find the best position of
# the nodes (the stocks) on a 2D plane

# We use a dense eigen_solver to achieve reproducibility (arpack is
# initiated with random vectors that we don't control). In addition, we
# use a large number of neighbors to capture the large-scale structure.
node_position_model = manifold.LocallyLinearEmbedding(
    n_components=2, eigen_solver='dense', n_neighbors=3)
Developer: TinyOS-Camp, Project: DDEA-DEV, Lines of code: 31, Source file: df_data_analysis.py


Example 18: CLUSTERING_TEST

def CLUSTERING_TEST(distmat_input,min_corr=0.1,max_corr=0.9):
    ################################################################################
    # Unsupervised clustering for sensors given the normalized euclidean distance
    # of sensor data
    # Find only a few representative sensors out of many sensors
    ################################################################################
    # exemplars are a set of representative signals for each cluster
    # Smaller damping input will generate more clusters, default is 0.5
    # 0.5 <= damping <= 0.99
    ################################################################################
    print('===========================================================')
    print('Clustering Test')
    print('===========================================================')
    print('Pack Clustering')
    print('---------------------------')
    min_dist_=np.sqrt(2*(1-(max_corr)))
    max_dist_=np.sqrt(2*(1-(min_corr)))
    pack_exemplars,pack_labels=max_pack_cluster(distmat_input,min_dist=min_dist_,max_dist=max_dist_)
    pack_num_clusters=int(pack_labels.max()+1)
    print('-------------------------------------------------------------------------')
    print(pack_num_clusters, 'clusters out of', len(pack_labels), 'measurements')
    print('-------------------------------------------------------------------------')
    validity,intra_dist,inter_dist=compute_cluster_err(distmat_input,pack_labels)
    print('validity:',round(validity,2),', intra_dist: ',np.round(intra_dist,2),', inter_dist: ',np.round(inter_dist,2))
    print('-------------------------------------------------------------------------')
    
    
    max_num_clusters=pack_num_clusters   
    print('Hierarchical Clustering')
    print('---------------------------')
    ward_validity_log=[];
    ward_intra_dist_log=[];
    ward_inter_dist_log=[];
    ward_num_clusters_log=[]
    for k in range(2,max_num_clusters+1):
        start_time = time.time()
        ward = Ward(n_clusters=k).fit(distmat_input.T)  # in newer scikit-learn: AgglomerativeClustering(linkage='ward')
        exec_time=time.time() - start_time
        print(exec_time, ' secs')
        ward_labels=ward.labels_
        ward_validity,ward_intra_dist,ward_inter_dist=compute_cluster_err(distmat_input,ward_labels)
        ward_num_clusters=int(ward_labels.max()+1)
        ward_validity_log.append(ward_validity);
        ward_intra_dist_log.append(list(ward_intra_dist));
        ward_inter_dist_log.append(list(ward_inter_dist));
        ward_num_clusters_log.append(ward_num_clusters)
    ward_intra_dist_log=np.array(ward_intra_dist_log);
    ward_inter_dist_log=np.array(ward_inter_dist_log)
    
    

    print('K-Mean Clustering')
    print('---------------------------')
    kmean_validity_log=[];
    kmean_intra_dist_log=[];
    kmean_inter_dist_log=[];
    kmean_num_clusters_log=[]
    for k in range(2,max_num_clusters+1):
        start_time = time.time()
        kmean=KMeans(n_clusters=k).fit(distmat_input.T)
        exec_time=time.time() - start_time
        print(exec_time, ' secs')
        kmean_labels=kmean.labels_
        kmean_validity,kmean_intra_dist,kmean_inter_dist=compute_cluster_err(distmat_input,kmean_labels)
        kmean_num_clusters=int(kmean_labels.max()+1)
        kmean_validity_log.append(kmean_validity);
        kmean_intra_dist_log.append(list(kmean_intra_dist));
        kmean_inter_dist_log.append(list(kmean_inter_dist));
        kmean_num_clusters_log.append(kmean_num_clusters)

    kmean_intra_dist_log=np.array(kmean_intra_dist_log);
    kmean_inter_dist_log=np.array(kmean_inter_dist_log)
    
    
    
    print('Affinity Clustering')
    print('---------------------------')
    SIMM_MAT=2-distmat_input
    start_time = time.time()
    aff_exemplars, aff_labels = cluster.affinity_propagation(SIMM_MAT,damping=0.5)
    exec_time=time.time() - start_time
    print(exec_time, ' secs')
    aff_num_clusters=int(aff_labels.max()+1)
    aff_validity,aff_intra_dist,aff_inter_dist=compute_cluster_err(distmat_input,aff_labels)
    
    
    fig = plt.figure('Intra_dist')
    fig.suptitle('Intra_dist')
    plot(pack_num_clusters,intra_dist[0],'s',label='pack')
    plot(pack_num_clusters,intra_dist[1],'s',label='pack')
    plot(pack_num_clusters,intra_dist[2],'s',label='pack')
    plot(ward_num_clusters_log,ward_intra_dist_log[:,0],'-+',label='ward')
    plot(ward_num_clusters_log,ward_intra_dist_log[:,1],'-+',label='ward')
    plot(ward_num_clusters_log,ward_intra_dist_log[:,2],'-+',label='ward')
    plot(kmean_num_clusters_log,kmean_intra_dist_log[:,0],'-v',label='kmean')
    plot(kmean_num_clusters_log,kmean_intra_dist_log[:,1],'-v',label='kmean')
    plot(kmean_num_clusters_log,kmean_intra_dist_log[:,2],'-v',label='kmean')
    plot(aff_num_clusters,aff_intra_dist[0],'*',label='aff')
    plot(aff_num_clusters,aff_intra_dist[1],'*',label='aff')
    plot(aff_num_clusters,aff_intra_dist[2],'*',label='aff')
#......... part of the code omitted here .........
Developer: TinyOS-Camp, Project: DDEA-DEV, Lines of code: 101, Source file: pack_cluster.py


Example 19: getStockMarketStructure

def getStockMarketStructure(symbol_dict):

    # Choose a time period reasonably calm (not too long ago so that we get
    # high-tech firms, and before the 2008 crash)
    d1 = datetime.datetime(2009, 1, 1)
    d2 = datetime.datetime(2011, 1, 1)
    #d1 = datetime.datetime.now() - timedelta(days=365*2)
    #d2 = datetime.datetime.now() - timedelta(days=1)
    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbols, names = np.array(list(symbol_dict.items())).T

    quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True)
              for symbol in symbols]

    open = np.array([q.open for q in quotes]).astype(float)
    close = np.array([q.close for q in quotes]).astype(float)

    # The daily variations of the quotes are what carry most information
    variation = close - open

    ###############################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()

    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)

    ###############################################################################
    # Cluster using affinity propagation

    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()

    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))

    ###############################################################################
    # Find a low-dimension
