• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python hierarchy.fcluster函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中scipy.cluster.hierarchy.fcluster函数的典型用法代码示例。如果您正苦于以下问题:Python fcluster函数的具体用法?Python fcluster怎么用?Python fcluster使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了fcluster函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: hcluster_cols

	def hcluster_cols(self, thresh):
		"""Merge the columns of ``self.X`` that fall into the same flat cluster.

		The columns (``self.X.T``) are clustered with complete linkage on the
		cosine metric and cut at distance ``thresh``. Every cluster with more
		than one column is replaced by the row-wise mean of its columns; a
		singleton cluster keeps its column unchanged.

		Args:
			thresh: distance threshold passed to ``fcluster`` with the
				``'distance'`` criterion.

		Side effects:
			``self.df`` is set to the new DataFrame (one column per cluster,
			indexed ``0 .. len(self.names) - 1``) and ``self.crimes`` is
			replaced by that DataFrame's column labels.
		"""
		try:
			link = linkage(self.X.T, method='complete', metric='cosine')
			assignments = fcluster(link, thresh, 'distance')
		except Exception:
			# Best-effort fallback to the Euclidean metric when the cosine
			# clustering fails. ``Exception`` (not a bare ``except``) so that
			# KeyboardInterrupt / SystemExit are no longer swallowed.
			link = linkage(self.X.T, method='complete', metric='euclidean')
			assignments = fcluster(link, thresh, 'distance')

		df_new = pd.DataFrame(index=np.arange(len(self.names)))
		# Walk the clusters in ascending label order; inside a cluster the
		# columns keep their original left-to-right order.
		for label in np.unique(assignments):
			cols = [int(c) for c in np.flatnonzero(assignments == label)]
			if len(cols) > 1:
				df_new[str(self.crimes[cols])] = np.mean(self.X[:, cols], axis=1)
			else:
				df_new[str(self.crimes[cols[0]])] = self.X[:, cols[0]]

		self.df = df_new
		self.crimes = df_new.columns.values
开发者ID:nhu2000,项目名称:hood_project,代码行数:28,代码来源:pca_class.py


示例2: cluster_fps

    def cluster_fps(self):
        """Cluster ``self.dm`` hierarchically and store the flat labels.

        Average-linkage clustering is run on ``self.dm`` and cut at distance
        0.3. The labels are stored in ``self.coarse_r`` and ``self.result``
        and ``(clkg, clkg)`` is returned.

        NOTE(review): the SVD refinement path is switched off by the literal
        ``False`` in the condition below, so the ``else`` branch always
        returns and every statement after it is currently unreachable.
        """
        clkg = hcluster.linkage(self.dm,method = 'average') 
        coarse_r = hcluster.fcluster(clkg,0.3,criterion = 'distance')
        self.coarse_r = coarse_r

        # knum = number of cluster labels that have more than one member.
        bcount = np.bincount(coarse_r)
        knum = len(np.nonzero(bcount > 1)[0])

        s = self.density_matrix.shape
        # The leading ``False`` short-circuits the whole condition (disabled path).
        if False and len(s) >1 and s[0] > 10 and s[1] > 10 and knum < min(s) / 2:
            (u,s,vt) = la.svds(self.sps_matrixs,k = knum)
            self.u = u
            print '============'
        else:
            
            # Always taken: the coarse labels are the final result.
            self.result = self.coarse_r
            return (clkg,clkg)
 

# Disabled experiment kept from the original:
#rankA = npla.matrix_rank(self.sps_matrixs)
#        if rankA < 3:
        # Unreachable while the branch above is disabled: re-cluster the
        # columns of diag(s) * vt using cosine distance.
        a = np.matrix(np.diag(s)) * np.matrix(vt)
        pd = dist.pdist(np.array(a.T),'cosine')
        # Zero out tiny distances before building the linkage.
        pd[np.abs(pd) < 1e-11] = 0
        lkg = hcluster.linkage(pd,method = 'average')
        self.lkg = lkg

        self.result = hcluster.fcluster(lkg,self.svd_cluster_thr,criterion = 'distance')

#        self.result = hcluster.fcluster(lkg,1)

# self.result = hcluster.fclusterdata(u,0.7,metric = 'cosine', criterion = 'distance',method = 'average')
        return (lkg,clkg)
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:33,代码来源:cluster_by_grid.py


示例3: elbow

 def elbow(self, no_plot=False):
     """Within-group variance as a function of the number of clusters.

     For every cluster count in ``nclust`` the linkage ``self.Z`` is cut
     with the ``'maxclust'`` criterion and ``100 * self.data[members].var``
     is collected for each resulting group. The elbow of the curve can be
     used to choose the number of clusters.

     Args:
         no_plot: when true, skip plotting and return the data instead.

     Returns:
         ``(nclust, within_grp_var)`` when ``no_plot`` is true, otherwise
         ``None`` after the plot has been shown.
     """
     from scipy.cluster.hierarchy import fcluster
     import matplotlib.pyplot as plt
     finest = fcluster(self.Z, len(self.data), criterion='maxclust')
     nclust = list(np.arange(1, np.sqrt(finest.max() / 2) + 1, dtype=int))
     within_grp_var = []
     mean_var = []
     for k in nclust:
         labels = fcluster(self.Z, k, criterion='maxclust')
         members = [np.flatnonzero(labels == lab) for lab in np.unique(labels)]
         variances = [100*self.data[m].var for m in members]
         within_grp_var.append(variances)
         mean_var.append(np.mean(variances))
     if no_plot:
         return nclust, within_grp_var
     plt.boxplot(within_grp_var, positions=nclust)
     plt.plot(nclust, mean_var, 'k')
     plt.xlabel('Number of clusters')
     plt.ylabel('Variance')
     plt.title('Within-groups variance vs. number of clusters')
     plt.show()
开发者ID:drcoronel,项目名称:apsg,代码行数:26,代码来源:core.py


示例4: refineEnsemble

def refineEnsemble(ens, lower=.5, upper=10.):
    """Refine a PDB ensemble based on RMSD criteria.

    Two hierarchical clusterings are built from the pairwise RMSD table:

    * complete linkage cut at ``upper``: only members of the most populated
      cluster are kept (drops outliers);
    * single linkage cut at ``lower``: one representative is kept per
      cluster (drops near-duplicates).

    The representative of a cluster is the member whose weights sum is the
    largest; ties are broken by the smallest value in ``ens.getRMSDs()``.

    Args:
        ens: object providing ``getRMSDs(pairwise=True)``, ``getRMSDs()``,
            integer indexing (items expose ``getWeights()``) and indexing by
            an integer array.
        lower: distance threshold for the single-linkage cut.
        upper: distance threshold for the complete-linkage cut.

    Returns:
        ``ens[K]`` where ``K`` holds the indices that pass both filters.
    """

    from scipy.cluster.hierarchy import linkage, fcluster
    from scipy.spatial.distance import squareform
    from collections import Counter

    ### calculate pairwise RMSDs ###
    RMSD = ens.getRMSDs(pairwise=True)

    # convert the RMSD table to the compressed (condensed) form
    v = squareform(RMSD)

    ### apply upper threshold ###
    Z_upper = linkage(v, method='complete')
    labels = fcluster(Z_upper, upper, criterion='distance')
    most_common_label = Counter(labels).most_common(1)[0][0]
    I = np.where(labels == most_common_label)[0]

    ### apply lower threshold ###
    Z_lower = linkage(v, method='single')
    labels = fcluster(Z_lower, lower, criterion='distance')

    rmsd = None
    representatives = []
    # Every label returned by np.unique has at least one member, so no
    # empty-cluster branch is needed.
    for label in np.unique(labels):
        cluster = np.where(labels == label)[0]

        # find the conformations with the largest coverage
        # (the weight of the ref should be 1)
        weights = np.array([ens[j].getWeights().sum() for j in cluster])
        js = np.where(weights == weights.max())[0]

        if len(js) > 1:
            # rmsd is not calculated unless necessary, for efficiency
            if rmsd is None:
                rmsd = ens.getRMSDs()
            # ``js`` are positions inside ``cluster``; translate them to
            # conformation indices before looking up the RMSDs.
            j = js[np.argmin(rmsd[cluster[js]])]
        else:
            j = js[0]
        representatives.append(cluster[j])

    J = np.array(representatives, dtype=int)

    ### refine ensemble ###
    K = np.intersect1d(I, J)

    return ens[K]
开发者ID:fongchun,项目名称:ProDy,代码行数:56,代码来源:legacy.py


示例5: cutTree

def cutTree(z, threshold, crit):
    """Cut the linkage ``z`` into flat clusters.

    The linkage values are first clipped to ``[0, 9999999]``. If ``fcluster``
    rejects the request with a ``ValueError`` the error is reported and the
    tree is cut with the ``'distance'`` criterion at a threshold of 50.

    Args:
        z: linkage matrix.
        threshold: threshold passed to ``fcluster``.
        crit: ``fcluster`` criterion name.

    Returns:
        The array of flat cluster labels.
    """
    try:
        z = np.clip(z, 0, 9999999)
        return hac.fcluster(z, threshold, criterion=crit)
    except ValueError as e:
        print("cutTree: %s" % str(e))
        # The original fallback passed criterion="euclidean", which fcluster
        # does not accept, so the recovery path itself always raised.
        tree = hac.fcluster(z, 50, criterion="distance")
        print("negative values in matrix")
        return tree
开发者ID:helgejo,项目名称:codelibrary,代码行数:10,代码来源:clustering.py


示例6: process_stay

def process_stay(imei,traj):
    """Split a trajectory into runs of equal cluster label and report long ones.

    Returns early when ``traj`` has fewer than two rows, or when
    ``sample_range`` is set and the first point lies inside none of its
    ``(cx, cy, cr)`` circles. Otherwise the pairwise distances from
    ``get_pdist`` are clustered with average linkage and cut at 0.7 (used to
    segment) and at 0.2 (used as per-row ids). Every run of consecutive rows
    sharing the 0.7-level label whose ``'t'`` span exceeds ``60*8`` is passed
    to ``print_merge_fp``.
    """
    min_duration = 60*8
    if len(traj.shape) < 1 or traj.shape[0] <2:
        return
    xs = traj['x']
    ys = traj['y']
    if sample_range is not None:
        # Keep the trajectory only if its first point is inside some circle.
        inside = any(
            math.sqrt(math.pow(cx-xs[0],2) + math.pow(cy-ys[0],2)) < cr
            for (cx,cy,cr) in sample_range
        )
        if not inside:
            return

    ids = G.get_gridids_with_align(np.median(xs),np.median(ys))
    dm = get_pdist(traj,100,convert_sig = True)
    # Treat tiny distances as exact zeros before clustering.
    dm[np.abs(dm) < 1e-3] = 0
    lkg = hcluster.linkage(dm,method = 'average')
    coarse = hcluster.fcluster(lkg,0.7,criterion = 'distance') #rough dist
    fine = hcluster.fcluster(lkg,0.2,criterion = 'distance') #rough dist
    # Boundaries: 0, every index where the coarse label changes, len(coarse).
    bounds = [
        i for i in range(len(coarse) + 1)
        if i == 0 or i == len(coarse) or coarse[i] != coarse[i-1]
    ]
    for (start,stop) in zip(bounds[:-1],bounds[1:]):
        seg_traj = traj[start:stop]
        seg_id = fine[start:stop]
        duration = seg_traj[-1]['t'] - seg_traj[0]['t']
        if duration > min_duration:
            print_merge_fp(ids,imei,seg_traj,seg_id,duration)
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:49,代码来源:get_stayd.py


示例7: clusterTrajectories

def clusterTrajectories(
    trajectories, fname, path, metric_func=trajectoryDissimilarityL2, user_distance_matrix=None, criterion="distance"
):
    """Cluster trajectories hierarchically and plot the representatives.

    Args:
        trajectories: the trajectories; they need to be in XY coordinates.
        fname: name used for the saved distance matrix and the dendrogram plot.
        path: output directory; plots go to ``path + "/plots"``.
        metric_func: pairwise dissimilarity used when no matrix is supplied;
            with ``criterion="distance"`` it also selects the cut threshold.
        user_distance_matrix: optional precomputed square distance matrix
            with one row per trajectory.
        criterion: ``"distance"`` or ``"inconsistent"``.

    Returns:
        ``(labels, [labels], [])`` where ``labels`` are the flat cluster labels.

    Raises:
        ValueError: if the criterion / metric combination has no configured
            threshold (previously this surfaced as an UnboundLocalError).
    """
    plot_path = utils.queryPath(path + "/plots")
    if user_distance_matrix is None:
        distance_matrix = getTrajectoryDistanceMatrix(trajectories, metric_func)
        writeToCSV.saveData(distance_matrix, path + "/" + fname)  # save the distance_matrix
    else:
        distance_matrix = user_distance_matrix
        assert len(distance_matrix) == len(
            trajectories
        ), "distance_matrix (n, n) and trajectories(n) should have same number of samples"

    print("distance_matrix:\n%s" % (distance_matrix,))

    v = DIST.squareform(distance_matrix)
    cluster_result = HAC.linkage(v, method="average")
    HAC.dendrogram(cluster_result)  # draws on the current matplotlib figure
    plt.xlabel("cluster_dengrogram_{fname}".format(fname=fname))
    plt.savefig("{path}/cluster_dengrogram_{fname}.png".format(fname=fname, path=plot_path))
    plt.clf()

    if criterion == "distance":
        if metric_func == trajectoryDissimilarityL2:
            threshold = 1 * 1000  # distance for l2 measure
        elif metric_func == trajectoryDissimilarityCenterMass:
            threshold = 1.5  # distance for center of mass measure
        else:
            raise ValueError("no distance threshold configured for metric_func %r" % (metric_func,))
    elif criterion == "inconsistent":
        threshold = 0.8
    else:
        raise ValueError("unsupported criterion: %r" % (criterion,))
    this_cluster_label = HAC.fcluster(Z=cluster_result, t=threshold, criterion=criterion)

    n_clusters = len(set(this_cluster_label))
    print("this_cluster_label: %s number of clusters: %s" % (this_cluster_label, n_clusters))

    # Plot the representative trajectories.
    plotRepresentativeTrajectory(
        this_cluster_label,
        trajectories,
        fname="cluster_centroids_{n}_classes".format(n=n_clusters),
        path=plot_path,
        show=False,
    )

    return this_cluster_label, [this_cluster_label], []
开发者ID:Jim61C,项目名称:vessel-trajectory-modeller,代码行数:49,代码来源:clustering_worker.py


示例8: clusterize_hierarchical

def clusterize_hierarchical(peakels, matrix_dist, cut, clip=False):
    """Group peakels by cutting a complete-linkage tree at a distance.

    :param peakels: indexable collection; item ``i`` belongs to observation ``i``
    :param matrix_dist: distances handed to ``linkage``; modified in place
        when ``clip`` is true
    :param cut: threshold for ``fcluster`` with the ``'distance'`` criterion
    :param clip: clip ``matrix_dist`` into ``[0, 1]`` first, because negative
        distances make the clustering raise a ValueError
    :return: the values view of a dict mapping cluster label to its peakels
    """
    if clip:
        np.clip(matrix_dist, 0, 1, out=matrix_dist)
    tree = linkage(matrix_dist, method='complete')
    labels = fcluster(tree, cut, criterion='distance')

    clust_by_id = ddict(list)
    for position, label in enumerate(labels):
        clust_by_id[label].append(peakels[position])
    return clust_by_id.values()
开发者ID:jerkos,项目名称:eledelphe,代码行数:25,代码来源:clustering.py


示例9: main

def main(): #clustering and write output
    if len(pep_array)>1:
        matrix=[]
        for i in range(0,len(pep_array)):
            matrix.append(pep_array[i][4].replace('\"',"").split(','))

        dataMatrix=numpy.array(matrix,dtype=float)
        d = sch.distance.pdist(dataMatrix,metric)# vector of pairwise distances
        if metric=="correlation":
            D = numpy.clip(d,0,2) #when using correlation, all values in distance matrix should be in range[0,2]
        else:
            D=d
        try:
            cutoff=float(t)
        except ValueError:
            print "please provide a numeric value for --t"; sys.exit()
        L = sch.linkage(D, method,metric)
        ind = sch.fcluster(L,cutoff,'distance')#distance is dissmilarity(1-correlation)
        p=numpy.array(pep_array)
        p=numpy.column_stack([p,ind])
        formatoutput(p)
    else:
        p=numpy.array(pep_array)
        p=numpy.column_stack([p,[0]])
        formatoutput(p)
开发者ID:Nausx,项目名称:SpliceVista,代码行数:25,代码来源:clusterpeptide.py


示例10: user_fp_group

def user_fp_group(data,key,user,filter = 'mid',merge = False,thr = 0.2):
#data = np.fromiter(data,dtype = dt)
    if len(data.shape) == 0 or data.shape[0] == 1:
        print '\t'.join([key,user,'%s' % data['wf_list'],str(data['x']),str(data['y']),'1'])
        return
    dists = get_pdist(data,100)
#print dists
    clusters = hcluster.linkage(dists,method = 'average')
#   print clusters
    r = hcluster.fcluster(clusters,thr,'distance')
    ids = np.unique(r)
    sz = []
    for id in ids:
        sz.append(data[r==id].shape[0])
    
    mid_size = max(1.1,max(sz) / 2.0)
    for id in ids:
        d = data[r==id]
        if filter == 'mid' and d.shape[0] < mid_size:
            continue
        if merge == True:
            print '\t'.join([key,user,wf_to_str(get_mean_wf(d)),str(np.median(d['x'])),str(np.median(d['y'])),str(get_largest_dur(d)),str(d.shape[0])])
            continue
        for od in d:
            print '\t'.join([key,user,od['wf_list'],str(od['x']),str(od['y']),str(od['t']),str(id)])
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:25,代码来源:cluster_fps2.py


示例11: process

def process(tag,infos,wf_lists,count):
    """Cluster the rows of ``wf_lists`` and print one summary line per cluster.

    Nothing is printed when either input is ``None``, when fewer than three
    distinct ``infos['imei']`` values are present, or when ``wf_lists`` is
    not 2-D with at least two columns. Rows are clustered on cosine distance
    with average linkage, cut at distance 0.3.

    Each printed line holds, tab-separated: ``tag``, the mean row of the
    cluster, median x / y, the number of distinct imeis in the cluster, the
    overall std of x / y, the cluster std of x / y, and ``count`` (all
    formatted with ``%d``).
    """
    # ``is None`` rather than ``== None``: comparing a numpy array with
    # ``==`` is elementwise and its truth value is ambiguous.
    if wf_lists is None or infos is None:
        return

    x = infos['x']
    y = infos['y']
    imeis = infos['imei']

    std_x = np.std(x)
    std_y = np.std(y)
    users_num = len(np.unique(imeis))
    if users_num < 3:
        return
    if len(wf_lists.shape) < 2 or wf_lists.shape[1] < 2:
        return
    dists = sci_dist.pdist(wf_lists,'cosine')
    # Snap rounding noise to an exact zero distance.
    dists[(dists < 1e-10)] = 0
    clusters = hierarchy.linkage(dists,method ='average')
    r = hierarchy.fcluster(clusters,0.3,'distance')

    for c in np.unique(r):
        idx = (r==c)
        c_x = np.median(x[idx])
        c_y = np.median(y[idx])
        c_std_x = np.std(x[idx])
        c_std_y = np.std(y[idx])
        c_user = len(np.unique(imeis[idx]))
        wfs = wf_lists[idx]
        wf = np.sum(wfs,axis=0) / len(wfs)
        wf = [ '%d' % sig for sig in wf ]
        print('%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d' % (tag,'\t'.join(wf),c_x,c_y,c_user,std_x,std_y,c_std_x,c_std_y,count))
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:32,代码来源:count_users_var.py


示例12: run_entity_model

def run_entity_model(cdev, cprc):
    print '____________________________________________________'
    print 'running entity model'
    hdev, hprc, hmapping, entcorp, er = process_entities(cdev, cprc)
    print 'removed', len(cdev)- len(hdev), 'documents', len(hdev), 'left'
    voc = build_voc(entcorp, 2)
    
    ent_vectorizer = CountVectorizer(vocabulary = voc)
    E = ent_vectorizer.fit_transform(hdev)
    
    Eclean, emapping = filter_rare(E, 0)

    E_dense = np.matrix(Eclean).astype('float')
    E_scaled = preprocessing.scale(E_dense)
    E_normalized = preprocessing.normalize(E_scaled, norm='l2')
    
    EMatrix = pairwise_distances(E_normalized, metric='cosine')
    EL = fastcluster.linkage(EMatrix, method='average')
    flat_eclust = hierarchy.fcluster(EL, 0.5, 'distance')
    ec = organize_clusters(flat_eclust, th = 3)
    
    ecf = []
    for cl in ec:
        ecf.append([hmapping[emapping[t]] for t in cl])
    print 'detected', len(ecf), 'entity clusters'      
    return ecf, voc
开发者ID:gaphex,项目名称:Oracle,代码行数:26,代码来源:tweet_proc.py


示例13: main

def main():
#     distMatrix = loadDistanceMatrix()
#    linkage = saveLinkage(distMatrix)
#     linkage = loadLinkage()
#     loadFCluster()
#     R = dendrogram(linkage, truncate_mode='level',  p=4, show_contracted=True)
#     afile = open(r'/home/rojosewe/Dropbox/MAI90/tesis/structs/R5000.pkl', 'wb')
#     pickle.dump(R, afile);
#     afile.close();
    linkage = loadLinkage()
    print len(linkage)
    k = 1.5
#   18 -> 54 
#   19 -> 46 
    
    R = dendrogram(linkage, color_threshold=6.8, show_contracted=True)
    pylab.savefig( "/home/rojosewe/Dropbox/MAI90/tesis/images/wordClustering/dgram446.8.png" )
#    print "cheese!"
    T = sch.fcluster(linkage, k, 'distance')
    n = len(T)
 #   print len(T)
    # calculate labels
    labels = np.zeros((n, 1))
    print str(k) + ": " + str(max(T))
    for i in range(n):
        labels[i,0] = int(T[i]);
    with open(datafolder + 'labels.csv', 'wb') as csvfile:
        csvw = csv.writer(csvfile);
        for i in range(n):
            csvw.writerow(labels[i,:])
            
    print 'done writing'
开发者ID:rojosewe,项目名称:TesisScrips,代码行数:32,代码来源:dendragramBuilder.py


示例14: cluster_words

def cluster_words(k):
    """Count the description words of every cluster.

    The file names in ``types/`` are sorted with ``alphanum_key`` and the
    global linkage ``Z`` is cut into at most ``k`` clusters (``'maxclust'``).
    For each cluster the ``'description'`` field of its JSON files is
    lower-cased and split into ``\\w+`` tokens; tokens in ``stop_words`` are
    dropped.

    Args:
        k: maximum number of clusters.

    Returns:
        A list of ``k`` ``Counter`` objects, one per label ``1 .. k``.
    """
    ts = os.listdir('types')
    ts.sort(key=alphanum_key)
    ts = np.array(ts)

    T = fcluster(Z, k, criterion='maxclust')

    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    # Compiled once instead of once per file.
    word_pattern = re.compile(r'\w+')

    def words(i):
        """Return the word counts for cluster label ``i``."""
        cluster = ts[T == i]
        print(len(cluster))
        counts = Counter()

        for t in cluster:
            fname = 'types/{}'.format(t)
            with open(fname) as handle:
                data = json.load(handle)
            tokens = word_pattern.findall(data['description'].lower())
            counts.update(tok for tok in tokens if tok not in stop_words)

        return counts

    return [words(i + 1) for i in range(k)]
开发者ID:MareinK,项目名称:datamining,代码行数:27,代码来源:words.py


示例15: get_ROIs

def get_ROIs(df_sequence,x,limit_meters):
	"""Cluster the points of a sequence and return the selected centroids.

	Points from ``get_latlong_points`` are clustered with weighted linkage
	on the ``vincenty`` distance in meters and cut at ``limit_meters``. The
	``pi`` values of all points in a cluster are summed, and
	``get_upToX_pi_locations`` picks which clusters to keep for ``x``.

	Returns:
		``None`` when there are no locations; ``[[point], 1.0]`` for a single
		location; otherwise ``[centroids, the_sum]`` where every centroid is a
		``{"lat", "long"}`` dict holding the mean of its cluster's points.
	"""
	# find the origin-transaction points
	X,locations,pi_locations = get_latlong_points(df_sequence)
	n_locations = len(locations)
	if n_locations < 1:
		return None
	if n_locations == 1:
		return [[{"lat":X[0,0],"long":X[0,1]}],1.0]

	# build the dendrogram
	Z = linkage(X,'weighted',lambda a,b: vincenty(a,b).meters)
	clusters = fcluster(Z,limit_meters,criterion='distance')

	seen_labels = []
	coord_sums = []
	member_counts = []
	pi_sums = []
	# join pi_sums of locations that are in the same cluster
	for i, label in enumerate(clusters):
		# presumably returns the position of ``label`` in the list, negative
		# when absent -- verify against buscar_locacion
		slot = buscar_locacion(seen_labels,label)
		if slot < 0:
			seen_labels.append(label)
			slot = len(seen_labels)-1
			pi_sums.append(0)
			member_counts.append(0)
			coord_sums.append({"lat":0,"long":0})
		pi_sums[slot] += pi_locations[i]
		coord_sums[slot]["lat"] += X[i,0]
		coord_sums[slot]["long"] += X[i,1]
		member_counts[slot] += 1

	the_indexs, the_sum = get_upToX_pi_locations(np.asarray(pi_sums),x)
	the_centroids = [
		{"lat":coord_sums[i]["lat"]/member_counts[i],"long":coord_sums[i]["long"]/member_counts[i]}
		for i in the_indexs
	]
	return [the_centroids,the_sum]
开发者ID:cinai,项目名称:ODtrips,代码行数:33,代码来源:tfe.py


示例16: main

def main(): #clustering and write output
    matrix=[]
    for i in range(0,len(proteinarray)):
        if calculate_ratio=="True":
            ratio_array=convert2ratio(proteinarray[i][2:],ref)
            matrix.append(ratio_array)
        else:
            matrix.append(proteinarray[i][2:])

    dataMatrix=np.array(matrix,dtype=float)
    if log_transform=="True":
        dataMatrix=np.log2(dataMatrix)

    if len(proteinarray)>1:
        d = sch.distance.pdist(dataMatrix,metric)# vector of pairwise distances
        if metric=="correlation":
            D = np.clip(d,0,2) #when using correlation, all values in distance matrix should be in range[0,2]
        else:
            D=d
        try:
            cutoff=float(t)
        except ValueError:
            print "please provide a numeric value for --t"; sys.exit()
        L = sch.linkage(D, method,metric)
        ind = sch.fcluster(L,cutoff,'distance')#distance is dissmilarity(1-correlation)
        p=np.array(proteinarray)[:,[0,1]] #slice first and second column of original data
        p=np.concatenate((p,dataMatrix),axis=1) # replace transformed data
        p=np.column_stack([p,ind]) # add cluster result to the last column
        formatoutput(p)
    else:
        p=np.array(proteinarray)[:,[0,1]]
        p=np.concatenate((p,dataMatrix),axis=1)
        p=np.column_stack([p,[0]])
        formatoutput(p)
开发者ID:yafeng,项目名称:pqpq_python,代码行数:34,代码来源:pqpq2.py


示例17: _run_hier_clust_on_centroids

    def _run_hier_clust_on_centroids(self,method='average'):
        '''
        Run hierarchical clustering on the centroids of the template clusters.

        One centroid is computed per unique value of ``self.templateLabels``
        (row mean of ``self.templateMat``). The centroids are linked with
        ``method``; the linkage is then cut at the mean heights reported by
        ``hierarchy.inconsistent`` (depth 2), visited in order of decreasing
        standard deviation. The first labelling seen for each distinct
        cluster count is kept, and the first 25 are converted to mode labels.

        Sets ``self.y``, ``self.z``, ``self.bestModeLabels``,
        ``self.modeSilValues``, ``self.modeSizes`` and ``self.clusterSilValues``.
        '''

        uniqueLabels = np.sort(np.unique(self.templateLabels))
        centroids = np.array([self.templateMat[np.where(self.templateLabels == i)[0],:].mean(axis=0) for i in uniqueLabels])

        self.y = pdist(centroids)
        self.z = hierarchy.linkage(self.y,method)
        r2 = hierarchy.inconsistent(self.z,2)

        ## rank the mean linkage heights by their standard deviation (largest first)
        meanHeights = r2[:,0]
        stdHeights = r2[:,1]
        rankedInds = np.argsort(stdHeights)[::-1]
        bestCutPoints = meanHeights[rankedInds]

        ## save centroid labels for every distinct number of clusters
        allCentroidLabels = {}
        rankedK = []
        for cp in bestCutPoints:
            centroidLabels = hierarchy.fcluster(self.z,t=cp,criterion='distance')
            k = len(np.unique(centroidLabels))
            # ``in`` replaces dict.has_key(), which no longer exists in Python 3
            if k in allCentroidLabels:
                continue

            allCentroidLabels[k] = centroidLabels
            rankedK.append(k)

        ## save the top xx modes
        self.bestModeLabels = []
        print('doing ranking...')

        for rk in rankedK[:25]:
            modeLabels = self._get_mode_labels(self.templateLabels,allCentroidLabels[rk],uniqueLabels)
            self.bestModeLabels.append(modeLabels)

        ## provide silvalue ranks in case we wish to reorder the top xx modes by sil value
        self.modeSilValues = []
        self.modeSizes = []
        allEvents = [self.templateData]

        for modeLabels in self.bestModeLabels:
            numClusters = np.unique(modeLabels).size
            silValues = get_silhouette_values(allEvents,[modeLabels],subsample=self.noiseSample,
                                              minNumEvents=5000,resultsType='raw')
            self.modeSilValues.append(silValues['0'].mean())
            self.modeSizes.append(numClusters)

        silValues = get_silhouette_values(allEvents,[self.templateLabels],subsample=self.noiseSample,
                                          minNumEvents=5000,resultsType='raw')
        self.clusterSilValues = silValues['0'].mean()
        self.modeSilValues = np.array(self.modeSilValues)
        self.modeSizes = np.array(self.modeSizes)
开发者ID:ajrichards,项目名称:cytostream,代码行数:60,代码来源:TemplateFileCreator.py


示例18: run_ngram_model

def run_ngram_model(cdev, cprc):
    print '____________________________________________________'
    print 'running n-gram model'
    wcorp = []
    for i in cprc:
        wcorp.append(' '.join(cprc[i]['words']))
        
    vectorizer = CountVectorizer(analyzer='word', binary=True, min_df=max(int(len(wcorp)*0.0005), 5), ngram_range=(2,3))
    X = vectorizer.fit_transform(wcorp)
    Xclean, mapping = filter_rare(X)
    
    Xdense = np.matrix(Xclean).astype('float')
    X_scaled = preprocessing.scale(Xdense)
    X_normalized = preprocessing.normalize(X_scaled, norm='l2')
    
    textMatrix = pairwise_distances(X_normalized, metric='cosine')
    L = fastcluster.linkage(textMatrix, method='average')
    flat_textclust = hierarchy.fcluster(L, 0.5, 'distance')
    ttc = organize_clusters(flat_textclust)
    
    ncf = []
    for cl in ttc:
        ncf.append([mapping[t] for t in cl])
    print 'detected', len(ncf), 'n-gram clusters'     
    return ncf
开发者ID:gaphex,项目名称:Oracle,代码行数:25,代码来源:tweet_proc.py


示例19: get_cluster

    def get_cluster(self,cluster_data):
        """Group the indices of the flagged entries by proximity.

        The positions where ``cluster_data`` equals ``True`` are clustered on
        their absolute index difference (weighted linkage) and cut at
        distance ``self.cluster_distance``.

        Args:
            cluster_data: sequence of flags.

        Returns:
            A dict mapping each cluster label (``1 .. n``) to the list of
            member indices, in ascending order. Empty when nothing is
            flagged; ``{1: [index]}`` when exactly one entry is flagged
            (both cases used to raise inside scipy).
        """
        # ``== True`` is kept on purpose so the same values as before are
        # accepted (e.g. 1 matches, 2 does not).
        cluster_value = [index for index, flag in enumerate(cluster_data) if flag == True]

        if not cluster_value:
            return {}
        if len(cluster_value) == 1:
            # A linkage needs at least two observations.
            return {1: [cluster_value[0]]}

        # Condensed |i - j| distances, computed by scipy instead of a
        # quadratic Python loop followed by squareform.
        distance_array = distance.pdist([[value] for value in cluster_value], metric='cityblock')
        clusters = hierarchy.linkage(distance_array, method='weighted')
        T = hierarchy.fcluster(clusters, self.cluster_distance, criterion='distance')

        temp_holder = {}
        for label in range(1, int(max(T)) + 1):
            temp_holder[label] = []
        for label, value in zip(T, cluster_value):
            temp_holder[int(label)].append(value)
        return temp_holder
开发者ID:souryapoddar290990,项目名称:sourya,代码行数:25,代码来源:test5.py


示例20: clustering_scipy_dendrogram

def clustering_scipy_dendrogram(features, n_clust, metric='euclidean', method = 'complete'):
  """Cluster ``features`` hierarchically into at most ``n_clust`` clusters.

  The cluster sizes are printed, and the centroids computed by
  ``to_codebook`` are saved to ``centroids.npy`` in the working directory.

  Args:
    features: observation vectors passed to ``linkage``.
    n_clust: maximum number of flat clusters (``'maxclust'`` criterion).
    metric: distance metric for the linkage. It was previously accepted but
      ignored; the default matches scipy's own default, so default callers
      see no change.
    method: linkage method.

  Returns:
    ``(clusters, centroids)``: the flat labels and the ``to_codebook`` output.
  """
  z = hac.linkage(features, method = method, metric = metric)
  clusters = hac.fcluster(z, n_clust, criterion='maxclust')

  # Number of observations per cluster label.
  num_elem = Counter(clusters)
  print(num_elem)

  centroids = to_codebook(features, clusters)

  np.save('centroids',np.array(centroids))

  return clusters, centroids
开发者ID:kaustuvkanti,项目名称:Experiments,代码行数:34,代码来源:dump_transition_for_clustering.py



注:本文中的scipy.cluster.hierarchy.fcluster函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python hierarchy.fclusterdata函数代码示例发布时间:2022-05-27
下一篇:
Python hierarchy.dendrogram函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap