• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python hierarchy.to_tree函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中scipy.cluster.hierarchy.to_tree函数的典型用法代码示例。如果您正苦于以下问题:Python to_tree函数的具体用法?Python to_tree怎么用?Python to_tree使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了to_tree函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: make_tree

def make_tree(X, C, method='single'):
    """Build a ``Tree`` from an agglomerative clustering.

    Args:
        X: Input handed to ``ward`` when ``method == 'ward'``.
        C: Input handed to ``single`` / ``average`` for those methods.
        method: One of ``'single'``, ``'ward'`` or ``'average'``.

    Returns:
        ``Tree`` whose root is ``construct_node`` applied to the ``to_tree``
        conversion of the selected linkage.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method == 'single':
        linkage_matrix = single(C)
    elif method == 'ward':
        linkage_matrix = ward(X)
    elif method == 'average':
        linkage_matrix = average(C)
    else:
        # Without this branch an unknown method fell through and crashed
        # later with an UnboundLocalError on the never-assigned tree.
        raise ValueError(
            "method must be 'single', 'ward' or 'average', got %r" % (method,)
        )
    return Tree(root=construct_node(to_tree(linkage_matrix)))
开发者ID:sharadmv,项目名称:trees,代码行数:8,代码来源:agglomerative.py


示例2: classify_by_scores

def classify_by_scores(M, threshold, loci, return_file_names=None):
    """Split the leaves of an average-linkage dendrogram into clusters and singles.

    ``M`` is converted with ``ssd.squareform`` and clustered with average
    linkage. Walking the leaves in ``leaves_list`` order, each leaf is lifted
    to the highest ancestor whose parent's ``dist`` is not below
    ``threshold``; the leaves under that ancestor form one group.

    Args:
        M: Score/distance matrix accepted by ``ssd.squareform``.
        threshold: Merge distance below which an ancestor is still climbed.
        loci: Sequence indexed by leaf id; only used (via ``.file_name``)
            when ``return_file_names`` is truthy.
        return_file_names: When truthy, return file base names instead of
            leaf ids.

    Returns:
        ``(singles, clusters)``: groups of size one flattened into a list,
        and groups of size > 1 sorted by decreasing size. With
        ``return_file_names`` both hold ``os.path.basename`` of the
        corresponding ``loci[i].file_name``.
    """
    M_array = ssd.squareform(M)
    Z = linkage(M_array, method='average')

    root = clone_graph(to_tree(Z))
    id2node = {node.id: node for node in get_nodes(root)}
    leaf_ids = leaves_list(Z)

    pool = []
    i = 0
    # Visit every leaf. The former exit test ``i >= len(leaf_ids) - 1``
    # stopped one position early and silently dropped a trailing singleton.
    while i < len(leaf_ids):
        cur_node = id2node[leaf_ids[i]]
        parent = cur_node.parent

        # Climb while the merge above is still under the threshold. The None
        # guard stops at the root instead of dereferencing a missing parent
        # (assumes clone_graph leaves the root's parent as None - TODO confirm).
        while parent is not None and parent.dist < threshold:
            cur_node = parent
            parent = cur_node.parent

        cur_leaf_ids = get_leaves(cur_node)
        pool.append(list(cur_leaf_ids))

        # Skip past all leaves that were just grouped together.
        i += len(cur_leaf_ids)

    clusters = sorted((group for group in pool if len(group) > 1),
                      key=len, reverse=True)
    singles = [group[0] for group in pool if len(group) == 1]

    if return_file_names:
        clusters_fn = [
            [os.path.basename(loci[leaf].file_name) for leaf in cluster]
            for cluster in clusters
        ]
        singles_fn = [os.path.basename(loci[leaf].file_name) for leaf in singles]
        return singles_fn, clusters_fn

    return singles, clusters
开发者ID:kyrgyzbala,项目名称:NewSystems,代码行数:60,代码来源:dendrogram.py


示例3: __init__

    def __init__(self, flat_cluster, cluster=None, curve_list=None):
        """Locate this cluster's node in the hierarchical tree and record its children.

        Args:
            flat_cluster: FlatClusters object; its ``get_co_analysis()`` result
                provides the hierarchical clustering.
            cluster: Optional Cluster object. When given, its
                ``list_curve_indexes()`` supplies the curve list.
            curve_list: Curve indexes, used only when ``cluster`` is None.
        """
        from scipy.cluster.hierarchy import to_tree
        from numpy import asarray, sort

        self.flat = flat_cluster  # FlatClusters object
        self.co_analysis = self.flat.get_co_analysis()  # CoAnalysis object
        self.cluster = cluster  # Cluster object

        # Identity test: ``== None`` would dispatch to a user-defined __eq__.
        if cluster is not None:
            self.curve_list = cluster.list_curve_indexes()
        else:
            self.curve_list = curve_list

        self.Z = self.co_analysis.get_hierarchical_cluster()

        root = to_tree(self.Z)  # root of the entire clustering
        curves = asarray(self.curve_list)  # curves belonging to this cluster

        # Node of the full tree that corresponds to the curves above.
        self.cluster_node = get_cluster_node(root, root.left, root.right, curves)
        self.id = self.cluster_node.get_id()

        # Immediate children of that node.
        self.left = self.cluster_node.left
        self.right = self.cluster_node.right

        # Sorted results of any_pre_order for each child.
        self.left_list = sort(any_pre_order(root, self.left))
        self.right_list = sort(any_pre_order(root, self.right))
开发者ID:hilaryjoyce,项目名称:smfs-data-analysis,代码行数:29,代码来源:clustering.py


示例4: cluster_alchemy

def cluster_alchemy(dataset, gamma=None, filter=False):
    """Cluster the documents of ``dataset`` hierarchically and score the clusters.

    Args:
        dataset: Passed to ``dp.DocumentsProcessor``.
        gamma: When truthy, forwarded (with ``filter``) to
            ``get_data_with_alchemy``; otherwise that method is called
            without arguments.
        filter: Forwarded to ``get_data_with_alchemy`` together with ``gamma``.

    Returns:
        The ``params`` dict from ``get_data_with_alchemy`` with
        ``'avg_f_score'`` and ``'all_fscore'`` added.
    """
    doc_proc = dp.DocumentsProcessor(dataset)
    if gamma:
        tfidf_matrix, f_score_dict, params = doc_proc.get_data_with_alchemy(
            gamma=gamma, filter=filter)
    else:
        tfidf_matrix, f_score_dict, params = doc_proc.get_data_with_alchemy()

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('starting clustering: found %s document and %s features'
          % (tfidf_matrix.shape[0], tfidf_matrix.shape[1]))

    linkage_matrix = hr.average(tfidf_matrix.toarray())

    # rd=True also returns the list of every node in the tree.
    _, nodes = hr.to_tree(linkage_matrix, rd=True)

    clusters = {}
    for node in nodes:
        if not node.is_leaf():
            clusters[node.get_id()] = collect_leaf_nodes(node, [])

    f = f_score(clusters, f_score_dict)

    all_scores = print_f_score_dict(f)

    params['avg_f_score'] = average_f_score(f, tfidf_matrix.shape[0])
    params['all_fscore'] = all_scores

    print('average f_score: %s' % params['avg_f_score'])
    return params
开发者ID:Neuro17,项目名称:LOD-doc-clustering,代码行数:30,代码来源:classifier.py


示例5: _process_block

    def _process_block():
        """Fill the nested d3 dictionary from the linkage tree and return it."""
        root_node = to_tree(linkage, rd=False)
        _add_node(root_node, bcluster_dendro)

        # Labelling starts from the last entry of the top-level children.
        last_child = bcluster_dendro["children"][-1]
        _label_tree(last_child)

        return bcluster_dendro
开发者ID:Lilykos,项目名称:clusterix,代码行数:7,代码来源:bcluster.py


示例6: __get_column_dendrogram__

    def __get_column_dendrogram__(self):
        """Describe the column clustering tree as a dict of per-node records.

        Returns:
            ``{"nodes": {node_id: record}}``. A leaf record has ``"count": 1``
            and ``"distance": 0``; any other record carries the node's
            ``count``, its ``dist`` rounded to three decimals and the ids of
            its left and right children. Every node that is somebody's child
            also gets a ``"parent"`` entry.
        """
        # rd=True additionally returns the list of all nodes; the root itself
        # is not needed here.
        _, nodes = hcluster.to_tree(self.cluster_object.column_clustering, rd=True)
        node_id2node = {}

        for node in nodes:
            if node.count == 1:
                # A node covering a single observation is a leaf.
                node_id2node[node.id] = {"count": 1, "distance": 0}
            else:
                node_id2node[node.id] = {
                    "count": node.count,
                    "distance": round(node.dist, 3),
                    "left_child": node.get_left().id,
                    "right_child": node.get_right().id,
                }

        # Back-link each child record to the id of its parent.
        for node_id, record in node_id2node.items():
            if record["count"] != 1:
                node_id2node[record["left_child"]]["parent"] = node_id
                node_id2node[record["right_child"]]["parent"] = node_id

        # Shallow copy: the result shares the record dicts built above.
        return {"nodes": dict(node_id2node)}
开发者ID:karandeepmadaan,项目名称:paji,代码行数:35,代码来源:inchlib_clust.py


示例7: hierarchical_clustering_to_dendrogram

def hierarchical_clustering_to_dendrogram(clustering):
    """Converts an array representing a clustering to a dendrogram.

    Args:
        clustering (ndarray): A hierarchical clustering matrix, in the form
            returned by scipy.cluster.hierarchy.linkage.

    Returns:
        (networkx.DiGraph): A dendrogram. Each node in the dendrogram has the
        'distance' attribute, which is the threshold at which its children
        are merged in the clustering. Edges point from parent to child.
    """
    root = _hierarchy.to_tree(clustering)

    tree = _nx.DiGraph()
    tree.add_node(root.id, distance=root.dist)

    # Always bind the queue so a childless root yields a one-node graph
    # instead of a NameError in the loop below.
    queue = []
    if root.left:
        queue = [(root, root.left), (root, root.right)]

    while queue:
        parent, child = queue.pop(0)

        # add_node with an attribute keyword works on every NetworkX release;
        # the ``tree.node[...]`` accessor used before no longer exists.
        tree.add_node(child.id, distance=float(child.dist))
        tree.add_edge(parent.id, child.id)

        if child.left:
            queue.append((child, child.left))

        if child.right:
            queue.append((child, child.right))

    return tree
开发者ID:eldridgejm,项目名称:umetric,代码行数:33,代码来源:core.py


示例8: to_dict

  def to_dict(self, correlation_matrix, linkage_matrix):
    """Describe every merge of ``linkage_matrix`` as a plain dictionary.

    Args:
      correlation_matrix: Not used by this method.
      linkage_matrix: Linkage matrix accepted by
        ``scipy.cluster.hierarchy.to_tree``.

    Returns:
      Dict keyed by 1-based merge number (row index of ``linkage_matrix``
      plus one). Each value holds ``'datasets'``, the sorted 1-based leaf
      ids below that merge, and ``'height'``, column 2 of the matching row.
    """
    from scipy.cluster import hierarchy
    tree = hierarchy.to_tree(linkage_matrix, rd=False)
    n_merges = len(linkage_matrix)

    d = {}

    # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
    # https://gist.github.com/mdml/7537455

    # An explicit stack replaces recursion so chain-shaped dendrograms cannot
    # exceed the interpreter's recursion limit.
    stack = [tree]
    while stack:
      node = stack.pop()
      if node.is_leaf():
        continue
      # Non-leaf ids start right after the n_merges + 1 leaf ids.
      cluster_id = node.get_id() - n_merges - 1
      d[cluster_id+1] = {
        'datasets': [i+1 for i in sorted(node.pre_order())],
        'height': linkage_matrix[cluster_id][2],
      }
      # Push right before left so the left subtree is handled first, which
      # keeps the insertion order of the previous recursive version.
      if node.right:
        stack.append(node.right)
      if node.left:
        stack.append(node.left)

    return d
开发者ID:xia2,项目名称:xia2,代码行数:27,代码来源:MultiCrystalAnalysis.py


示例9: check_leaves_list_iris

 def check_leaves_list_iris(self, method):
     """Check that the tree's pre-order equals leaves_list(Z) on the Iris data set.

     ``method`` is the linkage method name passed to ``linkage``.
     """
     X = eo['iris']
     # The unused pdist(X) computation that used to sit here was dropped.
     Z = linkage(X, method)
     node = to_tree(Z)
     assert_equal(node.pre_order(), leaves_list(Z))
开发者ID:FrankZhao66,项目名称:scipy,代码行数:7,代码来源:test_hierarchy.py


示例10: tfidf_covariance

def tfidf_covariance(texts, savepath):
    """Build (or load from cache) a covariance over ``texts`` from a TF-IDF dendrogram.

    Two ``.npy`` caches live under ``savepath``: the average-linkage matrix of
    the TF-IDF vectors and the covariance derived from the dendrogram. Each is
    loaded when present, otherwise computed and saved.

    Args:
        texts: Iterable of token sequences; each one is joined with spaces
            before vectorisation. Its length selects the covariance sub-block.
        savepath: Cache directory; created if it does not exist.

    Returns:
        ``HierarchicalObservation`` wrapping the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_path = savepath + "__linkage_average.npy"
    covariance_path = savepath + "__covariance__.npy"

    if os.path.exists(linkage_path):
        Z = np.load(linkage_path)
    else:
        if not os.path.exists(savepath):
            os.makedirs(savepath)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # ``input`` must be one of 'filename', 'file' or 'content'; the
        # builtin ``str`` passed before is not a valid option.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        Z = linkage(y.todense(), method='average', metric='euclidean')
        np.save(linkage_path, Z)

    if os.path.exists(covariance_path):
        Cov = np.load(covariance_path)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # Float division: ``1/(sigma**2)`` is 0 under Python 2 integer
        # division, which would drop the diagonal term entirely.
        laplacian = (np.diag(deg_mat) - adj_mat
                     + (1.0 / sigma ** 2) * np.eye(len(deg_mat)))
        # Keep only the leading len(texts) x len(texts) block of the inverse.
        Cov = np.linalg.inv(laplacian)[:len(texts), :len(texts)]
        np.save(covariance_path, Cov)

    return HierarchicalObservation(Cov)
开发者ID:JonathanRaiman,项目名称:PythonObjectLM,代码行数:31,代码来源:covariance.py


示例11: plot_dendrogram

def plot_dendrogram(Z, dendogram_file_name):
    """Draw the dendrogram of ``Z`` to a pdf or png file.

    Args:
        Z: Linkage matrix accepted by ``to_tree`` and ``fancy_dendrogram``.
        dendogram_file_name: Output path; must end with ``pdf`` or ``png``.

    Returns:
        The cut threshold passed to the plot as ``max_d``: one third of the
        root node's distance.

    Raises:
        NotImplementedError: If the file name ends with neither ``pdf`` nor
            ``png``.
    """
    # Validate the output format first so no figure is created (and left
    # open) for an unsupported extension. ``NotImplemented`` is a constant,
    # not an exception, so raising it failed with a TypeError.
    if dendogram_file_name.endswith('pdf'):
        file_format = 'pdf'
    elif dendogram_file_name.endswith('png'):
        file_format = 'png'
    else:
        raise NotImplementedError('File format has to be either png or pdf')

    root = to_tree(Z)
    threshold = root.dist / 3.0
    all_leaves = get_leaves(root)

    plt.figure(figsize=(30, 30))
    title = 'Hierarchical Clustering Dendrogram( %d leaves)' % len(all_leaves)
    xlabel = 'loci'
    ylabel = 'distance'

    fancy_dendrogram(
        Z,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=4.,  # font size for the x axis labels
        annotate_above=10,
        max_d=threshold,
        title=title,
        xlabel=xlabel,
        ylabel=ylabel
    )

    plt.savefig(dendogram_file_name, format=file_format)

    plt.close()
    return threshold
开发者ID:kyrgyzbala,项目名称:NewSystems,代码行数:32,代码来源:dendrogram.py


示例12: create_cluster_heatmap

    def create_cluster_heatmap(self, compress=False, compressed_value="median", write_data=True):
        """Create the cluster heatmap representation in inchlib format.

        By setting ``compress`` you can cut the dendrogram at a distance that
        reduces the row count of the heatmap to the specified number. When
        compressing, ``compressed_value`` (median, mean) selects how merged
        rows are combined; for nominal (text) metadata the most frequent
        value is used. With ``write_data`` set to False the data features are
        not present in the resulting format.

        The result is stored in ``self.dendrogram``; nothing is returned.
        """
        self.dendrogram = {"data": self.__get_cluster_heatmap__(write_data)}

        self.compress = compress
        self.compressed_value = compressed_value
        self.compress_cluster_treshold = 0
        if self.compress and self.compress >= 0:
            self.compress_cluster_treshold = self.__get_distance_treshold__(compress)
            print("Distance treshold for compression:", self.compress_cluster_treshold)
            if self.compress_cluster_treshold >= 0:
                self.__compress_data__()
        else:
            self.compress = False

        if self.header:
            # Feature names are emitted only together with the data itself.
            self.dendrogram["data"]["feature_names"] = list(self.header) if write_data else []

        if self.axis == "both" and len(self.cluster_object.column_clustering):
            # The tree is built inside __get_column_dendrogram__; the extra
            # to_tree call whose result was discarded here has been removed.
            self.dendrogram["column_dendrogram"] = self.__get_column_dendrogram__()
开发者ID:AlfiyaZi,项目名称:InCHlib.js,代码行数:27,代码来源:inchlib_clust_dev.py


示例13: guide_tree_from_sequences

def guide_tree_from_sequences(sequences,
                              metric=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying metric to sequences

    Parameters
    ----------
    sequences : list of skbio.Sequence objects (or subclasses)
      The sequences to be represented in the resulting guide tree.
    metric : function
      Function that returns a single distance value when given a pair of
      skbio.Sequence objects.
    display_tree : bool, optional
      Draw the tree with ``dendrogram`` before returning.

    Returns
    -------
    The root node produced by ``to_tree`` for the average-linkage matrix
    (a ``scipy.cluster.hierarchy.ClusterNode`` if ``to_tree`` is SciPy's),
    not an ``skbio.TreeNode``.

    """
    guide_dm = DistanceMatrix.from_iterable(
                    sequences, metric=metric, key='id')
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # Called for its drawing side effect only; the returned dict is unused.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
开发者ID:lsl5,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:28,代码来源:__init__.py


示例14: test_Q_subtree_pre_order

 def test_Q_subtree_pre_order(self):
     """pre_order() of the root must equal left pre_order() + right pre_order()."""
     root = to_tree(linkage(hierarchy_test_data.Q_X, 'single'))
     whole_order = root.pre_order()
     left_order = root.get_left().pre_order()
     right_order = root.get_right().pre_order()
     assert_equal(whole_order, left_order + right_order)
开发者ID:abudulemusa,项目名称:scipy,代码行数:7,代码来源:test_hierarchy.py


示例15: guide_tree_from_sequences

def guide_tree_from_sequences(sequences,
                              distance_fn=kmer_distance,
                              display_tree = False):
    """ Build a UPGMA tree by applying distance_fn to sequences

    Parameters
    ----------
    sequences : skbio.SequenceCollection
      The sequences to be represented in the resulting guide tree.
    distance_fn : function
      Distance function handed to ``sequences.distances``.
    display_tree : bool, optional
      Draw the tree with ``dendrogram`` before returning.

    Returns
    -------
    The root node produced by ``to_tree`` for the average-linkage matrix
    (a ``scipy.cluster.hierarchy.ClusterNode`` if ``to_tree`` is SciPy's),
    not an ``skbio.TreeNode``.

    """
    guide_dm = sequences.distances(distance_fn)
    guide_lm = average(guide_dm.condensed_form())
    guide_tree = to_tree(guide_lm)
    if display_tree:
        # Called for its drawing side effect only; the returned dict is unused.
        dendrogram(guide_lm, labels=guide_dm.ids, orientation='right',
                   link_color_func=lambda x: 'black')
    return guide_tree
开发者ID:gitter-badger,项目名称:An-Introduction-To-Applied-Bioinformatics,代码行数:27,代码来源:__init__.py


示例16: __get_column_dendrogram__

    def __get_column_dendrogram__(self):
        """Return ``{"nodes": {...}}`` with one record per node of the column tree.

        Leaf records hold ``count`` 1 and ``distance`` 0; other records hold
        the node's count, its distance rounded to three decimals and the ids
        of both children. Child records additionally receive a ``parent`` id.
        """
        _, all_nodes = hcluster.to_tree(self.cluster_object.column_clustering, rd=True)

        records = {}
        for cluster_node in all_nodes:
            if cluster_node.count == 1:
                record = {"count":1, "distance":0}
            else:
                left_id = cluster_node.get_left().id
                right_id = cluster_node.get_right().id
                record = {
                    "count": cluster_node.count,
                    "distance": round(cluster_node.dist, 3),
                    "left_child": left_id,
                    "right_child": right_id,
                }
            records[cluster_node.id] = record

        # Point every child record back at its parent's id.
        for parent_id in records:
            parent_record = records[parent_id]
            if parent_record["count"] == 1:
                continue
            for side in ("left_child", "right_child"):
                records[parent_record[side]]["parent"] = parent_id

        dendrogram = {"nodes":{}}
        for node_id in records:
            dendrogram["nodes"].setdefault(node_id, records[node_id])

        return dendrogram
开发者ID:AlfiyaZi,项目名称:InCHlib.js,代码行数:26,代码来源:inchlib_clust_dev.py


示例17: scipy_algo

def scipy_algo(dataset, abstract=False):
    """Cluster the documents of ``dataset`` hierarchically and return the average f-score.

    Args:
        dataset: Passed to ``dp.DocumentsProcessor``.
        abstract: Forwarded to ``DocumentsProcessor.get_data``.

    Returns:
        The value of ``average_f_score`` over all non-leaf clusters.
    """
    doc_proc = dp.DocumentsProcessor(dataset)
    tfidf_matrix, f_score_dict = doc_proc.get_data(abstract)

    # NOTE(review): the LSA step (TruncatedSVD + Normalizer) was built here
    # but never applied, so the unused objects were removed; the message
    # below still mentions lsa.
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('starting clustering after lsa: found %s document and %s features'
          % (tfidf_matrix.shape[0], tfidf_matrix.shape[1]))

    linkage_matrix = hr.average(tfidf_matrix.toarray())

    # rd=True also returns the list of every node in the tree.
    _, nodes = hr.to_tree(linkage_matrix, rd=True)

    clusters = {}
    for node in nodes:
        if not node.is_leaf():
            clusters[node.get_id()] = collect_leaf_nodes(node, [])

    f = f_score(clusters, f_score_dict)

    print_f_score_dict(f)

    avg_f_score = average_f_score(f, tfidf_matrix.shape[0])
    print('average f_score: %s' % avg_f_score)
    return avg_f_score
开发者ID:Neuro17,项目名称:LOD-doc-clustering,代码行数:31,代码来源:classifier.py


示例18: cluster_dandelion_2

def cluster_dandelion_2(dataset, gamma=0.91, filter=False):
    """Run the dandelion clustering pipeline and return its linkage matrix.

    The f-scores are still computed and written into the ``params`` dict
    obtained from the document processor, but only the linkage matrix is
    returned.
    """
    # Duplicate routine, needed only to get the linkage_matrix back.
    processor = dp.DocumentsProcessor(dataset)
    if not gamma:
        tfidf_matrix, f_score_dict, params = processor.get_data_with_dandelion()
    else:
        tfidf_matrix, f_score_dict, params = processor.get_data_with_dandelion(
            gamma=gamma, filter=filter)

    svd = TruncatedSVD(tfidf_matrix.shape[0])
    reducer = make_pipeline(svd, Normalizer(copy=False))
    tfidf_matrix = reducer.fit_transform(tfidf_matrix)

    linkage_matrix = hr.average(tfidf_matrix)

    tree_and_nodes = hr.to_tree(linkage_matrix, rd=True)
    all_nodes = tree_and_nodes[1]

    clusters = {}
    for node in all_nodes:
        if node.is_leaf():
            continue
        clusters[node.get_id()] = collect_leaf_nodes(node, [])

    scores = f_score(clusters, f_score_dict)
    score_listing = print_f_score_dict(scores)

    params['avg_f_score'] = average_f_score(scores, tfidf_matrix.shape[0])
    params['all_fscore'] = score_listing

    return linkage_matrix
开发者ID:Neuro17,项目名称:LOD-doc-clustering,代码行数:34,代码来源:classifier.py


示例19: test_iris_subtree_pre_order

 def test_iris_subtree_pre_order(self):
     """Tests that pre_order() works when called on sub-trees (Iris data)."""
     X = eo['iris']
     # The unused pdist(X) computation that used to sit here was dropped.
     Z = linkage(X, 'single')
     node = to_tree(Z)
     assert_equal(node.pre_order(), (node.get_left().pre_order()
                                     + node.get_right().pre_order()))
开发者ID:FrankZhao66,项目名称:scipy,代码行数:8,代码来源:test_hierarchy.py


示例20: plot_leaf_ordering

def plot_leaf_ordering(X, method, metric):
    """Show ``X`` as a heatmap with reordered rows/columns next to both dendrograms.

    Rows of ``X`` and of ``X.T`` are clustered separately with the given
    linkage ``method`` and distance ``metric``. Both trees are passed through
    ``optimal_scores`` / ``order_tree`` before their pre-order leaf lists are
    used to index the heatmap.

    Args:
        X: 2-D array; rows and columns are both clustered.
        method: Linkage method passed to ``hierarchy.linkage``.
        metric: Distance metric passed to ``pdist`` and ``linkage``.
    """
    dists = distance.squareform(distance.pdist(X, metric=metric))
    dists2 = distance.squareform(distance.pdist(X.T, metric=metric))

    Z = hierarchy.linkage(X, method=method, metric=metric)
    Z2 = hierarchy.linkage(X.T, method=method, metric=metric)

    t, rd = hierarchy.to_tree(Z, True)
    t2, rd2 = hierarchy.to_tree(Z2, True)

    # NOTE(review): order_tree presumably reorders the tree nodes (and Z) in
    # place, since the leaf orders are read from t / t2 afterwards - confirm.
    M = optimal_scores(Z, rd, dists)
    order_tree(Z, rd, M)
    M2 = optimal_scores(Z2, rd2, dists2)
    order_tree(Z2, rd2, M2)

    rr = t.pre_order()
    rr2 = t2.pre_order()

    import matplotlib.pyplot as plt
    from matplotlib.gridspec import GridSpec

    fig = plt.figure(figsize=(8,8))
    gs = GridSpec(2, 2, top=0.95, bottom=0.05, left=0.05, right=0.95,
                  hspace=0.01, wspace=0.01,
                  width_ratios=(1,3), height_ratios=(1,3))

    ax01 = fig.add_subplot(gs[0,1])
    ax10 = fig.add_subplot(gs[1,0])
    ax11 = fig.add_subplot(gs[1,1])

    hierarchy.dendrogram(Z2, ax=ax01)
    ax01.set_axis_off()
    hierarchy.dendrogram(Z, orientation='right', ax=ax10)
    ax10.set_axis_off()

    ax11.matshow(X[np.ix_(rr,rr2)], cmap="Blues", aspect="auto")
    # tick_params expects booleans; the strings 'off'/'on' are truthy in
    # current Matplotlib and would switch everything on.
    ax11.tick_params(top=False, bottom=False, right=False)
    ax11.tick_params(labeltop=False, labelleft=False, labelright=True)

    ax11.set_xticks(np.arange(len(rr2)))
    ax11.set_xticklabels(rr2, fontsize=5.0)
    ax11.set_yticks(np.arange(len(rr)))
    ax11.set_yticklabels(rr, fontsize=5.0)

    plt.show()
开发者ID:jamestwebber,项目名称:jtw-utils,代码行数:45,代码来源:optimal_leaf_ordering.py



注:本文中的scipy.cluster.hierarchy.to_tree函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python hierarchy.ward函数代码示例发布时间:2022-05-27
下一篇:
Python hierarchy.single函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap