• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python bicluster.SpectralCoclustering类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.cluster.bicluster.SpectralCoclustering的典型用法代码示例。如果您正苦于以下问题：Python SpectralCoclustering类的具体用法？Python SpectralCoclustering怎么用？Python SpectralCoclustering使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了SpectralCoclustering类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: print_similarity_matrix

def print_similarity_matrix(sphns, model, model2=None):
    print "      ",
    for phn1 in sphns:
        print phn1, " ",
    print ""
    m = np.ndarray((len(sphns), len(sphns)), dtype=np.float32)
    for i, phn1 in enumerate(sphns):
        print phn1.ljust(4) + ":",
        for j, phn2 in enumerate(sphns):
            sim = model.similarity(phn1, phn2)
            if model2 != None:
                sim -= model2.similarity(phn1, phn2)
            print "%0.2f" % sim,
            m[i][j] = sim
        print ""
    phn_order = [phn for phn in sphns]

    if BICLUSTER:
        #model = SpectralBiclustering(n_clusters=4, method='log',
        model = SpectralCoclustering(n_clusters=n_clusters,
                                             random_state=0)
        model.fit(m)
        print "INDICES:",
        indices = [model.get_indices(i) for i in xrange(n_clusters)]
        print indices
        tmp = []
        for i in xrange(n_clusters):
            tmp.extend([phn_order[indices[i][0][j]] for j in xrange(len(indices[i][0]))])
        phn_order = tmp
        fit_data = m[np.argsort(model.row_labels_)]
        fit_data = fit_data[:, np.argsort(model.column_labels_)]
        m = fit_data

    return phn_order, m
开发者ID:cequencer,项目名称:speech_embeddings,代码行数:34,代码来源:train_word2vec.py


示例2: test_spectral_coclustering

def test_spectral_coclustering():
    """Check Dhillon's spectral co-clustering on a small planted problem.

    Every parameter combination in the grid is fitted on both the dense and
    the CSR form of the same 30x30 matrix; each fit must assign every row
    and column to exactly one of the 3 clusters and reach a consensus score
    of 1 against the generated biclusters.
    """
    seed = 0
    grid = dict(svd_method=['randomized', 'arpack'],
                n_svd_vecs=[None, 20],
                mini_batch=[False, True],
                init=['k-means++'],
                n_init=[10],
                n_jobs=[1])
    data, true_rows, true_cols = make_biclusters((30, 30), 3, noise=0.5,
                                                 random_state=seed)
    # Shift to non-negative values before building the sparse variant.
    data -= data.min()
    # Zero out the small entries.
    data = np.where(data < 1, 0, data)
    expected = (true_rows, true_cols)

    for matrix in (data, csr_matrix(data)):
        for params in ParameterGrid(grid):
            estimator = SpectralCoclustering(n_clusters=3,
                                             random_state=seed,
                                             **params)
            estimator.fit(matrix)

            assert_equal(estimator.rows_.shape, (3, 30))
            row_membership = estimator.rows_.sum(axis=0)
            col_membership = estimator.columns_.sum(axis=0)
            assert_array_equal(row_membership, np.ones(30))
            assert_array_equal(col_membership, np.ones(30))
            score = consensus_score(estimator.biclusters_, expected)
            assert_equal(score, 1)
开发者ID:ChicoQ,项目名称:scikit-learn,代码行数:25,代码来源:test_spectral.py


示例3: main

def main():
    origin = open('10k.txt', 'r')

    lines = origin.readlines()

    x = []
    label = []

    for l in lines:
        l = l.split(',')
        ip1 = l[2].split('.')
        ip2 = l[3].split('.')
        d = [datetime.fromtimestamp(int(l[1][0:11])).hour, int("%02x%02x%02x%02x"%(int(ip1[0]),int(ip1[1]),int(ip1[2]),int(ip1[3])),16), int("%02x%02x%02x%02x" % (int(ip2[0]),int(ip2[1]),int(ip2[2]),int(ip2[3])),16)] + l[4:6] + l[7:10]
        x.append(d)

    data = np.array(x, dtype='float32')

    model = SpectralCoclustering(n_clusters=5)
    model.fit(data)

    print model.rows_

    for i in range(5):
        print "Cluster" + str(i) + ':'
        for j in range(10000):
            if model.rows_[i][j]:
                print j,
        print ' '
开发者ID:royxue,项目名称:KDD99_Coclustering,代码行数:28,代码来源:niara_cluster.py


示例4: find_disjoint_biclusters

    def find_disjoint_biclusters(self, biclusters_number=50):
        """Find biclusters with spectral co-clustering and prune sparse edges.

        ``self.matrix`` is co-clustered into ``biclusters_number`` clusters.
        For each non-empty cluster, columns whose density over the cluster's
        rows is below a quarter of the cluster density are dropped, then rows
        are pruned the same way against the remaining columns. Clusters that
        still have rows and columns are returned with their recomputed
        density.

        Args:
            biclusters_number: number of co-clusters to request.

        Returns:
            A set of ``Bicluster(row_set, columns_set, density)`` objects.

        Raises:
            ValueError: from ``np.asarray_chkfinite`` if the matrix contains
                NaN or infinite values.
        """
        # np.array() copies, so the zero replacement below never writes into
        # self.matrix (asarray_chkfinite returns the same object when given
        # an ndarray).
        data = np.array(np.asarray_chkfinite(self.matrix))
        if not np.issubdtype(data.dtype, np.floating):
            # On an integer matrix the epsilon below would truncate to 0.
            data = data.astype(float)
        data[data == 0] = 0.000001
        coclustering = SpectralCoclustering(n_clusters=biclusters_number, random_state=0)
        coclustering.fit(data)

        biclusters = set()
        for i in range(biclusters_number):
            rows, columns = coclustering.get_indices(i)
            row_set = set(rows)
            columns_set = set(columns)
            if not row_set or not columns_set:
                continue

            density = self._calculate_box_cluster_density(row_set, columns_set)
            threshold = density / 4

            odd_columns = set(
                column for column in columns_set
                if self._calculate_column_density(column, row_set) < threshold)
            columns_set.difference_update(odd_columns)
            if not columns_set:
                continue

            odd_rows = set(
                row for row in row_set
                if self._calculate_row_density(row, columns_set) < threshold)
            row_set.difference_update(odd_rows)
            if not row_set:
                continue

            # Density is recomputed on the pruned box before storing it.
            density = self._calculate_box_cluster_density(row_set, columns_set)
            biclusters.add(Bicluster(row_set, columns_set, density))

        return biclusters
开发者ID:luntos,项目名称:bianalyzer,代码行数:34,代码来源:spectral_coclustering.py


示例5: biclustering

def biclustering(input, num_clusters):
    """Co-cluster ``input`` and return the row indices of every cluster.

    Args:
        input: matrix-like data accepted by ``np.matrix``.
        num_clusters: number of co-clusters to fit.

    Returns:
        dict mapping cluster number to the list of row indices in that
        cluster. The same dict is also stored in the module-level
        ``agent1_dict``.
    """
    global agent1_dict
    # np.matrix keeps the original input parsing; the estimator is given a
    # plain ndarray because recent scikit-learn rejects np.matrix instances.
    data = np.asarray(np.matrix(input))
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # get_indices(c) returns (row indices, column indices); keep the rows.
    agent1_dict = dict(
        (c, model.get_indices(c)[0].tolist()) for c in range(num_clusters))
    return agent1_dict
开发者ID:sneha6791,项目名称:Thesis,代码行数:10,代码来源:music_gt_2_a2.py


示例6: main

def main():
    origin = open('kddcup.txt', 'r')

    lines = origin.readlines()

    x = []
    label = []

    for l in lines:
        l = l.split(',')
        d = l[0:1] + l[4:19] + l[21:-1]
        label.append(l[-1])
        x.append(d)

    data = np.array(x, dtype='float32')

    model = SpectralCoclustering(n_clusters=5)
    model.fit(data)

    evaluation = []

    draw_n_x = []
    draw_n_y = []
    draw_a_x = []
    draw_a_y = []

    for cluster in model.rows_:
        normal = 0.0
        attack = 0.0
        graph_x = []
        graph_y = []
        for idx in range(len(cluster)):
            if cluster[idx]:
                if label[idx] == 'normal.\n':
                    normal += 1
                else:
                    attack += 1
                graph_x.append(data[27])
                graph_y.append(data[30])
        evaluation.append(normal / (normal + attack))

        if normal > attack:
            draw_n_x += graph_x
            draw_n_y += graph_y
        else:
            draw_a_x += graph_x
            draw_a_y += graph_y

    pl.plot(draw_n_x, draw_n_y, 'ro')
    pl.plot(draw_a_x, draw_a_y, 'go')

    print evaluation
    pl.show()
开发者ID:royxue,项目名称:KDD99_Coclustering,代码行数:53,代码来源:cluster_data.py


示例7: biclustering

def biclustering(data, num_clusters):
    """Co-cluster ``data`` and return the row indices of every cluster.

    Args:
        data: matrix-like data accepted by ``np.asmatrix``.
        num_clusters: number of co-clusters to fit.

    Returns:
        dict mapping cluster number to the list of row indices in that
        cluster.
    """
    # np.asmatrix keeps the original input interpretation; the estimator is
    # given a plain ndarray because recent scikit-learn rejects np.matrix
    # instances.
    data = np.asarray(np.asmatrix(data))
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # get_indices(c) returns (row indices, column indices); keep the rows.
    return dict(
        (c, model.get_indices(c)[0].tolist()) for c in range(num_clusters))
开发者ID:sneha6791,项目名称:Thesis,代码行数:12,代码来源:current_working.py


示例8: biclustering

def biclustering(input, num_clusters):
    """Co-cluster ``input``, plot the regrouped data, return cluster rows.

    Args:
        input: matrix-like data accepted by ``np.matrix``.
        num_clusters: number of co-clusters to fit.

    Returns:
        dict mapping cluster number to the list of row indices in that
        cluster. The same dict is also stored in the module-level
        ``agent1_dict``.
    """
    global agent1_dict
    data = np.matrix(input)
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    # Fit on a plain ndarray view: recent scikit-learn rejects np.matrix.
    # ``data`` itself stays a matrix so plot() receives the same type.
    model.fit(np.asarray(data))
    # get_indices(c) returns (row indices, column indices); keep the rows.
    agent1_dict = dict(
        (c, model.get_indices(c)[0].tolist()) for c in range(num_clusters))
    # Regroup rows, then columns, so members of a cluster are adjacent.
    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plot(fit_data)
    return agent1_dict
开发者ID:sneha6791,项目名称:Thesis,代码行数:13,代码来源:music_gt_1.py


示例9: cluster_data

def cluster_data(flavors, whisky):
    """Group whiskies by co-clustering their flavor correlations and plot.

    Args:
        flavors: DataFrame whose rows are correlated with each other (the
            frame is transposed before ``corr``).
        whisky: DataFrame with one row per row of ``flavors``; a ``Group``
            column holding the cluster label is added to it in place.

    Returns:
        None. The correlation matrix of columns 2-13 of the regrouped
        ``whisky`` frame is passed to ``plot_correlations``.
    """
    corr_whisky = flavors.transpose().corr()
    model = SpectralCoclustering(n_clusters=6, random_state=0)
    model.fit(corr_whisky)
    whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
    # argsort yields positions, so index positionally. The former ``.ix``
    # indexer has been removed from pandas and was label-based on integer
    # indexes.
    whisky = whisky.iloc[np.argsort(model.row_labels_)]
    whisky = whisky.reset_index(drop=True)
    correlation = np.array(whisky.iloc[:, 2:14].transpose().corr())
    plot_correlations(correlation)
开发者ID:piotrbla,项目名称:pyExamples,代码行数:14,代码来源:pandas_test.py


示例10: plot_coclusters_raw_data

def plot_coclusters_raw_data(time_ms, t=False):
    # take the transpose of sliced matrix
    if t:
        channels_data = slice_matrix(matrix, time_ms)
    else:
        channels_data = slice_matrix(matrix, time_ms)
    print len(channels_data), len(channels_data[1])
    z_score = stats.zscore(channels_data)
    plt.title('Z Score Biclustering Over %i ms' % time_ms)
    spectral_model = SpectralCoclustering()
    spectral_model.fit(z_score)
    fit_data = z_score[np.argsort(spectral_model.row_labels_)]
    fit_data = fit_data[:, np.argsort(spectral_model.column_labels_)]
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.savefig('z_score_raw_coclustering_all_ts_%i_T_%s.svg' % (time_ms, str(t)))
开发者ID:exp0nge,项目名称:eeg-viz,代码行数:15,代码来源:z_score.py


示例11: cocluster

 def cocluster(self, mx, blockdiag=False):
     """Reorder ``mx`` so rows and columns of a cluster are adjacent.

     With ``blockdiag`` a ``SpectralCoclustering`` model is fitted,
     otherwise a ``SpectralBiclustering`` model with a (4, 3) cluster grid.
     ``self.prev`` is permuted like the rows and ``self.case`` like the
     columns. Returns the permuted matrix.
     """
     logging.info('Co-clustering Tade..')
     if not blockdiag:
         # checkerboard structure
         logging.info('checkerboard')
         estimator = SpectralBiclustering(n_jobs=-1, n_clusters=(4,3))
     else:
         logging.info('blockdiag')
         estimator = SpectralCoclustering(n_jobs=-1)
     estimator.fit(mx)

     logging.info('Argsorting mx rows..')
     row_order = np.argsort(estimator.row_labels_)
     mx = mx[row_order]
     self.prev = self.prev[row_order]

     logging.info('Argsorting mx cases..')
     col_order = np.argsort(estimator.column_labels_)
     mx = mx[:, col_order]
     self.case = self.case[col_order]
     return mx
开发者ID:makrai,项目名称:misc,代码行数:17,代码来源:coclust_tade.py


示例12: main

def main():
    """Co-cluster every ``*.csv`` in ``DATA_DIR`` and save a heat map PDF.

    Each file is loaded with ``get_data``, split into 2 co-clusters, and
    its rows and columns are regrouped by cluster label. The result is
    saved in the working directory under the file's name with the last
    four characters replaced by ``.pdf``.
    """
    files = [DATA_DIR + name for name in os.listdir(DATA_DIR)
             if fnmatch.fnmatch(name, '*.csv')]

    for path in files:
        print('processing', path, '...')
        table = get_data(path)
        cl = SpectralCoclustering(n_clusters=2, random_state=0)
        cl.fit(table)

        # using http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html
        fit_data = table[np.argsort(cl.row_labels_)]
        fit_data = fit_data[:, np.argsort(cl.column_labels_)]

        base_name = path[len(DATA_DIR):]
        plt.matshow(fit_data, cmap=plt.cm.Reds)
        plt.title(base_name)
        plt.savefig(base_name[:-4] + '.pdf')
        # matshow opens a new figure per file; release it once saved so the
        # figures do not pile up over the loop.
        plt.close()
开发者ID:danielgeng,项目名称:cs249_data_science,代码行数:17,代码来源:biclustering.py


示例13: main

def main(model):
    """Render the thresholded transition matrix of a stored model to PDF.

    Args:
        model: path handed to ``pd.HDFStore``; the store must contain
            ``from_``, ``Psi_sz`` and ``count_z``.

    Returns:
        None. Writes ``tmat.pdf`` (the binarised matrix) and
        ``tmat-cluster.pdf`` (the same matrix with rows and columns
        regrouped into 3 co-clusters).

    Raises:
        AssertionError: if the stored ``from_`` value is not 0.
    """
    store = pd.HDFStore(model)
    try:
        from_ = store['from_'][0][0]
        assert from_ == 0

        Psi_oz = store['Psi_sz'].values
        count_z = store['count_z'].values[:, 0]
    finally:
        # Everything needed is in memory now; close even if a read failed.
        store.close()

    Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
    Psi_zo = (Psi_oz * count_z).T
    Psi_zo = Psi_zo / Psi_zo.sum(axis=0)

    ZtZ = Psi_zo.dot(Psi_oz)
    ZtZ = ZtZ / ZtZ.sum(axis=0)
    # 1 where the value reaches 1 / len(ZtZ), 0 elsewhere.
    L = (ZtZ >= 1.0 / len(ZtZ)).astype(ZtZ.dtype)

    colormap = toyplot.color.brewer.map("Purples", domain_min=0, domain_max=1, reverse=True)
    print(colormap)
    canvas = toyplot.matrix((L.T, colormap), label="P[z' | z]",
                            colorshow=False, tlabel="To z'", llabel="From")[0]
    toyplot.pdf.render(canvas, 'tmat.pdf')

    # Separate name so the ``model`` path argument is not shadowed.
    cocluster = SpectralCoclustering(n_clusters=3)
    cocluster.fit(L)
    fit_data = L[np.argsort(cocluster.row_labels_)]
    fit_data = fit_data[:, np.argsort(cocluster.column_labels_)]
    canvas = toyplot.matrix((fit_data, colormap), label="P[z' | z']",
                            colorshow=False)[0]
    toyplot.pdf.render(canvas, 'tmat-cluster.pdf')
开发者ID:flaviovdf,项目名称:tribeflow,代码行数:46,代码来源:tmat-toyplot.py


示例14: biclustering

def biclustering(input_list, num_clusters):
    """Co-cluster ``input_list``, plot the regrouped data, return cluster rows.

    Args:
        input_list: matrix-like data accepted by ``np.matrix``.
        num_clusters: number of co-clusters to fit.

    Returns:
        dict mapping cluster number to the list of row indices in that
        cluster. The same dict is also stored in the module-level
        ``agent1_dict``.
    """
    global agent1_dict
    data = np.matrix(input_list)

    # Co-clustering; see
    # http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    # Fit on a plain ndarray view: recent scikit-learn rejects np.matrix.
    # ``data`` itself stays a matrix so plot() receives the same type.
    model.fit(np.asarray(data))

    # get_indices(c) returns (row indices, column indices); keep the rows.
    agent1_dict = dict(
        (c, model.get_indices(c)[0].tolist()) for c in range(num_clusters))
    # Regroup rows, then columns, so members of a cluster are adjacent.
    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plot(fit_data)
    return agent1_dict
开发者ID:sneha6791,项目名称:Thesis,代码行数:19,代码来源:music_gametheory.py


示例15: SpectralCoclustering

# Script fragment: co-clusters MatOut and reorders a labelled copy of it.
# MatOut, indx and areas are defined outside this excerpt.
# (disabled) alternative thresholding of mM:
#mM[np.where(np.logical_and(mM >= 0.25, mM < 1.25))] = 0.5
#mM[mM >= 1.25] = 1


#####

# Wrap the matrix in a DataFrame indexed by indx with areas as column names.
matDF = pd.DataFrame(MatOut).set_index(np.array(indx))
matDF.columns = areas


# Original plot
plt.matshow(MatOut, cmap=plt.cm.Blues)
plt.title("Original dataset")
clusters = 8 #6 

model = SpectralCoclustering(n_clusters=clusters)
#model = SpectralBiclustering(n_clusters=clusters)
model.fit(matDF)
    
    

# Reorder columns, then rows, so entries with the same cluster label are
# adjacent.
fitData_c = matDF.columns[np.argsort(model.column_labels_)]
matDF = matDF[fitData_c]
fitData_i = matDF.index[ np.argsort(model.row_labels_)]
matDF = matDF.reindex(fitData_i)

# Characters 13-15 of every reordered column name.
column_names =  np.array([i[13:16] for i in fitData_c])

# plot
fig = plt.figure()
ax = fig.add_subplot(111)
开发者ID:kpokk,项目名称:Neural-Response-to-Images,代码行数:31,代码来源:biclustering.py


示例16: make_biclusters

from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

# Demo script: generate a 300x300 matrix with 5 planted biclusters, shuffle
# it, recover the biclusters with SpectralCoclustering and plot each stage.
data, rows, columns = make_biclusters(
    shape=(300, 300), n_clusters=5, noise=5,
    shuffle=False, random_state=0)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# _shuffle is a private helper of samples_generator; it returns the shuffled
# data together with the row and column permutations that were applied.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)
# Compare against the planted biclusters, permuted like the shuffled data.
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))

# Python 2 print statement.
print "consensus score: {:.3f}".format(score)

# Regroup rows, then columns, so members of a cluster are adjacent.
fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]

plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")

plt.show()
开发者ID:2011200799,项目名称:scikit-learn,代码行数:30,代码来源:plot_spectral_coclustering.py


示例17: nx_graph_from_biadjacency_matrix

# Script fragment: fill the user x profile matrix from (user, profile, value)
# rows, then co-cluster it. ratings, user_ids, profile_ids and
# user_profile_matrix are defined outside this excerpt.
for row in ratings:
    # list.index() scans the list on every call.
    user_id = user_ids.index(row[0])
    profile_id = profile_ids.index(row[1])
    user_profile_matrix[user_id,profile_id] = row[2]

#find number of users and movies in each bicluster
# The triple-quoted block below is graph-drawing code disabled by turning it
# into a bare string expression.
'''G = nx_graph_from_biadjacency_matrix(user_movie_matrix)
nx.draw(G)
plt.show()'''

#initialize and carry out clustering
K=50


scc = SpectralCoclustering(n_clusters = K,svd_method='arpack')
scc.fit(user_profile_matrix)

#labels
row_labels = scc.row_labels_
column_labels = scc.column_labels_

# Per-bicluster counters, one slot per cluster, initialised to zero.
bicluster_num_users=np.zeros(K)
bicluster_num_profiles=np.zeros(K)

bicluster_list_users=[]

bicluster_list_profiles=[]

# One empty member list per bicluster (the excerpt ends inside this loop).
for i in range(K):
    bicluster_list_users.append([])
开发者ID:eburas77,项目名称:Stat640,代码行数:30,代码来源:biclusteringdating.py


示例18: SpectralCoclustering

# Script fragment: plot flavor and whisky correlation matrices, co-cluster
# the whisky correlations and regroup the whisky table by cluster label.
# flavors and whisky are defined outside this excerpt.
corr_flavors = pd.DataFrame.corr(flavors)
corr_flavors

plt.figure(figsize=(10,10))
plt.pcolor(corr_flavors)
plt.colorbar()
plt.savefig('./python_case_studies/whisky/corr_flavors.pdf')

# Correlate the rows of flavors with each other.
corr_whisky = pd.DataFrame.corr(flavors.transpose())
plt.figure(figsize=(10,10))
plt.pcolor(corr_whisky)
plt.axis('tight')
plt.colorbar()
plt.savefig('./python_case_studies/whisky/corr_whisky.pdf')

model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(corr_whisky)
# The bare expressions below only display values in an interactive session.
model.rows_

np.sum(model.rows_, axis=1)

np.sum(model.rows_, axis=0)

model.row_labels_

whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
# argsort yields positions, so index positionally. The former ``.ix``
# indexer has been removed from pandas and was label-based on integer
# indexes.
whisky = whisky.iloc[np.argsort(model.row_labels_)]
whisky = whisky.reset_index(drop=True)

correlations = pd.DataFrame.corr(whisky.iloc[:,2:14].transpose())
correlations = np.array(correlations)
开发者ID:okcorral,项目名称:python,代码行数:31,代码来源:whiskies.py


示例19: return

    this form of dimensionality reduction, some methods may perform better.
    """
    return ("#NUMBER" if token[0].isdigit() else token for token in tokens)


class NumberNormalizingVectorizer(TfidfVectorizer):
    """TfidfVectorizer whose tokens are passed through ``number_normalizer``."""

    def build_tokenizer(self):
        """Return a callable mapping a document to its normalized token list."""
        base_tokenizer = super(NumberNormalizingVectorizer, self).build_tokenizer()

        def tokenize_and_normalize(doc):
            # Materialise the result of number_normalizer as a list.
            return list(number_normalizer(base_tokenizer(doc)))

        return tokenize_and_normalize


# Script fragment: vectorize a pickled corpus, then label its rows with
# spectral co-clustering and with MiniBatchKMeans (20 clusters each).
dir = ROOT_DIR+'\\processed_data\\'
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle once the corpus is read.
with open(dir+'FT_raw_corpus_2013.p', 'rb') as corpus_file:
    data = pickle.load(corpus_file)

vectorizer = NumberNormalizingVectorizer(stop_words='english', min_df=5)
cocluster = SpectralCoclustering(n_clusters= 20,
                                 svd_method='arpack', random_state=0)
kmeans = MiniBatchKMeans(n_clusters=20, batch_size=20000,
                         random_state=0)

print("Vectorizing...")
X = vectorizer.fit_transform(data)

print("Coclustering...")
start_time = time()
cocluster.fit(X)
y_cocluster = cocluster.row_labels_

print("MiniBatchKMeans...")
start_time = time()
y_kmeans = kmeans.fit_predict(X)
开发者ID:sunnyjiahui,项目名称:CS230,代码行数:31,代码来源:biclustering.py


示例20: TfidfVectorizer

# Script fragment: build a TF-IDF matrix from paper abstracts, extract NMF
# topics and co-cluster documents and terms. papers and mergeAbstract are
# defined outside this excerpt.
listOfAbstracts = []
for paper in papers:
    # Papers without an 'Abstract' entry are skipped.
    if 'Abstract' in paper['MedlineCitation']['Article'].keys():
        listOfAbstracts.append(mergeAbstract(paper['MedlineCitation']['Article']['Abstract']['AbstractText']))

# Create TF-IDF matrix
# NOTE(review): max_df is the integer 1 here; scikit-learn treats an integer
# as an absolute document count and a float as a proportion -- confirm that
# 1.0 was not intended.
vect = TfidfVectorizer(max_df = 1)
tfidf = vect.fit_transform(listOfAbstracts)



# Non-negative Matrix Factorization
num_topics = 2
num_top_words = 5
nmf = decomposition.NMF(n_components=num_topics, random_state=1)
doctopic = nmf.fit_transform(tfidf)
topic_words = []
# NOTE(review): get_feature_names() is gone from recent scikit-learn in
# favour of get_feature_names_out() -- confirm the targeted version.
vocab = np.array(vect.get_feature_names())

# For every topic keep the num_top_words terms with the largest weights.
for topic in nmf.components_:
    word_idx = np.argsort(topic)[::-1][0:num_top_words]
    topic_words.append([vocab[i] for i in word_idx])

# Coclustering
cocluster = SpectralCoclustering(n_clusters=5,svd_method='arpack', random_state=0)
cocluster.fit(tfidf)
# Cluster label of every document (row) and every term (column).
y_cocluster = cocluster.row_labels_
x_cocluster = cocluster.column_labels_

# print(np.array(vect.get_feature_names())[x_cocluster == 4])
开发者ID:valentina-s,项目名称:PubMedMining,代码行数:30,代码来源:pubmedplay.py



注:本文中的sklearn.cluster.bicluster.SpectralCoclustering类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python dbscan_.dbscan函数代码示例发布时间:2022-05-27
下一篇:
Python _k_means.csr_row_norm_l2函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap