This article collects typical usage examples of the Python class sklearn.cluster.bicluster.SpectralCoclustering. If you are wondering what SpectralCoclustering does, how to use it, or what real code that uses it looks like, the hand-picked class examples below should help.
The following shows 20 code examples of the SpectralCoclustering class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
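Before the individual examples, here is a minimal, self-contained sketch of the workflow most of them share: fit the model, read row_labels_ and column_labels_, and reorder the matrix so the biclusters appear as contiguous blocks. The data and parameter values are illustrative only; note also that sklearn.cluster.bicluster is the import path of older scikit-learn releases, while recent versions expose the class directly as sklearn.cluster.SpectralCoclustering.

import numpy as np
from sklearn.datasets import make_biclusters
from sklearn.cluster.bicluster import SpectralCoclustering  # sklearn.cluster.SpectralCoclustering in recent releases

# Toy matrix with a planted bicluster structure (values are illustrative only).
data, rows, columns = make_biclusters(shape=(20, 20), n_clusters=3, noise=1.0, random_state=0)

model = SpectralCoclustering(n_clusters=3, random_state=0)
model.fit(data)

print(model.row_labels_)     # cluster assignment of each row
print(model.column_labels_)  # cluster assignment of each column

# Row and column indices that belong to bicluster 0.
row_idx, col_idx = model.get_indices(0)

# Reorder rows and columns by label so each bicluster forms a contiguous block,
# the same reordering trick most of the examples below use before plotting.
fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]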
Example 1: print_similarity_matrix
def print_similarity_matrix(sphns, model, model2=None):
    print " ",
    for phn1 in sphns:
        print phn1, " ",
    print ""
    m = np.ndarray((len(sphns), len(sphns)), dtype=np.float32)
    for i, phn1 in enumerate(sphns):
        print phn1.ljust(4) + ":",
        for j, phn2 in enumerate(sphns):
            sim = model.similarity(phn1, phn2)
            if model2 != None:
                sim -= model2.similarity(phn1, phn2)
            print "%0.2f" % sim,
            m[i][j] = sim
        print ""
    phn_order = [phn for phn in sphns]
    if BICLUSTER:
        #model = SpectralBiclustering(n_clusters=4, method='log',
        model = SpectralCoclustering(n_clusters=n_clusters,
                                     random_state=0)
        model.fit(m)
        print "INDICES:",
        indices = [model.get_indices(i) for i in xrange(n_clusters)]
        print indices
        tmp = []
        for i in xrange(n_clusters):
            tmp.extend([phn_order[indices[i][0][j]] for j in xrange(len(indices[i][0]))])
        phn_order = tmp
        fit_data = m[np.argsort(model.row_labels_)]
        fit_data = fit_data[:, np.argsort(model.column_labels_)]
        m = fit_data
    return phn_order, m
Author: cequencer | Project: speech_embeddings | Lines: 34 | Source: train_word2vec.py

Example 2: test_spectral_coclustering
def test_spectral_coclustering():
    """Test Dhillon's Spectral CoClustering on a simple problem."""
    param_grid = {'svd_method': ['randomized', 'arpack'],
                  'n_svd_vecs': [None, 20],
                  'mini_batch': [False, True],
                  'init': ['k-means++'],
                  'n_init': [10],
                  'n_jobs': [1]}
    random_state = 0
    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5,
                                    random_state=random_state)
    S -= S.min()  # needs to be nonnegative before making it sparse
    S = np.where(S < 1, 0, S)  # threshold some values
    for mat in (S, csr_matrix(S)):
        for kwargs in ParameterGrid(param_grid):
            model = SpectralCoclustering(n_clusters=3,
                                         random_state=random_state,
                                         **kwargs)
            model.fit(mat)
            assert_equal(model.rows_.shape, (3, 30))
            assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
            assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
            assert_equal(consensus_score(model.biclusters_,
                                         (rows, cols)), 1)
Author: ChicoQ | Project: scikit-learn | Lines: 25 | Source: test_spectral.py

Example 3: main
def main():
    origin = open('10k.txt', 'r')
    lines = origin.readlines()
    x = []
    label = []
    for l in lines:
        l = l.split(',')
        ip1 = l[2].split('.')
        ip2 = l[3].split('.')
        d = [datetime.fromtimestamp(int(l[1][0:11])).hour,
             int("%02x%02x%02x%02x" % (int(ip1[0]), int(ip1[1]), int(ip1[2]), int(ip1[3])), 16),
             int("%02x%02x%02x%02x" % (int(ip2[0]), int(ip2[1]), int(ip2[2]), int(ip2[3])), 16)] + l[4:6] + l[7:10]
        x.append(d)
    data = np.array(x, dtype='float32')
    model = SpectralCoclustering(n_clusters=5)
    model.fit(data)
    print model.rows_
    for i in range(5):
        print "Cluster" + str(i) + ':'
        for j in range(10000):
            if model.rows_[i][j]:
                print j,
        print ' '
Author: royxue | Project: KDD99_Coclustering | Lines: 28 | Source: niara_cluster.py

Example 4: find_disjoint_biclusters
def find_disjoint_biclusters(self, biclusters_number=50):
    data = np.asarray_chkfinite(self.matrix)
    data[data == 0] = 0.000001
    coclustering = SpectralCoclustering(n_clusters=biclusters_number, random_state=0)
    coclustering.fit(data)
    biclusters = set()
    for i in range(biclusters_number):
        rows, columns = coclustering.get_indices(i)
        row_set = set(rows)
        columns_set = set(columns)
        if len(row_set) > 0 and len(columns_set) > 0:
            density = self._calculate_box_cluster_density(row_set, columns_set)
            odd_columns = set()
            for column in columns_set:
                col_density = self._calculate_column_density(column, row_set)
                if col_density < density / 4:
                    odd_columns.add(column)
            columns_set.difference_update(odd_columns)
            if len(columns_set) == 0:
                continue
            odd_rows = set()
            for row in row_set:
                row_density = self._calculate_row_density(row, columns_set)
                if row_density < density / 4:
                    odd_rows.add(row)
            row_set.difference_update(odd_rows)
            if len(row_set) > 0 and len(columns_set) > 0:
                density = self._calculate_box_cluster_density(row_set, columns_set)
                biclusters.add(Bicluster(row_set, columns_set, density))
    return biclusters
Author: luntos | Project: bianalyzer | Lines: 34 | Source: spectral_coclustering.py

Example 5: biclustering
def biclustering(input, num_clusters):
    global agent1_dict
    data = np.matrix(input)
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # create agent 1 dictionary
    agent1_dict = {}
    for c in range(num_clusters):
        agent1_dict[c] = model.get_indices(c)[0].tolist()  # 0: row indices, 1: column indices
    return agent1_dict
Author: sneha6791 | Project: Thesis | Lines: 10 | Source: music_gt_2_a2.py

Example 6: main
def main():
    origin = open('kddcup.txt', 'r')
    lines = origin.readlines()
    x = []
    label = []
    for l in lines:
        l = l.split(',')
        d = l[0:1] + l[4:19] + l[21:-1]
        label.append(l[-1])
        x.append(d)
    data = np.array(x, dtype='float32')
    model = SpectralCoclustering(n_clusters=5)
    model.fit(data)
    evaluation = []
    draw_n_x = []
    draw_n_y = []
    draw_a_x = []
    draw_a_y = []
    for cluster in model.rows_:
        normal = 0.0
        attack = 0.0
        graph_x = []
        graph_y = []
        for idx in range(len(cluster)):
            if cluster[idx]:
                if label[idx] == 'normal.\n':
                    normal += 1
                else:
                    attack += 1
                graph_x.append(data[27])
                graph_y.append(data[30])
        evaluation.append(normal / (normal + attack))
        if normal > attack:
            draw_n_x += graph_x
            draw_n_y += graph_y
        else:
            draw_a_x += graph_x
            draw_a_y += graph_y
    pl.plot(draw_n_x, draw_n_y, 'ro')
    pl.plot(draw_a_x, draw_a_y, 'go')
    print evaluation
    pl.show()
Author: royxue | Project: KDD99_Coclustering | Lines: 53 | Source: cluster_data.py

Example 7: biclustering
def biclustering(data, num_clusters):
    clusters = {}
    data = np.asmatrix(data)
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    #model = SpectralBiclustering(n_clusters=num_clusters)
    model.fit(data)
    for c in range(num_clusters):
        clusters[c] = model.get_indices(c)[0].tolist()  # 0: row indices, 1: column indices
    #fit_data = data[np.argsort(model.row_labels_)]
    #fit_data = fit_data[:, np.argsort(model.column_labels_)]
    #plot(fit_data)
    return clusters
Author: sneha6791 | Project: Thesis | Lines: 12 | Source: current_working.py

Example 8: biclustering
def biclustering(input, num_clusters):
    global agent1_dict
    data = np.matrix(input)
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)
    model.fit(data)
    # create agent 1 dictionary
    agent1_dict = {}
    for c in range(num_clusters):
        agent1_dict[c] = model.get_indices(c)[0].tolist()  # 0: row indices, 1: column indices
    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plot(fit_data)
    return agent1_dict
Author: sneha6791 | Project: Thesis | Lines: 13 | Source: music_gt_1.py

Example 9: cluster_data
def cluster_data(flavors, whisky):
    corr_whisky = pd.DataFrame.corr(flavors.transpose())
    model = SpectralCoclustering(n_clusters=6, random_state=0)
    model.fit(corr_whisky)
    whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
    whisky = whisky.ix[np.argsort(model.row_labels_)]
    whisky = whisky.reset_index(drop=True)
    correlation = pd.DataFrame.corr(whisky.iloc[:, 2:14].transpose())
    correlation = np.array(correlation)
    # print(np.sum(model.rows_, axis=1))
    # print(np.sum(model.rows_, axis=0))
    # print(model.row_labels_)
    # print(correlation)
    plot_correlations(correlation)
Author: piotrbla | Project: pyExamples | Lines: 14 | Source: pandas_test.py

Example 10: plot_coclusters_raw_data
def plot_coclusters_raw_data(time_ms, t=False):
    # take the transpose of sliced matrix
    if t:
        channels_data = slice_matrix(matrix, time_ms)
    else:
        channels_data = slice_matrix(matrix, time_ms)
    print len(channels_data), len(channels_data[1])
    z_score = stats.zscore(channels_data)
    plt.title('Z Score Biclustering Over %i ms' % time_ms)
    spectral_model = SpectralCoclustering()
    spectral_model.fit(z_score)
    fit_data = z_score[np.argsort(spectral_model.row_labels_)]
    fit_data = fit_data[:, np.argsort(spectral_model.column_labels_)]
    plt.matshow(fit_data, cmap=plt.cm.Blues)
    plt.savefig('z_score_raw_coclustering_all_ts_%i_T_%s.svg' % (time_ms, str(t)))
Author: exp0nge | Project: eeg-viz | Lines: 15 | Source: z_score.py

Example 11: cocluster
def cocluster(self, mx, blockdiag=False):
    logging.info('Co-clustering Tade..')
    if blockdiag:
        logging.info('blockdiag')
        clusser = SpectralCoclustering(n_jobs=-1)
    else:  # checkerboard
        logging.info('checkerboard')
        clusser = SpectralBiclustering(n_jobs=-1, n_clusters=(4, 3))
        #n_clusters=3, svd_method='randomized',
    clusser.fit(mx)
    logging.info('Argsorting mx rows..')
    mx = mx[np.argsort(clusser.row_labels_)]
    self.prev = self.prev[np.argsort(clusser.row_labels_)]
    logging.info('Argsorting mx cases..')
    mx = mx[:, np.argsort(clusser.column_labels_)]
    self.case = self.case[np.argsort(clusser.column_labels_)]
    return mx
Author: makrai | Project: misc | Lines: 17 | Source: coclust_tade.py

Example 12: main
def main():
    files = [DATA_DIR + file for file in os.listdir(DATA_DIR) if fnmatch.fnmatch(file, '*.csv')]
    for i in files:
        print('processing', i, '...')
        table = get_data(i)
        cl = SpectralCoclustering(n_clusters=2, random_state=0)
        cl.fit(table)
        # using http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html
        fit_data = table[np.argsort(cl.row_labels_)]
        fit_data = fit_data[:, np.argsort(cl.column_labels_)]
        plt.matshow(fit_data, cmap=plt.cm.Reds)
        plt.title(i[len(DATA_DIR):])
        # plt.show()
        plt.savefig(i[len(DATA_DIR):-4] + '.pdf')
Author: danielgeng | Project: cs249_data_science | Lines: 17 | Source: biclustering.py

Example 13: main
def main(model):
    store = pd.HDFStore(model)
    from_ = store['from_'][0][0]
    to = store['to'][0][0]
    assert from_ == 0
    trace_fpath = store['trace_fpath'][0][0]
    Theta_zh = store['Theta_zh'].values
    Psi_oz = store['Psi_sz'].values
    count_z = store['count_z'].values[:, 0]
    Psi_oz = Psi_oz / Psi_oz.sum(axis=0)
    Psi_zo = (Psi_oz * count_z).T
    Psi_zo = Psi_zo / Psi_zo.sum(axis=0)
    obj2id = dict(store['source2id'].values)
    hyper2id = dict(store['hyper2id'].values)
    id2obj = dict((v, k) for k, v in obj2id.items())
    ZtZ = Psi_zo.dot(Psi_oz)
    ZtZ = ZtZ / ZtZ.sum(axis=0)
    L = ZtZ
    #ZtZ[ZtZ < (ZtZ.mean())] = 0
    L[ZtZ >= 1.0 / (len(ZtZ))] = 1
    L[L != 1] = 0
    colormap = toyplot.color.brewer.map("Purples", domain_min=0, domain_max=1, reverse=True)
    print(colormap)
    canvas = toyplot.matrix((L.T, colormap), label="P[z' | z]",
                            colorshow=False, tlabel="To z'", llabel="From")[0]
    #canvas.axes(ylabel='From z', xlabel='To z\'')
    toyplot.pdf.render(canvas, 'tmat.pdf')
    model = SpectralCoclustering(n_clusters=3)
    model.fit(L)
    fit_data = L[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    canvas = toyplot.matrix((fit_data, colormap), label="P[z' | z']",
                            colorshow=False)[0]
    toyplot.pdf.render(canvas, 'tmat-cluster.pdf')
    #AtA = Psi_oz.dot(Psi_zo)
    #np.fill_diagonal(AtA, 0)
    #AtA = AtA / AtA.sum(axis=0)
    store.close()
Author: flaviovdf | Project: tribeflow | Lines: 46 | Source: tmat-toyplot.py

Example 14: biclustering
def biclustering(input_list, num_clusters):
    global agent1_dict
    # clustering agent 1
    data = np.matrix(input_list)
    #plot(data)  # original data
    #model = SpectralBiclustering(n_clusters=num_clusters)  # Biclustering, see http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_biclustering.html#example-bicluster-plot-spectral-biclustering-py
    model = SpectralCoclustering(n_clusters=num_clusters, random_state=0)  # Coclustering, see http://scikit-learn.org/stable/auto_examples/bicluster/plot_spectral_coclustering.html
    model.fit(data)
    # create agent 1 dictionary
    agent1_dict = {}
    for c in range(num_clusters):
        agent1_dict[c] = model.get_indices(c)[0].tolist()  # 0: row indices, 1: column indices
    fit_data = data[np.argsort(model.row_labels_)]
    fit_data = fit_data[:, np.argsort(model.column_labels_)]
    plot(fit_data)
    return agent1_dict
Author: sneha6791 | Project: Thesis | Lines: 19 | Source: music_gametheory.py

Example 15: SpectralCoclustering
#mM[np.where(np.logical_and(mM >= 0.25, mM < 1.25))] = 0.5
#mM[mM >= 1.25] = 1
#####
matDF = pd.DataFrame(MatOut).set_index(np.array(indx))
matDF.columns = areas
# Original plot
plt.matshow(MatOut, cmap=plt.cm.Blues)
plt.title("Original dataset")
clusters = 8 #6
model = SpectralCoclustering(n_clusters=clusters)
#model = SpectralBiclustering(n_clusters=clusters)
model.fit(matDF)
fitData_c = matDF.columns[np.argsort(model.column_labels_)]
matDF = matDF[fitData_c]
fitData_i = matDF.index[ np.argsort(model.row_labels_)]
matDF = matDF.reindex(fitData_i)
column_names = np.array([i[13:16] for i in fitData_c])
# plot
fig = plt.figure()
ax = fig.add_subplot(111)
Author: kpokk | Project: Neural-Response-to-Images | Lines: 31 | Source: biclustering.py

Example 16: make_biclusters
import numpy as np                 # used for np.argsort below (import not shown in the original excerpt)
import matplotlib.pyplot as plt    # used for the matshow plots (import not shown in the original excerpt)
from sklearn.datasets import make_biclusters
from sklearn.datasets import samples_generator as sg
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.metrics import consensus_score

data, rows, columns = make_biclusters(
    shape=(300, 300), n_clusters=5, noise=5,
    shuffle=False, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")
model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(data)
score = consensus_score(model.biclusters_,
                        (rows[:, row_idx], columns[:, col_idx]))
print "consensus score: {:.3f}".format(score)
fit_data = data[np.argsort(model.row_labels_)]
fit_data = fit_data[:, np.argsort(model.column_labels_)]
plt.matshow(fit_data, cmap=plt.cm.Blues)
plt.title("After biclustering; rearranged to show biclusters")
plt.show()
Author: 2011200799 | Project: scikit-learn | Lines: 30 | Source: plot_spectral_coclustering.py

Example 17: nx_graph_from_biadjacency_matrix
for row in ratings:
    user_id = user_ids.index(row[0])
    profile_id = profile_ids.index(row[1])
    user_profile_matrix[user_id, profile_id] = row[2]

# find number of users and movies in each bicluster
'''G = nx_graph_from_biadjacency_matrix(user_movie_matrix)
nx.draw(G)
plt.show()'''

# initialize and carry out clustering
K = 50
scc = SpectralCoclustering(n_clusters=K, svd_method='arpack')
scc.fit(user_profile_matrix)

# labels
row_labels = scc.row_labels_
column_labels = scc.column_labels_
bicluster_num_users = np.zeros(K)
bicluster_num_profiles = np.zeros(K)
bicluster_list_users = []
bicluster_list_profiles = []
for i in range(K):
    bicluster_list_users.append([])
Author: eburas77 | Project: Stat640 | Lines: 30 | Source: biclusteringdating.py

Example 18: SpectralCoclustering
corr_flavors = pd.DataFrame.corr(flavors)
corr_flavors
plt.figure(figsize=(10,10))
plt.pcolor(corr_flavors)
plt.colorbar()
plt.savefig('./python_case_studies/whisky/corr_flavors.pdf')
corr_whisky = pd.DataFrame.corr(flavors.transpose())
plt.figure(figsize=(10,10))
plt.pcolor(corr_whisky)
plt.axis('tight')
plt.colorbar()
plt.savefig('./python_case_studies/whisky/corr_whisky.pdf')
model = SpectralCoclustering(n_clusters=6, random_state=0)
model.fit(corr_whisky)
model.rows_
np.sum(model.rows_, axis=1)
np.sum(model.rows_, axis=0)
model.row_labels_
whisky['Group'] = pd.Series(model.row_labels_, index=whisky.index)
whisky = whisky.ix[np.argsort(model.row_labels_)]
whisky = whisky.reset_index(drop=True)
correlations = pd.DataFrame.corr(whisky.iloc[:,2:14].transpose())
correlations = np.array(correlations)
Author: okcorral | Project: python | Lines: 31 | Source: whiskies.py

Example 19: number_normalizer
def number_normalizer(tokens):
    """Map all numeric tokens to a placeholder.  By applying
    this form of dimensionality reduction, some methods may perform better.
    """
    return ("#NUMBER" if token[0].isdigit() else token for token in tokens)


class NumberNormalizingVectorizer(TfidfVectorizer):
    def build_tokenizer(self):
        tokenize = super(NumberNormalizingVectorizer, self).build_tokenizer()
        return lambda doc: list(number_normalizer(tokenize(doc)))


dir = ROOT_DIR + '\\processed_data\\'
data = pickle.load(open(dir + 'FT_raw_corpus_2013.p', 'rb'))
vectorizer = NumberNormalizingVectorizer(stop_words='english', min_df=5)
cocluster = SpectralCoclustering(n_clusters=20,
                                 svd_method='arpack', random_state=0)
kmeans = MiniBatchKMeans(n_clusters=20, batch_size=20000,
                         random_state=0)
print("Vectorizing...")
X = vectorizer.fit_transform(data)
print("Coclustering...")
start_time = time()
cocluster.fit(X)
y_cocluster = cocluster.row_labels_
print("MiniBatchKMeans...")
start_time = time()
y_kmeans = kmeans.fit_predict(X)
Author: sunnyjiahui | Project: CS230 | Lines: 31 | Source: biclustering.py

Example 20: TfidfVectorizer
listOfAbstracts = []
for paper in papers:
    if 'Abstract' in paper['MedlineCitation']['Article'].keys():
        listOfAbstracts.append(mergeAbstract(paper['MedlineCitation']['Article']['Abstract']['AbstractText']))

# Create TF-IDF matrix
vect = TfidfVectorizer(max_df=1)
tfidf = vect.fit_transform(listOfAbstracts)

# Non-negative Matrix Factorization
num_topics = 2
num_top_words = 5
nmf = decomposition.NMF(n_components=num_topics, random_state=1)
doctopic = nmf.fit_transform(tfidf)
topic_words = []
vocab = np.array(vect.get_feature_names())
for topic in nmf.components_:
    word_idx = np.argsort(topic)[::-1][0:num_top_words]
    topic_words.append([vocab[i] for i in word_idx])

# Coclustering
cocluster = SpectralCoclustering(n_clusters=5, svd_method='arpack', random_state=0)
cocluster.fit(tfidf)
y_cocluster = cocluster.row_labels_
x_cocluster = cocluster.column_labels_
# print(np.array(vect.get_feature_names())[x_cocluster == 4])
Author: valentina-s | Project: PubMedMining | Lines: 30 | Source: pubmedplay.py
Note: the sklearn.cluster.bicluster.SpectralCoclustering examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets come from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use must follow the corresponding project's license. Do not republish without permission.