This article collects typical usage examples of the Python function sklearn.cluster.affinity_propagation. If you have been wondering what affinity_propagation actually does, how to call it, or what real-world usage looks like, then you are in luck: the curated code examples below should help.
Shown below are 20 code examples of the affinity_propagation function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
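Before turning to the examples, here is a minimal, self-contained sketch of the function's interface (our own illustration, not taken from any of the projects below): affinity_propagation consumes a precomputed similarity matrix and returns a pair (exemplar indices, cluster labels).

import numpy as np
from sklearn.cluster import affinity_propagation

X = np.random.RandomState(0).randn(20, 5)  # 20 samples, 5 features
S = np.corrcoef(X)                         # pairwise similarities (here: correlations)
# random_state is accepted on scikit-learn >= 0.23; drop it on older releases
centers, labels = affinity_propagation(S, random_state=0)
print(len(centers), 'exemplars for', len(labels), 'samples')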
Example 1: learn_number_clusters
def learn_number_clusters(ds, method='ap'):
    if method == 'ap':
        from sklearn import cluster, covariance
        # _, labels = cluster.affinity_propagation(ds.M)
        # return labels
        # elif method=='ap2':
        # GraphLassoCV: see the note after this example for the modern name
        edge_model = covariance.GraphLassoCV(verbose=True)
        # standardize the time series: using correlations rather than covariance
        # is more efficient for structure recovery
        X = ds.M.values.copy().T
        X /= X.std(axis=0)
        print('--- B')
        edge_model.fit(X)
        _, labels = cluster.affinity_propagation(edge_model.covariance_)
        return labels
    elif method == 'rbm':
        from sklearn import neural_network
        model = neural_network.BernoulliRBM(n_components=100,
                                            # random_state=0,
                                            # n_iter=npasses,
                                            verbose=True)
        X = model.fit_transform(ds.M)
        return X
Author: jjsnlee | Project: Kaggle_LearningSocialNetworks | Lines: 25 | Source: kaggle_work.py
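Note that covariance.GraphLassoCV, used in the 'ap' branch above, was renamed to GraphicalLassoCV in scikit-learn 0.20, and the old name was removed in 0.22. Below is a sketch of the equivalent call on a current install; the function name and the M argument (standing in for ds.M above) are our own, hypothetical:

import numpy as np
from sklearn import cluster, covariance

def learn_number_clusters_modern(M):
    # M: samples-by-features array, e.g. ds.M.values from Example 1
    edge_model = covariance.GraphicalLassoCV(verbose=True)
    X = np.asarray(M, dtype=float).copy().T
    X /= X.std(axis=0)  # standardize so the model works on correlations
    edge_model.fit(X)
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    return labels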
Example 2: cluster_keywords
def cluster_keywords(data, min_size=2, cluster_preference=True, verbose=True):
    start_cluster_idx = 0
    clusters = np.zeros(len(data['labels']), dtype=int)
    # create graph
    label_co_occ = data['co_occ']
    g = nx.from_numpy_matrix(label_co_occ)  # from_numpy_array on networkx >= 3
    if cluster_preference:
        # define preferences
        net_sizes = np.array([np.sqrt(1 + m) for m in data['label_expenses']])
        M = np.percentile(label_co_occ, 75.)
        m = np.min(label_co_occ)
        sizeM = np.percentile(net_sizes, 75.)
        preference = m + np.asarray([min(s, sizeM) for s in net_sizes]) * ((M - m) / sizeM)
    for comp in sorted(nx.connected_components(g), key=len, reverse=True):
        comp = list(comp)  # recent networkx yields sets; numpy indexing needs a sequence
        l = len(comp)
        if l >= min_size:
            temp_co_occ = (label_co_occ[:, comp])[comp, :]
            # affinity_propagation returns (exemplar_indices, labels)
            if cluster_preference:
                exemplars, temp_clusters = affinity_propagation(temp_co_occ,
                                                                preference=preference[comp],
                                                                max_iter=500,
                                                                convergence_iter=40)
            else:
                exemplars, temp_clusters = affinity_propagation(temp_co_occ,
                                                                max_iter=500,
                                                                convergence_iter=40)
            for i in range(l):
                clusters[comp[i]] = temp_clusters[i] + start_cluster_idx
            start_cluster_idx += len(exemplars)
            if verbose:
                print('Found component of size ' + str(l) + ' and added '
                      + str(len(exemplars)) + ' clusters')
        else:
            # only one cluster for this component
            if verbose:
                print('Found component of size ' + str(l) + ' so do not run affinity_propagation')
            for n in comp:
                clusters[n] = start_cluster_idx
            start_cluster_idx += 1
    return g, clusters
Author: sds-dubois | Project: ExpenseAnalyzer | Lines: 44 | Source: graph_tools.py
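The preference argument passed above biases which points become exemplars: higher preferences yield more clusters. A small standalone demonstration on synthetic data (our own sketch, not from the repository above):

import numpy as np
from sklearn.cluster import affinity_propagation

rng = np.random.RandomState(0)
S = np.corrcoef(rng.randn(30, 4))  # 30x30 similarity matrix
for pref in (np.min(S), np.median(S)):
    # random_state is accepted on scikit-learn >= 0.23; drop it on older releases
    _, labels = affinity_propagation(S, preference=pref, random_state=0)
    print('preference=%.2f -> %d clusters' % (pref, labels.max() + 1))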
Example 3: clust
def clust(vectorfile, matrixfile, clusted):
    fid2fname = {}
    for line in open(vectorfile):
        line = line.strip().split('\t')
        fid2fname.setdefault(int(line[0]), line[1:])
    N = len(fid2fname)
    rowlist = []
    collist = []
    datalist = []
    for line in open(matrixfile):
        line = line.strip().split('\t')
        if len(line) < 3:
            continue
        f1, f2, sim = line[:3]
        rowlist.append(int(f1))
        collist.append(int(f2))
        datalist.append(float(sim))
    for fid in fid2fname:
        # add the diagonal: each point is perfectly similar to itself
        rowlist.append(int(fid))
        collist.append(int(fid))
        datalist.append(1.0)
    row = np.array(rowlist)
    col = np.array(collist)
    data = np.array(datalist)
    graph = coo_matrix((data, (row, col)), shape=(N, N))
    ###########################################################################
    # Force the solver to be arpack, since amg is numerically
    # unstable on this example
    # labels = spectral_clustering(graph, n_clusters=160, eigen_solver='arpack')
    # affinity_propagation expects a dense similarity matrix, hence toarray()
    _, labels = cluster.affinity_propagation(graph.toarray())
    n_labels = labels.max()
    print("nlabels:", n_labels)
    # for i in range(n_labels + 1):
    #     print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
    cluster2fid = {}
    for index, lab in enumerate(labels):
        cluster2fid.setdefault(lab, [])
        cluster2fid[lab].append(index)
    normal_data = open("normal-data.txt", 'w')
    easy_data = open("easy-data-500.txt", 'w')
    for index, lab in enumerate(cluster2fid):
        for fid in cluster2fid[lab]:
            strx = ""
            for i in range(0, len(fid2fname[fid])):
                strx += str(fid2fname[fid][i]) + "\t"
            normal_data.write(strx + '\t' + str(index) + '\n')
            easy_data.write(strx + '\t' + str(fid) + '\t' + str(index) + '\n')
Author: DvHuang | Project: Wifi_Clust | Lines: 56 | Source: b.py
Example 4: affinityprop
def affinityprop(correlations, names):
    n_clusters = 0
    a, labels = cluster.affinity_propagation(correlations)
    # print(labels)
    print("Affinity Propagation Clusters")
    for i in range(labels.max() + 1):
        print('Cluster %i: %s' % ((i + 1),
                                  ', '.join(names[labels == i])))
        if len(names[labels == i]) > 1:
            n_clusters += 1
    print("Number of Clusters with more than 1 element: " + str(n_clusters))
    return n_clusters
Author: winteram | Project: FashionPins | Lines: 12 | Source: spectralcluster.py
Example 5: test_affinity_propagation
def test_affinity_propagation(self):
    iris = datasets.load_iris()
    similarity = np.cov(iris.data)
    df = pdml.ModelFrame(similarity)
    result = df.cluster.affinity_propagation()
    expected = cluster.affinity_propagation(similarity)
    self.assertEqual(len(result), 2)
    self.assert_numpy_array_almost_equal(result[0], expected[0])
    self.assertTrue(isinstance(result[1], pdml.ModelSeries))
    self.assert_index_equal(result[1].index, df.index)
    self.assert_numpy_array_equal(result[1].values, expected[1])
Author: Sandy4321 | Project: pandas-ml | Lines: 14 | Source: test_cluster.py
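For reference, the estimator-style equivalent of the function call exercised by this test looks roughly like the sketch below (our own illustration; random_state requires scikit-learn >= 0.23):

import numpy as np
from sklearn import datasets
from sklearn.cluster import AffinityPropagation

iris = datasets.load_iris()
S = np.cov(iris.data)  # the same 150x150 similarity matrix the test uses
model = AffinityPropagation(affinity='precomputed', random_state=0).fit(S)
print(model.cluster_centers_indices_[:5], model.labels_[:5])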
Example 6: discover_clusters
def discover_clusters(var):
    from sklearn import cluster, covariance
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    edge_model.fit(var)
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % (i,
              ', '.join(var.columns[labels == i])))
    del cluster, covariance
    return labels, edge_model.precision_.copy()
Author: xiahongze | Project: au_finance | Lines: 15 | Source: market_structure.py
Example 7: makeClusters
def makeClusters(self, toks, cnts, rootName='ROOT', saveSimMat=None):
    '''
    '''
    S = self.w2v.getSimMat(toks)
    if saveSimMat:
        file_iter = open(saveSimMat, "w")
        logger = file_iter.write
        logger("word|%s\n" % ("|".join(toks)))
        for ind, tok in enumerate(toks):
            list_toks = [str(round(k, 3)) for k in S[ind]]
            str_join = "|".join((tok, "|".join(list_toks)))
            logger("%s\n" % str_join)
        file_iter.close()
    ctable = []
    n = len(toks)
    x = np.arange(n)
    ntoks = np.array(toks)
    ncnts = np.array(cnts, dtype='float')
    ncnts = ncnts / ncnts.sum()
    k = 0
    while True:
        Sk = S[np.ix_(x, x)]
        ntoks = ntoks[x]
        ncnts = ncnts[x]
        xk, labels = cluster.affinity_propagation(Sk)
        n_labels = labels.max()
        for i in range(n_labels + 1):
            cidx = labels == i
            ctoks = ntoks[cidx]
            ccnts = ncnts[cidx]
            pidx = ccnts.argsort()[::-1][0]
            cname = ntoks[xk[i]]   # cluster center
            clname = ctoks[pidx]   # most frequent node in cluster
            # temp = {'LEVEL': k, 'CLUSTER': (i+1), 'CENTER': cname, 'NAME': ' '.join(clname[:-2].split('_NG_')), 'MEMBERS': ctoks}
            temp = {'LEVEL': k, 'CLUSTER': (i + 1), 'CENTER': cname,
                    'NAME': ' '.join(cname[:-2].split('_NG_')), 'MEMBERS': ctoks}
            ctable.append(temp)
        k += 1
        # break
        S = Sk  # re-slice from the current submatrix so xk's relative indices stay valid
        x = xk
        if len(xk) <= 3:
            break
    self.ctable = ctable
    self.G = self.ctable2G_()
    return ctable
Author: saurabh-singh-17 | Project: secgov | Lines: 48 | Source: mcsa_clusters.py
Example 8: cluster_measurement_points
def cluster_measurement_points(m_matrix, m_name, corr_bnd=[0.1, 0.9], alg='aff'):
    exemplars_dict = dict()
    if m_matrix.shape[1] == 0:
        return [], exemplars_dict, [], []
    elif m_matrix.shape[1] == 1:
        exemplars_ = [0]
        labels_ = [0]
        exemplars_name = m_name
    else:
        distmat_input = find_norm_dist_matrix(m_matrix)
        # Find representative set of sensor measurements
        min_dist_ = np.sqrt(2 * (1 - corr_bnd[1]))
        max_dist_ = np.sqrt(2 * (1 - corr_bnd[0]))
        if alg == 'pack':
            log.info('use pack clustering algorithm')
            exemplars_, labels_ = max_pack_cluster(distmat_input, min_dist=min_dist_, max_dist=max_dist_)
        else:
            log.info('use affinity clustering algorithm')
            SIMM_MAT = 2 - distmat_input
            exemplars_, labels_ = cluster.affinity_propagation(SIMM_MAT, damping=0.5)
        num_clusters = int(labels_.max() + 1)
        log.info('-' * 40)
        log.info(str(num_clusters) + ' clusters out of ' + str(len(labels_)) + ' measurements')
        log.info('-' * 40)
        validity, intra_dist, inter_dist = compute_cluster_err(distmat_input, labels_)
        log.info('validity: ' + str(round(validity, 2)) + ', intra_dist: ' +
                 str(np.round(intra_dist, 2)) + ', inter_dist: ' +
                 str(np.round(inter_dist, 2)))
        log.info('-' * 40)
        exemplars_name = list(np.array(m_name)[exemplars_])
    for label_id, (m_idx, exemplar_label) in enumerate(zip(exemplars_, exemplars_name)):
        log.info(str(exemplar_label))
        children_set = list(set(np.nonzero(labels_ == label_id)[0]) - set([m_idx]))
        log.info('Label ' + str(label_id) + ' : ' + str(m_idx) + ' <-- ' + str(children_set))
        exemplars_dict.update({exemplar_label: list(np.array(m_name)[children_set])})
    return m_matrix[:, exemplars_], exemplars_dict, exemplars_, labels_
Author: TinyOS-Camp | Project: DDEA-DEV | Lines: 46 | Source: pack_cluster.py
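The corr_bnd-to-distance conversion above follows from the identity d = sqrt(2 * (1 - r)) between the Euclidean distance of standardized signals and their correlation r; the same relationship is why 2 - distmat_input can serve as a similarity matrix. A tiny sketch of the mapping, using the example's default bounds:

import numpy as np

corr_bnd = [0.1, 0.9]
min_dist = np.sqrt(2 * (1 - corr_bnd[1]))  # high correlation -> small distance
max_dist = np.sqrt(2 * (1 - corr_bnd[0]))  # low correlation  -> large distance
print(round(min_dist, 3), round(max_dist, 3))  # 0.447 1.342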
Example 9: corr_cluster
def corr_cluster(corr_file, matrix_file, cluster_dir):
    # use the abs value of corr to cluster
    # (read_csv with index_col replaces DataFrame.from_csv, removed in modern pandas)
    corr_frame = pa.read_csv(corr_file, index_col=0).abs()
    freq_matrix = pa.read_csv(matrix_file, index_col=0)
    _, labels = cluster.affinity_propagation(corr_frame)
    n_label = labels.max()
    names = corr_frame.index
    # output cluster file
    if corr_file[-1] == "/":
        cluster_file = corr_file.split("/")[-2] + "_cluster"
    else:
        cluster_file = corr_file.split("/")[-1] + "_cluster"
    cluster_file = codecs.open(os.path.join(cluster_dir, cluster_file),
                               "w",
                               encoding="utf-8")
    for i in range(n_label + 1):
        # compute the average correlation between cluster and out-cluster
        clus = np.array(names[labels == i])
        in_cluster = corr_frame[clus].loc[clus]  # .loc replaces the removed .ix
        up_index = np.triu_indices(len(clus), 1)
        aver_corr = np.array(in_cluster)[up_index].mean()
        out_clus = np.array(names[labels != i])
        out_cluster = corr_frame[clus].loc[out_clus]
        out_aver_corr = np.array(out_cluster).mean()
        # compute the variance of the entity
        in_aver_var = freq_matrix[clus].var(axis=1).mean()
        obj = {"cluster": list(map(str, clus)), "in_aver_corr": aver_corr,
               "out_aver_corr": out_aver_corr,
               "in_aver_var": in_aver_var}
        cluster_file.write(json.dumps(obj, ensure_ascii=False) + "\n")
        cluster_file.flush()
    cluster_file.close()
Author: Tskatom | Project: twitter_finance | Lines: 39 | Source: corr_cluster.py
Example 10: cluster_measurement_points
def cluster_measurement_points(m_matrix, m_name, corr_bnd=[0.1, 0.9], alg='aff'):
    exemplars_dict = {}
    if m_matrix.shape[1] == 0:
        return [], exemplars_dict, [], []
    elif m_matrix.shape[1] == 1:
        exemplars_ = [0]
        labels_ = [0]
        exemplars_name = m_name
    else:
        distmat_input = find_norm_dist_matrix(m_matrix)
        # Find representative set of sensor measurements
        min_dist_ = np.sqrt(2 * (1 - corr_bnd[1]))
        max_dist_ = np.sqrt(2 * (1 - corr_bnd[0]))
        if alg == 'pack':
            print('use pack clustering algorithm')
            exemplars_, labels_ = max_pack_cluster(distmat_input, min_dist=min_dist_, max_dist=max_dist_)
        else:
            print('use affinity clustering algorithm')
            SIMM_MAT = 2 - distmat_input
            exemplars_, labels_ = cluster.affinity_propagation(SIMM_MAT, damping=0.5)
        num_clusters = int(labels_.max() + 1)
        print('-' * 73)
        print(num_clusters, 'clusters out of', len(labels_), 'measurements')
        print('-' * 73)
        validity, intra_dist, inter_dist = compute_cluster_err(distmat_input, labels_)
        print('validity:', round(validity, 2), ', intra_dist: ', np.round(intra_dist, 2),
              ', inter_dist: ', np.round(inter_dist, 2))
        print('-' * 73)
        exemplars_name = list(np.array(m_name)[exemplars_])
    for label_id, (m_idx, exemplar_label) in enumerate(zip(exemplars_, exemplars_name)):
        print(exemplar_label)
        children_set = list(set(np.nonzero(labels_ == label_id)[0]) - set([m_idx]))
        print('Label', label_id, ':', m_idx, '<--', children_set)
        exemplars_dict.update({exemplar_label: list(np.array(m_name)[children_set])})
    return m_matrix[:, exemplars_], exemplars_dict, exemplars_, labels_
Author: TinyOS-Camp | Project: DDEA-DEV | Lines: 37 | Source: pack_cluster.py
Example 11: affProp
def affProp(instance_path, res_folder, strategy=2):
    instances = instance_path.rsplit('/', 1)[0] + '/'
    file = instance_path.rsplit('/', 1)[1]
    input_type = '.' + file.rsplit('.', 1)[1]
    file = file.rsplit('.', 1)[0]
    data, row_names = parse.read(instances + file + input_type)
    print('Size of data matrix: ', data.shape)
    if len(data) != len(row_names):
        print('Af prop error: data and row_names have diff. lens', len(data), len(row_names))
    # save_matrix_fig(data, res_folder, file+'_in')
    sim_matrix = []
    try:
        sim_matrix = np.load(res_folder + file + '_sim' + str(strategy) + '.npy')
        print('Sim matrix %s found.' % (res_folder + file + '_sim' + str(strategy) + '.npy'))
    except:
        print('Sim matrix %s NOT found!!!!' % (res_folder + file + '_sim' + str(strategy) + '.npy'))
        sim_matrix = pp.strategy(data, 'sim', strategy)
        np.save(res_folder + file + '_sim' + str(strategy), sim_matrix)
    old_n_clusters = 0
    old_non_clustered = 0
    # list to save labels from all iterations, so we can later pick the best clustering
    res_from_diff_params = {}
    nr_clusters_from_diff_params = {}
    non_clustered_from_diff_params = {}
    distribution_from_diff_params = {}
    best_iteration = -1
    sec_best_iteration = -1
    n = sim_matrix.shape[0]
    min_non_clusterd = n
    s_min_non_clusterd = n
    max_std_dev = n
    sec_threshold = 0.0001
    n_iterations = 20  # must be an odd number
    sim_matrix[sim_matrix == 0] = -1e10
    # min_preferance = 0
    # min_preferance *= np.max(sim_matrix[sim_matrix > 0])
    min_preferance = np.min(sim_matrix[sim_matrix > 0]) - 10
    max_preferance = np.median(sim_matrix[sim_matrix > 0])
    print('min_preferance, ', min_preferance)
    print('max_preferance, ', max_preferance)
    if min_preferance > max_preferance:
        raise Exception('Something is wrong with preferance setting: %d %d' %
                        (min_preferance, max_preferance))
    elif min_preferance == max_preferance:
        n_iterations = 1
        pref_list = [min_preferance]
    pref_step = (max_preferance - min_preferance) / n_iterations
    # cluster the data with affinity propagation -------------------------------
    for iteration in range(n_iterations):
        if iteration == 0:
            preference = min_preferance
        else:
            preference += pref_step
        labels = []
        print('_______________________________________________________')
        print('Aff. Prop. with preferance =', preference)
        _, labels = affinity_propagation(sim_matrix, preference=preference)
        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        num_per_cluster = {}
        for i in range(n_clusters):
            num_per_cluster[i] = 0
        for label in labels:
            for i in range(n_clusters):
                if label == i:
                    num_per_cluster[i] += 1
        # TODO: criteria for skipping or breaking the loop ----------------------
        # skip the iteration if the number of clusters is as before
        if iteration == 0:
            old_n_clusters = n_clusters
        # elif n_clusters >= old_n_clusters:
        #     break
        old_n_clusters = n_clusters
        # increase the preferance
        if n_clusters == 1:
            print('DEBUG: Aff prop. n_clusters == 1, going to next iteration')
            min_preferance = preference
            max_preferance += (max_preferance - min_preferance) / 2
            pref_step = (max_preferance - min_preferance) / (n_iterations - iteration)
            print('min = %f, max = %f, step = %f' % (min_preferance, max_preferance, pref_step))
            continue
        # lower the preferance
        if n_clusters >= 0.1 * n:
            print('DEBUG: Aff prop. n_clusters = %i, TOO HIGH!!!' % n_clusters)
            max_preferance = preference
            min_preferance = preference - pref_step
            pref_step = (max_preferance - min_preferance) / (n_iterations - iteration)
            print('min = %f, max = %f, step = %f' % (min_preferance, max_preferance, pref_step))
#......... (the rest of this function is omitted) .........
Author: igor-93 | Project: strucutreDet | Lines: 101 | Source: cluster_affPropagation.py
Example 12: range
gl_prec = graph.precision_
gl_alphas = graph.cv_alphas_
# grid_scores_ since scikit-learn 0.19 (earlier releases exposed grid_scores)
gl_scores = np.mean(graph.grid_scores_, axis=1)
plt.figure()
sns.heatmap(gl_prec)
plt.figure()
plt.plot(gl_alphas, gl_scores, marker='o', color='b', lw=2.0, label='GraphLassoCV')
plt.title("Graph Lasso Alpha Selection")
plt.xlabel("alpha")
plt.ylabel("score")
plt.legend()
# cluster using affinity propagation
_, labels = cluster.affinity_propagation(gl_cov)
num_labels = np.max(labels)
for i in range(num_labels + 1):
    print("Cluster %i: %s" % ((i + 1), ', '.join(names[labels == i])))
# find a low dim embedding for visualization
node_model = manifold.LocallyLinearEmbedding(n_components=2, n_neighbors=6, eigen_solver='dense')
embedding = node_model.fit_transform(X.T).T
# generate plots
plt.figure()
plt.clf()
ax = plt.axes([0., 0., 1., 1.])
plt.axis('off')
Author: vsmolyakov | Project: fin | Lines: 30 | Source: inv_cov.py
Example 13: showCovariances
def showCovariances(names, variation):
    ###########################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
    ###########################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T
    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(10, 8))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')
    # Display a graph of the partial correlations
    partial_correlations = edge_model.precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)
    # Plot the nodes using the coordinates of our embedding
    # (plt.cm.spectral was removed in matplotlib 2.2; nipy_spectral is its successor)
    plt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)
    # Plot the edges
    start_idx, end_idx = np.where(non_zero)
    # a sequence of (*line0*, *line1*, *line2*), where::
    #     linen = (x0, y0), (x1, y1), ... (xm, ym)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments,
                        zorder=0, cmap=plt.cm.hot_r,
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)
    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=10,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(label / float(n_labels)),
                           alpha=.6))
    plt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),
             embedding[0].max() + .10 * embedding[0].ptp())
    plt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),
             embedding[1].max() + .03 * embedding[1].ptp())
#......... (the rest of this function is omitted) .........
Author: ravenshooter | Project: BA_Analysis | Lines: 101 | Source: TimeSeriesAnalysis.py
Example 14: dailyStockClusters
#......... (preceding code omitted) .........
        except:
            print("Cant find ", ticker)
    symbols_edit = []
    names_edit = []
    for i, ticker in enumerate(symbols):
        if True in np.isnan(np.array(qclose[ticker])).tolist():
            print(ticker, " nans found, ticker removed")
            del qclose[ticker]
            del qopen[ticker]
        else:
            symbols_edit.append(ticker)
            names_edit.append(names[i])
    # The daily variations of the quotes are what carry most information
    variation = qclose - qopen
    variation[np.isnan(variation)] = 0.
    ###########################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy()
    # X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print("Cluster " + str(i) + ":")
        for j in range(len(labels)):
            if labels[j] == i:
                print(" ... " + names_edit[j])
        # print('Cluster %i: %s' % ((i + 1), ', '.join(names_edit[labels == i])))
    figure7path = 'Clustered_companyNames.png'  # re-set to name without full path
    figure7_htmlText = "\n<br><h3>Daily stock clustering analysis. Based on one year performance correlations.</h3>\n"
    figure7_htmlText = figure7_htmlText + "\nClustering based on daily variation in Nasdaq 100 quotes.\n"
    figure7_htmlText = figure7_htmlText + '''<br><img src="''' + figure7path + '''" alt="PyTAAA by DonaldPG" width="850" height="500"><br>\n'''
    ###########################################################################
    # Find a low-dimension embedding for visualization: find the best position of
    # the nodes (the stocks) on a 2D plane
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T
Author: DonaldPG | Project: PyTAAA | Lines: 67 | Source: stock_cluster.py
Example 15: clusterSymbol
#......... (preceding code omitted) .........
    # for q in quotes2:
    #     npquotesClose.append(q['Close'].values)
    # npquotesOpen = np.array([q['Open'].values for q in quotes2])
    # open2 = npquotesOpen
    # npquotesClose = np.array([q['Close'].values for q in quotes2])
    # close2 = npquotesClose
    # print npquotesOpen
    # print npquotesClose
    variation = (close2 - open2)
    symbol_dict = dict(zip(codearrs, nametitles))
    symbols, names = np.array(list(symbol_dict.items())).T  # list(...) needed on Python 3
    edge_model = covariance.GraphLassoCV()
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    tempX = variation.T
    # print tempX,'tempX len',len(tempX)
    X = variation.copy().T
    # print 'open len',len(open2),'close len',len(close2),'variation len',len(variation),'X len',len(X)
    print('type open', type(open2), 'type close', type(close2), 'type variation', type(variation), 'type X', type(X))
    print('shape open', open2.shape, 'shape close', close2.shape, 'shape variation', variation.shape, 'shape X', X.shape)
    X /= X.std(axis=0)
    edge_model.fit(X)
    # #########################################################################
    # # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    # print names
    # print 'type symbols',type(symbols),'type names',type(names)
    # for name in names:
    #     print 'name',name
    # print names[0],names[1],names[2],names[3]
    # print 'lables',labels,'n_labels',n_labels,'type labels',type(labels)
    randomtitles = pd.DataFrame()
    for i in range(n_labels + 1):
        # print labels == i
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
        if 1 < len(names[labels == i]) <= 3:
            # print 'random cluster ',np.random.choice(names[labels==i],3)
            tmpdf = pd.DataFrame({'title': np.random.choice(names[labels == i], 1)})
            randomtitles = pd.concat([tmpdf, randomtitles])
        elif 3 < len(names[labels == i]) <= 5:
            tmpdf = pd.DataFrame({'title': np.random.choice(names[labels == i], 2)})
            randomtitles = pd.concat([tmpdf, randomtitles])
        elif 5 < len(names[labels == i]) <= 7:
            tmpdf = pd.DataFrame({'title': np.random.choice(names[labels == i], 4)})
            randomtitles = pd.concat([tmpdf, randomtitles])
        elif 7 < len(names[labels == i]):
            tmpdf = pd.DataFrame({'title': np.random.choice(names[labels == i], 5)})
            randomtitles = pd.concat([tmpdf, randomtitles])
    # print randomtitles
    # for i in range(n_labels + 1):
    #     print 'Cluster '+str(i + 1)+', '+ names[labels == i]
    # #########################################################################
Author: HGboda | Project: AlgorithmTrading | Lines: 67 | Source: cluster_ver2_0.py
Example 16: range
    "WMT": "Wal-Mart",
    "WAG": "Walgreen",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}
symbols, names = np.array(list(symbol_dict.items())).T  # list(...) needed on Python 3
# matplotlib.finance, used below, was removed in matplotlib 2.0; this ran on older installs
quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True) for symbol in symbols]
# volumes = np.array([q.volume for q in quotes]).astype(float)
open = np.array([q.open for q in quotes]).astype(float)
close = np.array([q.close for q in quotes]).astype(float)
variation = close - open
correlations = np.corrcoef(variation)
_, labels = cluster.affinity_propagation(correlations)
for i in range(labels.max() + 1):
    print("Cluster %i: %s" % ((i + 1), ", ".join(names[labels == i])))
Author: yikuizhai | Project: scikit-learn | Lines: 30 | Source: stock_market.py
Example 17: range
        DIST_MAT[i, j] = sqrt(norm(sample1 - sample2))
cov_mat = COV_MAT
# normalize the covariance to a correlation matrix: D^(-1/2) * C * D^(-1/2)
# (the original multiplied the 1-D diagonal in twice, scaling only the columns)
d = np.diag(cov_mat) ** (-0.5)
corr_mat = cov_mat * d * d[:, np.newaxis]
################################################################################
# Unsupervised clustering for sensors given the measurement correlation
# Find only a few representative sensors out of many sensors
################################################################################
# exemplars are a set of representative signals for each cluster
# Smaller damping input will generate more clusters, default is 0.5
# 0.5 <= damping <= 0.99
################################################################################
# exemplars, labels = cluster.affinity_propagation(cov_mat, damping=0.5)
exemplars, labels = cluster.affinity_propagation(cov_mat)
n_labels = labels.max()
for i in range(n_labels + 1):
    print('Cluster %i: %s' % ((i + 1), ', '.join(input_names[labels == i])))
###############################################################################
# Find a low-dimension embedding for visualization: find the best position of
# the nodes (the stocks) on a 2D plane
# We use a dense eigen_solver to achieve reproducibility (arpack is
# initiated with random vectors that we don't control). In addition, we
# use a large number of neighbors to capture the large-scale structure.
node_position_model = manifold.LocallyLinearEmbedding(
    n_components=2, eigen_solver='dense', n_neighbors=3)
Author: TinyOS-Camp | Project: DDEA-DEV | Lines: 31 | Source: df_data_analysis.py
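The comment block above notes that smaller damping values tend to produce more clusters (damping must lie in [0.5, 1.0)). Here is a hypothetical standalone demonstration on random data, not on the sensor measurements from this repository:

import numpy as np
from sklearn.cluster import affinity_propagation

S = np.corrcoef(np.random.RandomState(1).randn(40, 6))
for damping in (0.5, 0.7, 0.9):
    # random_state is accepted on scikit-learn >= 0.23; drop it on older releases
    exemplars, labels = affinity_propagation(S, damping=damping, random_state=0)
    print('damping=%.1f -> %d clusters' % (damping, len(exemplars)))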
Example 18: CLUSTERING_TEST
def CLUSTERING_TEST(distmat_input, min_corr=0.1, max_corr=0.9):
    ############################################################################
    # Unsupervised clustering for sensors given the normalized Euclidean distance
    # of sensor data
    # Find only a few representative sensors out of many sensors
    ############################################################################
    # exemplars are a set of representative signals for each cluster
    # Smaller damping input will generate more clusters, default is 0.5
    # 0.5 <= damping <= 0.99
    ############################################################################
    print('===========================================================')
    print('Clustering Test')
    print('===========================================================')
    print('Pack Clustering')
    print('---------------------------')
    min_dist_ = np.sqrt(2 * (1 - max_corr))
    max_dist_ = np.sqrt(2 * (1 - min_corr))
    pack_exemplars, pack_labels = max_pack_cluster(distmat_input, min_dist=min_dist_, max_dist=max_dist_)
    pack_num_clusters = int(pack_labels.max() + 1)
    print('-' * 73)
    print(pack_num_clusters, 'clusters out of', len(pack_labels), 'measurements')
    print('-' * 73)
    validity, intra_dist, inter_dist = compute_cluster_err(distmat_input, pack_labels)
    print('validity:', round(validity, 2), ', intra_dist: ', np.round(intra_dist, 2),
          ', inter_dist: ', np.round(inter_dist, 2))
    print('-' * 73)
    max_num_clusters = pack_num_clusters
    print('Hierarchical Clustering')
    print('---------------------------')
    ward_validity_log = []
    ward_intra_dist_log = []
    ward_inter_dist_log = []
    ward_num_clusters_log = []
    for k in range(2, max_num_clusters + 1):
        start_time = time.time()
        # Ward was removed from recent scikit-learn; the modern equivalent is
        # AgglomerativeClustering(n_clusters=k, linkage='ward')
        ward = Ward(n_clusters=k).fit(distmat_input.T)
        exec_time = time.time() - start_time
        print(exec_time, ' secs')
        ward_labels = ward.labels_
        ward_validity, ward_intra_dist, ward_inter_dist = compute_cluster_err(distmat_input, ward_labels)
        ward_num_clusters = int(ward_labels.max() + 1)
        ward_validity_log.append(ward_validity)
        ward_intra_dist_log.append(list(ward_intra_dist))
        ward_inter_dist_log.append(list(ward_inter_dist))
        ward_num_clusters_log.append(ward_num_clusters)
    ward_intra_dist_log = np.array(ward_intra_dist_log)
    ward_inter_dist_log = np.array(ward_inter_dist_log)
    print('K-Mean Clustering')
    print('---------------------------')
    kmean_validity_log = []
    kmean_intra_dist_log = []
    kmean_inter_dist_log = []
    kmean_num_clusters_log = []
    for k in range(2, max_num_clusters + 1):
        start_time = time.time()
        kmean = KMeans(n_clusters=k).fit(distmat_input.T)
        exec_time = time.time() - start_time
        print(exec_time, ' secs')
        kmean_labels = kmean.labels_
        kmean_validity, kmean_intra_dist, kmean_inter_dist = compute_cluster_err(distmat_input, kmean_labels)
        kmean_num_clusters = int(kmean_labels.max() + 1)
        kmean_validity_log.append(kmean_validity)
        kmean_intra_dist_log.append(list(kmean_intra_dist))
        kmean_inter_dist_log.append(list(kmean_inter_dist))
        kmean_num_clusters_log.append(kmean_num_clusters)
    kmean_intra_dist_log = np.array(kmean_intra_dist_log)
    kmean_inter_dist_log = np.array(kmean_inter_dist_log)
    print('Affinity Clustering')
    print('---------------------------')
    SIMM_MAT = 2 - distmat_input
    start_time = time.time()
    aff_exemplars, aff_labels = cluster.affinity_propagation(SIMM_MAT, damping=0.5)
    exec_time = time.time() - start_time
    print(exec_time, ' secs')
    aff_num_clusters = int(aff_labels.max() + 1)
    aff_validity, aff_intra_dist, aff_inter_dist = compute_cluster_err(distmat_input, aff_labels)
    fig = plt.figure('Intra_dist')
    fig.suptitle('Intra_dist')
    plot(pack_num_clusters, intra_dist[0], 's', label='pack')
    plot(pack_num_clusters, intra_dist[1], 's', label='pack')
    plot(pack_num_clusters, intra_dist[2], 's', label='pack')
    plot(ward_num_clusters_log, ward_intra_dist_log[:, 0], '-+', label='ward')
    plot(ward_num_clusters_log, ward_intra_dist_log[:, 1], '-+', label='ward')
    plot(ward_num_clusters_log, ward_intra_dist_log[:, 2], '-+', label='ward')
    plot(kmean_num_clusters_log, kmean_intra_dist_log[:, 0], '-v', label='kmean')
    plot(kmean_num_clusters_log, kmean_intra_dist_log[:, 1], '-v', label='kmean')
    plot(kmean_num_clusters_log, kmean_intra_dist_log[:, 2], '-v', label='kmean')
    plot(aff_num_clusters, aff_intra_dist[0], '*', label='aff')
    plot(aff_num_clusters, aff_intra_dist[1], '*', label='aff')
    plot(aff_num_clusters, aff_intra_dist[2], '*', label='aff')
#......... (the rest of this function is omitted) .........
Author: TinyOS-Camp | Project: DDEA-DEV | Lines: 101 | Source: pack_cluster.py
Example 19: getStockMarketStructure
def getStockMarketStructure(symbol_dict):
    # Choose a time period reasonably calm (not too long ago so that we get
    # high-tech firms, and before the 2008 crash)
    d1 = datetime.datetime(2009, 1, 1)
    d2 = datetime.datetime(2011, 1, 1)
    # d1 = datetime.datetime.now() - timedelta(days=365*2)
    # d2 = datetime.datetime.now() - timedelta(days=1)
    # kraft symbol has now changed from KFT to MDLZ in yahoo
    symbols, names = np.array(list(symbol_dict.items())).T
    quotes = [finance.quotes_historical_yahoo(symbol, d1, d2, asobject=True)
              for symbol in symbols]
    open = np.array([q.open for q in quotes]).astype(float)
    close = np.array([q.close for q in quotes]).astype(float)
    # The daily variations of the quotes are what carry most information
    variation = close - open
    ###########################################################################
    # Learn a graphical structure from the correlations
    edge_model = covariance.GraphLassoCV()
    # standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery
    X = variation.copy().T
    X /= X.std(axis=0)
    edge_model.fit(X)
    ###########################################################################
    # Cluster using affinity propagation
    _, labels = cluster.affinity_propagation(edge_model.covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))
    ###########################################################################
    # Find a low-dimension embedding for visualization
#......... (the rest of this function is omitted) .........