本文整理汇总了Python中numpy.triu_indices_from函数的典型用法代码示例。如果您正苦于以下问题:Python triu_indices_from函数的具体用法?Python triu_indices_from怎么用?Python triu_indices_from使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了triu_indices_from函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: add_stars
def add_stars(ax, P_mat, tri=True):
    '''
    Annotate the cells of a plotted matrix with significance stars.

    ax    : axes-like object; only its ``text`` method is used.
    P_mat : square 2-D numpy array of p values.
    tri   : if True only the cells returned by ``np.triu_indices_from``
            (main diagonal and above) are annotated, otherwise every
            cell is.

    Stars follow the usual convention:
        p < 0.001 -> '***',  p < 0.01 -> '**',  p < 0.05 -> '*'.
    Cells that are not significant still receive an (empty) text object.

    NOTE: the row index is passed as the x coordinate and the column index
    as the y coordinate, so on an image-style plot the annotated
    upper-triangle indices are drawn mirrored across the diagonal
    (presumably why this was originally described as the lower triangle).

    Returns the same ``ax`` that was passed in.
    '''
    # Imported locally so the function carries its own dependency
    import numpy as np

    # An offset of -n moves the "diagonal" below the matrix, so every cell
    # is selected when the triangle restriction is switched off.
    offset = 0 if tri else -P_mat.shape[0]
    i_inds, j_inds = np.triu_indices_from(P_mat, k=offset)

    for i, j in zip(i_inds, j_inds):
        p_value = P_mat[i, j]
        # Ordered from most to least significant so that the boundaries
        # (exactly 0.001 and exactly 0.01) always land in a bucket.
        if p_value < 0.001:
            star = '***'
        elif p_value < 0.01:
            star = '**'
        elif p_value < 0.05:
            star = '*'
        else:
            star = ''
        ax.text(i, j, star,
                horizontalalignment='center',
                verticalalignment='center',
                color='k')
    return ax
开发者ID:KirstieJane,项目名称:DESCRIBING_DATA,代码行数:33,代码来源:create_correlation_matrix.py
示例2: get_measurement_polynomials
def get_measurement_polynomials(self, noise = 0, seed = 0):
    """
    Build the first- and second-order moment constraints as polynomials.

    For every dimension i the first-order polynomial is sum_k x[i,k]; for
    every pair i <= j the second-order polynomial is sum_k x[i,k]*x[j,k].
    Each polynomial is evaluated at the parameters returned by
    ``self.get_parameters()``, Gaussian noise scaled by ``noise`` is added
    to the evaluated values, and the constraint "polynomial - value" is
    formed.

    noise : scale of the additive ``np.random.randn`` noise (0 = exact).
    seed  : seed passed to ``np.random.seed``; note that this reseeds
            numpy's global random state.

    Returns ``(R, constraints)`` where ``R`` is the polynomial ring and
    ``constraints`` lists the first-order differences followed by the
    upper-triangular second-order differences.
    """
    np.random.seed(seed)
    k, d = self.k, self.d
    params = self.get_parameters()
    # NOTE(review): ring/RR look like sympy's polynomial-ring API - confirm
    R = ring([x for x, _ in params], RR)[0]
    names = {str(x) : R(x) for x in R.symbols}
    xs = array([[names[self.x(i,j)] for j in xrange(k)] for i in xrange(d)])
    params = [(names[x], v) for x, v in params]

    # Second order moments (TODO: 3rd order moments)
    # The builtin ``object`` replaces the ``np.object`` alias, which newer
    # NumPy releases no longer provide.
    P = zeros((d,d), dtype=object)
    p = zeros((d,), dtype=object)
    for i in xrange(d):
        p[i] = sum(xs[i,k_] for k_ in xrange(k))# / k
        for j in xrange(i, d):
            P[i,j] = sum(xs[i,k_] * xs[j,k_] for k_ in xrange(k))# / k

    # Project and profit: evaluate every polynomial at the true parameters
    m = zeros((d,))
    M = zeros((d,d))
    for i in xrange(d):
        m[i] = p[i].evaluate(params)
        for j in xrange(i, d):
            M[i,j] = P[i,j].evaluate(params)
    M = M + noise * np.random.randn(d,d)
    m = m + noise * np.random.randn(d)
    # TODO: Something is wrong here
    #m = M.sum(1)

    # Finally return values. Only the upper triangle of P/M was filled in,
    # so only that part is turned into constraints.
    first_order = [f - f_ for f, f_ in zip(p.flatten(), m.flatten())]
    second_order = [f - f_
                    for f, f_ in zip(P[triu_indices_from(P)], M[triu_indices_from(M)])]
    return R, first_order + second_order
开发者ID:sidaw,项目名称:polymom,代码行数:34,代码来源:examples.py
示例3: _expected_kid_and_std
def _kid_block_bounds(n_items, n_blocks):
    """Return the n_blocks + 1 slice boundaries splitting n_items into near-equal blocks."""
    base, extra = divmod(n_items, n_blocks)
    sizes = np.full(n_blocks, base)
    if extra > 0:
        # The leftover items are handed out one each to the last blocks.
        sizes[-extra:] += 1
    bounds = np.r_[0, np.cumsum(sizes)]
    assert bounds[-1] == n_items
    return bounds


def _expected_kid_and_std(real_imgs, gen_imgs, max_block_size=1024):
    """Reference block-wise estimate built from a cubic polynomial kernel.

    Both inputs are 2-D arrays with one row per sample and the same number
    of columns. The rows are split into the same number of blocks for both
    arrays, chosen so that the larger array's blocks hold at most
    ``max_block_size`` rows. For each block the kernel
    ``(x . y / dim + 1) ** 3`` is evaluated and the estimate
    ``-2 * mean(k_rg) + mean(upper(k_rr)) + mean(upper(k_gg))`` is formed,
    where ``upper`` takes the strictly upper triangle.

    Args:
      real_imgs: array of shape (n_r, dim).
      gen_imgs: array with ``dim`` columns.
      max_block_size: upper bound on the rows per block.

    Returns:
      ``(mean, stderr)`` over the block estimates; ``stderr`` is NaN when
      there is only one block.
    """
    n_r, dim = real_imgs.shape
    n_g = gen_imgs.shape[0]

    # Integer ceiling division: gives the same count whether or not "/" is
    # true division in the surrounding module.
    n_blocks = -(-max(n_r, n_g) // max_block_size)

    inds_r = _kid_block_bounds(n_r, n_blocks)
    inds_g = _kid_block_bounds(n_g, n_blocks)

    ests = []
    for i in range(n_blocks):
        r = real_imgs[inds_r[i]:inds_r[i + 1]]
        g = gen_imgs[inds_g[i]:inds_g[i + 1]]

        k_rr = (np.dot(r, r.T) / dim + 1)**3
        k_rg = (np.dot(r, g.T) / dim + 1)**3
        k_gg = (np.dot(g, g.T) / dim + 1)**3
        # Within-set terms skip the diagonal (a sample paired with itself).
        ests.append(-2 * k_rg.mean() +
                    k_rr[np.triu_indices_from(k_rr, k=1)].mean() +
                    k_gg[np.triu_indices_from(k_gg, k=1)].mean())

    var = np.var(ests, ddof=1) if len(ests) > 1 else np.nan
    return np.mean(ests), np.sqrt(var / len(ests))
开发者ID:Albert-Z-Guo,项目名称:tensorflow,代码行数:34,代码来源:classifier_metrics_test.py
示例4: plot_clustering_similarity
def plot_clustering_similarity(results, plot_dir=None, verbose=False, ext='png'):
    """Plot how similar the clustering solutions stored on ``results.HCA`` are.

    Two square heatmaps are drawn over every pair of clustering solutions:
    one with the adjusted Rand score in one triangle and the adjusted
    mutual information in the other, and one with the correlation between
    the condensed distance matrices.

    results  : object exposing ``HCA`` with ``results`` (mapping of name to
               a dict holding 'distance_df' and 'labels') and ``dist_metric``.
    plot_dir : when given, both figures are saved there and then closed.
    verbose  : when True, print the correlation between the two cluster
               agreement scores.
    ext      : file extension used for the saved figures.
    """
    HCA = results.HCA
    # every clustering solution as a (name, solution) pair
    clusterings = HCA.results.items()
    names = [key for key, _ in clusterings]
    n_solutions = len(clusterings)

    # agreement of the cluster labels across embedding spaces
    cluster_similarity = pd.DataFrame(np.zeros((n_solutions, n_solutions)),
                                      index=names,
                                      columns=names)
    # agreement of the underlying distance matrices
    distance_similarity = pd.DataFrame(np.zeros((n_solutions, n_solutions)),
                                       index=names,
                                       columns=names)

    for (key1, solution1), (key2, solution2) in combinations(clusterings, 2):
        name1 = key1.split('-')[-1]
        name2 = key2.split('-')[-1]

        # distance matrices: symmetric, so fill both triangles
        dist_corr = np.corrcoef(squareform(solution1['distance_df']),
                                squareform(solution2['distance_df']))[1,0]
        distance_similarity.loc[name1, name2] = dist_corr
        distance_similarity.loc[name2, name1] = dist_corr

        # cluster labels: one score per triangle
        clusters1 = solution1['labels']
        clusters2 = solution2['labels']
        rand_score = adjusted_rand_score(clusters1, clusters2)
        MI_score = adjusted_mutual_info_score(clusters1, clusters2)
        cluster_similarity.loc[name1, name2] = rand_score
        cluster_similarity.loc[name2, name1] = MI_score

    with sns.plotting_context(context='notebook', font_scale=1.4):
        clust_fig = plt.figure(figsize = (12,12))
        sns.heatmap(cluster_similarity, square=True)
        plt.title('Cluster Similarity: TRIL: Adjusted MI, TRIU: Adjusted Rand',
                  y=1.02)

        dist_fig = plt.figure(figsize = (12,12))
        sns.heatmap(distance_similarity, square=True)
        plt.title('Distance Similarity, metric: %s' % HCA.dist_metric,
                  y=1.02)

    if plot_dir is not None:
        clust_path = path.join(plot_dir,
                               'cluster_similarity_across_measures.%s' % ext)
        dist_path = path.join(plot_dir,
                              'distance_similarity_across_measures.%s' % ext)
        save_figure(clust_fig, clust_path, {'bbox_inches': 'tight'})
        save_figure(dist_fig, dist_path, {'bbox_inches': 'tight'})
        plt.close(clust_fig)
        plt.close(dist_fig)

    if verbose:
        # how well do the two agreement measures track each other?
        upper = np.triu_indices_from(cluster_similarity, k=1)
        rand_scores = cluster_similarity.values[upper]
        MI_scores = cluster_similarity.T.values[upper]
        score_consistency = np.corrcoef(rand_scores, MI_scores)[0,1]
        print('Correlation between measures of cluster consistency: %.2f' \
              % score_consistency)
开发者ID:IanEisenberg,项目名称:Self_Regulation_Ontology,代码行数:59,代码来源:HCA_plots.py
示例5: mat2vec
def mat2vec(m,include_diag=False):
    """Flatten the lower triangle of ``m`` into a vector, column by column.

    The upper-triangle indices are generated and then transposed, which
    walks the lower triangle in column-major order (the ordering Matlab
    would produce). ``include_diag`` decides whether the main diagonal is
    part of the result.
    """
    offset = 0 if include_diag else 1
    rows, cols = np.triu_indices_from(m, offset)
    # Swapping rows and columns mirrors the selection across the diagonal.
    return m[(cols, rows)]
开发者ID:yassinebha,项目名称:Proteus,代码行数:9,代码来源:tseries.py
示例6: test_simple_hessenberg_trafo
def test_simple_hessenberg_trafo():
    """Check the zero patterns produced by ``hessenberg_realization`` for both forms."""
    # Made up discrete time TF
    numerator = [1., -8., 28., -58., 67., -30.]
    denominator = poly([1, 2, 3., 2, 3., 4, 1 + 1j, 1 - 1j])
    G = Transfer(numerator, denominator, dt=0.1)

    # form='c': entries of `a` from the second superdiagonal upwards and
    # all but the last entry of the first column of `b` must vanish
    H, _ = hessenberg_realization(G, compute_T=1, form='c', invert=1)
    beyond_superdiag = triu_indices_from(H.a, k=2)
    assert_(not np.any(H.a[beyond_superdiag]))
    assert_(not np.any(H.b[:-1, 0]))

    # form='o': the same checks on the transposed layout
    H = hessenberg_realization(G, form='o', invert=1)
    assert_(not np.any(H.c[0, :-1]))
    beyond_superdiag = triu_indices_from(H.a, k=2)
    assert_(not np.any(H.a.T[beyond_superdiag]))
开发者ID:ilayn,项目名称:harold,代码行数:10,代码来源:test_system_funcs.py
示例7: LML_se
def LML_se(self,theta,returnGradients=False):
    """Log marginal likelihood for hyperparameters ``theta``, optionally with gradients.

    ``theta`` is installed through ``self.setTheta``; the covariance of
    ``self.X`` (plus ``self.var_n`` and a 1e-8 jitter on the diagonal) is
    Cholesky-factorised and the likelihood of ``self.Y`` is assembled from
    the data-fit, complexity and normalisation terms.

    Returns the scalar likelihood when ``returnGradients`` is False,
    otherwise ``(likelihood, grads)`` where ``grads`` holds the derivatives
    with respect to ``var_f``, the length scale and ``var_n`` in that order.
    """
    self.setTheta(theta)
    K, r = self.cov(self.X, retr=True)

    # noise variance plus a small jitter on the diagonal
    Ky = K.copy()
    Ky += np.eye(self.X.shape[0])*self.var_n + np.eye(self.X.shape[0])*1e-8
    L = self.cholSafe(Ky)

    # log-determinant from the Cholesky diagonal
    WlogDet = 2.*np.sum(np.log(np.diag(L)))
    alpha, _ = dpotrs(L, self.Y, lower=1)

    dataFit = - np.sum(alpha * self.Y)
    modelComplexity = -self.Y.shape[1] * WlogDet
    normalizer = -self.Y.size * log2pi
    logMarginalLikelihood = 0.5*(dataFit + modelComplexity + normalizer)

    # compared with == on purpose so that only values equal to False
    # take the short path
    if returnGradients == False:
        return logMarginalLikelihood

    Wi, _ = dpotri(-L, lower=1)
    Wi = np.asarray(Wi)
    # dpotri fills one triangle only: mirror the bottom into the top
    upper = np.triu_indices_from(Wi,k=1)
    Wi[upper] = Wi.T[upper]

    # dL = change in LML, dK = change in kernel matrix K
    dL_dK = 0.5 * (np.dot(alpha,alpha.T) - self.Y.shape[1] * Wi)

    noiseGradient = np.diag(dL_dK).sum()
    varfGradient = np.sum(K* dL_dK)/self.var_f
    dK_dr = -r*K
    dL_dr = dK_dr * dL_dK
    lengthscaleGradient = -np.sum(dL_dr*r)/self.charLen

    grads = np.array([varfGradient, lengthscaleGradient, noiseGradient])
    return logMarginalLikelihood, grads
开发者ID:Troy-Wilson,项目名称:ASV-Autonomous-Bathymetry,代码行数:29,代码来源:OnlineGP.py
示例8: test_pairplot_reg
def test_pairplot_reg(self):
    """A regression pairplot draws histograms on the diagonal and scatter plus fit elsewhere."""
    columns = ["x", "y", "z"]
    g = ag.pairplot(self.df, diag_kind="hist", kind="reg")

    # diagonal panels: one histogram with ten bars
    for diag_ax in g.diag_axes:
        nt.assert_equal(len(diag_ax.patches), 10)

    # the panels above and below the diagonal get identical checks
    off_diagonal = (np.triu_indices_from(g.axes, 1),
                    np.tril_indices_from(g.axes, -1))
    for rows, cols in off_diagonal:
        for i, j in zip(rows, cols):
            panel = g.axes[i, j]
            x_out, y_out = panel.collections[0].get_offsets().T
            npt.assert_array_equal(self.df[columns[j]], x_out)
            npt.assert_array_equal(self.df[columns[i]], y_out)

            nt.assert_equal(len(panel.lines), 1)
            nt.assert_equal(len(panel.collections), 2)

    # nothing is drawn as a collection on the diagonal grid axes
    for i, j in zip(*np.diag_indices_from(g.axes)):
        nt.assert_equal(len(g.axes[i, j].collections), 0)
开发者ID:mwaskom,项目名称:seaborn,代码行数:33,代码来源:test_axisgrid.py
示例9: test_pairplot
def test_pairplot(self):
    """A default pairplot scatters the right columns; with ``hue`` the diagonal has one curve per level."""
    columns = ["x", "y", "z"]
    g = ag.pairplot(self.df)

    for diag_ax in g.diag_axes:
        assert len(diag_ax.patches) > 1

    # the panels above and below the diagonal get identical checks
    off_diagonal = (np.triu_indices_from(g.axes, 1),
                    np.tril_indices_from(g.axes, -1))
    for rows, cols in off_diagonal:
        for i, j in zip(rows, cols):
            panel = g.axes[i, j]
            x_out, y_out = panel.collections[0].get_offsets().T
            npt.assert_array_equal(self.df[columns[j]], x_out)
            npt.assert_array_equal(self.df[columns[i]], y_out)

    # nothing is drawn as a collection on the diagonal grid axes
    for i, j in zip(*np.diag_indices_from(g.axes)):
        nt.assert_equal(len(g.axes[i, j].collections), 0)

    # with a hue variable every diagonal panel holds one artist per level
    g = ag.pairplot(self.df, hue="a")
    n_levels = len(self.df.a.unique())

    for diag_ax in g.diag_axes:
        assert len(diag_ax.lines) == n_levels
        assert len(diag_ax.collections) == n_levels
开发者ID:mwaskom,项目名称:seaborn,代码行数:34,代码来源:test_axisgrid.py
示例10: plot_corr
def plot_corr(df, size=10):
    """Function plots a graphical correlation matrix for each pair of columns in the dataframe.

    The strictly lower triangle is masked and rendered white, so only the
    diagonal and the upper triangle are coloured.

    Input:
        df: pandas DataFrame
        size: vertical and horizontal size of the plot"""
    import matplotlib.pyplot as plt
    import numpy as np

    # The correlation matrix is computed once and reused for the labels.
    corr_frame = df.corr()
    labels = corr_frame.columns

    # np.tri(..., k=-1) is 1 strictly below the diagonal: those cells are hidden.
    hidden = np.tri(corr_frame.shape[0], k=-1)
    corr = np.ma.array(corr_frame, mask=hidden)

    fig, ax = plt.subplots(figsize=(size, size))
    ax.matshow(corr)

    # pyplot.get_cmap is used because newer Matplotlib releases no longer
    # provide matplotlib.cm.get_cmap.
    cmap = plt.get_cmap("jet", 10)
    cmap.set_bad("w")  # masked cells are drawn white

    plt.xticks(range(len(labels)), labels, rotation=90)
    plt.yticks(range(len(labels)), labels)
    ax.imshow(corr, interpolation="nearest", cmap=cmap)
    plt.show()
开发者ID:PandaStabber,项目名称:rfecvNano,代码行数:25,代码来源:helperFunctions.py
示例11: __init__
def __init__(self, master, x_train, y_train, x_test, y_test, evaluator, df, console):
    """Build the frame and embed a lower-triangle correlation heatmap of ``df``.

    The train/test splits, evaluator, data frame and console are stored on
    the instance unchanged. ``df.corr()`` is plotted with the diagonal and
    upper triangle masked, and the resulting figure is handed to
    ``self.attach_figure`` together with the inner frame.
    """
    Tk.Frame.__init__(self, master)
    self.x_train = x_train
    self.y_train = y_train
    self.x_test = x_test
    self.y_test = y_test
    self.evaluator = evaluator
    self.df = df
    self.console = console

    frame_train = Tk.Frame(self)
    frame_train.pack(fill=Tk.BOTH, expand=1, padx=15, pady=15)

    # White background
    sns.set(style="white")
    # Correlation matrix (covers the class column as well as the features)
    corr = df.corr()
    # Hide the diagonal and the upper triangle; the builtin bool replaces
    # the np.bool alias that newer NumPy releases no longer provide.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Draw. Only this figure is created; the extra empty figure that used
    # to be opened beforehand was never shown or closed.
    f, ax = plt.subplots(figsize=(11, 11))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    plt.xticks(rotation=-90)
    plt.yticks(rotation=0)
    plt.title("Cardiotocography \"Feature-Feature\" & \"Feature-Label\" Correlations")
    self.attach_figure(f, frame_train)
开发者ID:vincent2610,项目名称:cancer-assessment,代码行数:30,代码来源:frame_features_corr.py
示例12: unpad_randomize_and_flatten
def unpad_randomize_and_flatten(self, cm):
    """
    Reorder one zero-padded Coulomb matrix at random and flatten it.

    1. Strip the zero padding (the atom count is the number of leading
       non-zero entries in the first row)
    2. Sort rows and columns by row norm plus Gaussian noise drawn from a
       generator seeded with ``self.seed``
    3. Pad back to the original size and keep the upper triangular part

    Returns a single 1-D feature vector
    """
    max_atom_number = len(cm)

    # count leading non-zero entries, never looking past max_atom_number
    atom_number = 0
    for entry in cm[0][:max_atom_number]:
        if entry == 0.:
            break
        atom_number += 1

    unpadded = cm[0:atom_number,0:atom_number]

    norms = np.asarray(
        [np.linalg.norm(row) for row in unpadded], dtype=float)
    noise = np.random.RandomState(self.seed).normal(size=norms.size)
    order = np.argsort(norms+noise)

    shuffled = unpadded[order][:,order]
    repadded = pad_array(shuffled, len(cm))
    return repadded[np.triu_indices_from(repadded)]
开发者ID:apappu97,项目名称:deepchem,代码行数:26,代码来源:transformers.py
示例13: threshold_matrix
def threshold_matrix(M, cost):
    '''
    Binarise an association matrix at a percentile of its edge weights.

    M is the full (square) association matrix.
    cost is the percentile (0 to 100) of the strictly-upper-triangle
    values that is used as the threshold.

    threshold_matrix works on a copy of the input, so M is left untouched.
    The diagonal is overwritten with a large negative sentinel so that it
    always falls below the threshold. Every value below the threshold
    (including the diagonal) becomes 0 and every remaining non-zero value
    becomes 1.

    NOTE: earlier versions also built a minimum spanning tree with networkx
    but never used it, so the result was - and still is - a plain
    percentile threshold; spanning-tree edges are NOT forced into the
    output. That unused computation has been removed.

    Returns the thresholded copy, with the same shape as M.
    '''
    # Make a copy of the matrix
    thr_M = np.copy(M)

    # Set all diagonal values to -999 so they end up below the threshold
    thr_M[np.diag_indices_from(thr_M)] = -999

    # Calculate the threshold value from the off-diagonal upper triangle
    thr = np.percentile(thr_M[np.triu_indices_from(thr_M, k=1)], cost)

    # Set all values that are less than the threshold to 0
    thr_M[thr_M < thr] = 0

    # Set all values that are not zero to 1
    thr_M[thr_M != 0] = 1

    return thr_M
开发者ID:leetaey,项目名称:NSPN_CODE,代码行数:33,代码来源:networkx_functions.py
示例14: _process
def _process(self,data):
    """Collect per-batch cross-correlation values and plot each completed matrix.

    Every item ``data[x]`` is read as ``(value, batch, (col, row), ...)``:
    the value is stored at ``[row, col]`` of that batch's matrix, which
    starts out as an identity matrix of size ``self.size``. Once
    ``size * (size - 1) / 2`` values have arrived for a batch, the matrix is
    drawn as a heatmap with the diagonal and upper triangle masked, saved to
    ``./plots/<batch>_plot.png`` and written to the 'output' stream.
    """
    for x in data:
        item = data[x]
        value, batch, pair = item[0], item[1], item[2]

        if batch not in self.data:
            # prepares the data to visualise the xcor matrix of a specific batch number.
            self.data[batch] = {}
            self.data[batch]['matrix'] = numpy.identity(self.size)
            self.data[batch]['ro_count'] = 0
        entry = self.data[batch]

        entry['matrix'][(pair[1], pair[0])] = value
        entry['ro_count'] += 1

        # one value per unordered pair: the strictly lower triangle is full
        if entry['ro_count'] == (self.size*(self.size-1))/2:
            matrix = entry['matrix']

            d = pd.DataFrame(data=matrix,
                             columns=range(0,self.size),index=range(0,self.size))

            # The builtin bool replaces the numpy.bool alias that newer
            # NumPy releases no longer provide.
            mask = numpy.zeros_like(d, dtype=bool)
            mask[numpy.triu_indices_from(mask)] = True

            # Set up the matplotlib figure
            f, ax = plt.subplots(figsize=(11, 9))

            # Generate a custom diverging colormap
            cmap = sns.diverging_palette(220, 10, as_cmap=True)

            # Draw the heatmap with the mask and correct aspect ratio
            sns.heatmap(d, mask=mask, cmap=cmap, vmax=1,
                        square=True,
                        linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

            # Saved through the figure itself (seaborn no longer exposes
            # ``sns.plt``) and closed so figures do not pile up per batch.
            f.savefig("./plots/"+str(batch)+"_plot.png")
            plt.close(f)

            self.write('output',(matrix,batch),metadata={'matrix':str(d),'batch':str(batch)},dep=['batch_'+str(batch)])
开发者ID:aspinuso,项目名称:VERCE,代码行数:35,代码来源:rtxcor_rays.py
示例15: convert_file
def convert_file(in_file, out_file, factors=(.25, 1, 4)):
    """Turn every divergence matrix in ``in_file`` into kernel matrices in ``out_file``.

    Each dataset ``in_file[df][k]`` (the '_meta' group is skipped) is read,
    and for every entry of ``factors`` a kernel is built with
    ``sdm.sdm.make_km(divs, median * factor)`` and stored as
    ``out_file[df][k]['median * <factor>']``. The median is taken over the
    upper triangle (diagonal included) of the first dataset seen in each
    ``df`` group and reused for the rest of that group. Kernels that are
    already present in ``out_file`` are left alone.

    in_file  : path of the HDF5 file to read.
    out_file : path of the HDF5 file to create or extend.
    factors  : iterable of multipliers applied to the median.
    """
    with h5py.File(in_file, 'r') as inp:
        func_ks = [
            (df, k)
            for df, g in inp.items() if df != '_meta'
            for k in g.keys()
        ]

    meds = {}
    for df, k in func_ks:
        with h5py.File(in_file, 'r') as inp:
            divs = inp[df][k][()]

        if df in meds:
            med = meds[df]
        else:
            meds[df] = med = np.median(divs[np.triu_indices_from(divs)])

        for factor in factors:
            name = 'median * {}'.format(factor)
            print('/'.join((df, k, name)))

            # Mode 'a' (read/write, create if missing) is spelled out
            # because the default mode differs between h5py versions.
            with h5py.File(out_file, 'a') as out:
                g = out.require_group(df).require_group(k)
                if name in g:
                    print('\talready there')
                    continue

            # The output file is closed while the kernel is being computed.
            km = sdm.sdm.make_km(divs, med * factor)
            with h5py.File(out_file, 'a') as out:
                out[df][k][name] = km
开发者ID:dougalsutherland,项目名称:hsfuap,代码行数:31,代码来源:make_kernels.py
示例16: plot_2_corr_heatmaps
def plot_2_corr_heatmaps(corr1, corr2, labels, title1, title2):
    """Show two correlation matrices side by side as lower-triangle heatmaps.

    corr1, corr2   : correlation matrices; the mask is built from the shape
                     of ``corr1`` and applied to both.
    labels         : tick labels used on both axes of both heatmaps.
    title1, title2 : titles of the left and right heatmap.
    """
    fig=plt.figure(figsize=(9, 8))
    gs = gridspec.GridSpec(1, 2)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])

    sns.set(style="white")

    # Generate a mask for the upper triangle (diagonal included). The
    # builtin bool replaces the np.bool alias that newer NumPy releases no
    # longer provide.
    mask = np.zeros_like(corr1, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmaps with the mask and correct aspect ratio
    sns.heatmap(corr1, mask=mask, cmap=cmap, vmax=.3,
                square=True, xticklabels=labels, yticklabels=labels,
                linewidths=.5, ax=ax1, cbar_kws={"shrink": .3}, annot=True)
    ax1.set_title(title1)
    sns.heatmap(corr2, mask=mask, cmap=cmap, vmax=.3,
                square=True, xticklabels=labels, yticklabels=labels,
                linewidths=.5, ax=ax2, cbar_kws={"shrink": .3}, annot=True)
    ax2.set_title(title2)
    fig.tight_layout()
    plt.show()
开发者ID:returnandrisk,项目名称:meucci-python,代码行数:26,代码来源:rnr_meucci_functions.py
示例17: main
def main():
    """Plot the bin-to-bin correlation matrix of the mock counts for every pointing.

    Pointing IDs are read from ``todo_list.ascii.dat`` in ``rawdata_dir``
    and bin centres from ``rbins.ascii.dat``. For each ID the counts file in
    ``counts_dir`` is loaded (one column per bin), its correlation matrix is
    drawn as a lower-triangle heatmap and saved as a PNG in ``plots_dir``.
    Finally all PNGs are passed to ``GIF_MOVIE``.
    """
    # Load list of pointing IDs
    todo_file = rawdata_dir + 'todo_list.ascii.dat'
    ID_list = np.genfromtxt(todo_file, skip_header=1, usecols=[0], unpack=True,
                            dtype=str)

    # Load bins centers
    bins_file = 'rbins.ascii.dat'
    bin_centers = np.genfromtxt(bins_file, skip_header=1, usecols=[2], unpack=True)

    # Round bin centers to three decimal places
    bin_centers = np.round(bin_centers, 3)

    # Column names for the pandas DataFrame: the bin centers as strings
    col_names = np.asarray([str(center) for center in bin_centers])

    # Create list of png's for use in making gif
    png_list = []

    # Calculate correlation matrix for each l.o.s.
    for ID in ID_list:

        # Load counts from the mocks with pandas
        # Each row is a mock, each column is a bin
        counts_filename = counts_dir + 'counts_all_' + ID + '.dat'
        # Raw string: same separator, without the invalid "\s" escape
        DF = pd.read_csv(counts_filename, sep=r'\s+', names=col_names)

        # Calculate correlation matrix
        corr = DF.corr()

        # plot heatmap of matrix
        sns.set(style="white")
        # The builtin bool replaces the np.bool alias that newer NumPy
        # releases no longer provide.
        mask = np.zeros_like(corr, dtype=bool)
        mask[np.triu_indices_from(mask)] = True
        f, ax = plt.subplots(figsize=(11, 9))
        cmap = sns.diverging_palette(145, 280, s=85, l=25, n=7, as_cmap=True)
        sns.heatmap(corr, mask=mask, cmap=cmap,square=True, annot=True,
                    xticklabels=col_names, yticklabels=col_names, linewidths=.5,
                    cbar_kws={"shrink": .5}, ax=ax, vmin=-1.0, vmax=1.0)
        plt.title('Correlation Matrix for l.o.s. ' + ID, fontsize=20)
        plt.xlabel('Bin Center (kpc)', fontsize=18)
        plt.ylabel('Bin Center (kpc)', fontsize=18)

        fig_name = plots_dir + 'corr_matrix_' + ID + '.png'
        f.savefig(fig_name)
        # A fresh figure is made per pointing, so close it once saved
        # instead of letting one figure per l.o.s. stay open.
        plt.close(f)
        png_list.append(fig_name)

    gif_name = plots_dir + 'corr_matrix.gif'
    GIF_MOVIE(png_list, gif_name)
开发者ID:aszewciw,项目名称:gal_structure,代码行数:60,代码来源:real_errors_each_los.py
示例18: plot_feature_corr
def plot_feature_corr(X, f_sz = (11, 9)):
    """
    Purpose: plot a correlation matrix for the features in X, showing only
             the part below the diagonal
    Inputs:  X: a pandas dataframe of feature values
             f_sz: a tuple for the figure size
    Output:  the correlation matrix of X
    """
    sns.set(style="white")

    # Compute the correlation matrix
    corr = X.corr()

    # Generate a mask for the upper triangle (diagonal included). The
    # builtin bool replaces the np.bool alias that newer NumPy releases no
    # longer provide.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize= f_sz)

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3,
                square=True, linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)

    return corr
开发者ID:jsabrams,项目名称:default_risk_prediction,代码行数:27,代码来源:model_fitting.py
示例19: find_collinearity_columns
def find_collinearity_columns(correlation):
    """
    Find the columns responsible for multicollinearity.

    Approach:
        0. Check whether the correlation matrix has full rank. If it does
           not, multicollinearity is present.
        1. Locate the pair (row A, column C) with the largest absolute
           correlation below the diagonal.
        2. Compare the mean absolute correlation of A and of C with all
           columns; the one with the larger mean is removed (C on a tie)
           and its name recorded.
        Repeat until the remaining matrix has full rank or at most one
        column is left.

    The input frame is never modified.

    @params: correlation, correlation matrix as a square DataFrame whose
             index and columns carry the same labels.
    @returns: list of column names, in the order they were removed.
    """
    bad_columns = []
    remaining = correlation.copy()

    # With one column (or none) left there is no pair to compare.
    while remaining.shape[0] > 1:
        if np.linalg.matrix_rank(remaining.values) == remaining.shape[0]:
            break

        abs_corr = remaining.abs()

        # Keep only the strictly lower triangle; the diagonal and the upper
        # triangle are zeroed so each pair is considered once.
        strictly_lower = np.tril(np.ones(abs_corr.shape, dtype=bool), k=-1)
        candidates = abs_corr.where(strictly_lower, 0.0)

        # unstack() yields a Series indexed by (column, row); idxmax returns
        # the label of the first maximum.
        col_idx, row_idx = candidates.unstack().idxmax()

        if abs_corr.loc[row_idx, :].mean() > abs_corr.loc[:, col_idx].mean():
            bad_column = row_idx
        else:
            bad_column = col_idx
        bad_columns.append(bad_column)

        # Remove that name from both the rows and the columns
        remaining = remaining.drop(bad_column, axis=0).drop(bad_column, axis=1)

    return bad_columns
开发者ID:FayolChang,项目名称:mlp,代码行数:31,代码来源:utils.py
示例20: get_candidate_taus_above_threshold
def get_candidate_taus_above_threshold(Ds, thresh, **kwargs):
    """Pick candidate tau values, none smaller than ``thresh``, from a distance matrix.

    Ds     : square matrix of pairwise distances; the strictly upper
             triangle supplies the distance sample.
    thresh : smallest tau that may be returned.

    Keyword options:
        nz_frac      : if given, return the single distance at that
                       fraction of the sorted sample (raised to ``thresh``
                       when it falls below it).
        grid_size    : number of candidates otherwise (default 20).
        linspace_tau : if true, space the candidates evenly between
                       ``thresh`` and the largest positive entry of ``Ds``;
                       otherwise pick evenly spaced ranks among the
                       distances above ``thresh``.

    The candidates and the percentage of distances at or below each of them
    are logged through ``common.print_log`` before the candidates are
    returned.
    """
    pairwise = Ds[np.triu_indices_from(Ds, k=1)]

    if "nz_frac" not in kwargs:
        n_grid = int(kwargs.get("grid_size", 20))
        evenly_spaced = bool(kwargs.get("linspace_tau", False))
        if evenly_spaced:
            candidate_taus = np.linspace(thresh, np.max(Ds[Ds > 0]), n_grid)
        else:
            eligible = np.array(sorted(pairwise[pairwise > thresh]))
            last = len(eligible) - 1
            # evenly spaced ranks, truncated to integer positions
            positions = (np.linspace(0, 1, n_grid) * last).astype(int)
            candidate_taus = sorted(eligible[positions])
    else:
        nz_frac = float(kwargs["nz_frac"])
        common.print_log("Setting tau so that fraction of distances below threshold = {0}".format(nz_frac))
        ordered = np.array(sorted(pairwise))
        n_ordered = len(ordered)
        # clamp the rank into the valid index range
        rank = int(nz_frac*n_ordered)
        rank = min(max(rank, 0), n_ordered-1)
        tau = ordered[rank]
        if tau < thresh:
            common.print_log("Parameter tau was set below the minimum value which makes the graph connected. Changing it to {0}".format(thresh))
            tau = thresh
        candidate_taus = np.array([tau])

    n_pairs = len(pairwise)
    nz_fracs = [100. * np.sum(pairwise <= candidate) / n_pairs for candidate in candidate_taus]
    common.print_log("Found {0} candidate thresholds:".format(len(candidate_taus)), candidate_taus)
    common.print_log("Percentage of distances below threshold:", nz_fracs)

    return candidate_taus
开发者ID:anand-bhaskar,项目名称:gap,代码行数:31,代码来源:localization.py
注:本文中的numpy.triu_indices_from函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论