This article collects typical usage examples of the Python function sklearn.utils.extmath.norm. If you are wondering what exactly the norm function does, how to call it, or where to find real-world usages of it, the hand-picked code samples below should help.
A total of 19 code examples of the norm function are shown, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
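Before working through the examples, it helps to recall what the function computes: in older scikit-learn releases, extmath.norm returned the Euclidean (Frobenius) norm of an array, i.e. the same value as np.linalg.norm applied to the raveled data, and extmath.squared_norm returned its square. Below is a minimal sketch, assuming one of those older scikit-learn versions is installed (recent releases no longer ship extmath.norm and np.linalg.norm should be used instead):

import numpy as np
from sklearn.utils.extmath import norm, squared_norm  # available only in older scikit-learn releases

x = np.array([[3.0, 4.0],
              [0.0, 12.0]])
print(norm(x))                    # Frobenius norm: sqrt(3**2 + 4**2 + 12**2) = 13.0
print(np.linalg.norm(x.ravel()))  # the same value computed with NumPy
print(squared_norm(x))            # 169.0, i.e. norm(x) ** 2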
Example 1: test_norm_squared_norm
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
Developer: 93sam, Project: scikit-learn, Lines: 8, Source: test_extmath.py
Example 2: test_norm_squared_norm
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200

    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)

    # Check the warning with an int array and np.dot potential overflow
    assert_warns_message(
        UserWarning, 'Array type is integer, np.dot may '
        'overflow. Data should be float type to avoid this issue',
        squared_norm, X.astype(int))
Developer: BasilBeirouti, Project: scikit-learn, Lines: 13, Source: test_extmath.py
Example 3: mean_shift
def mean_shift(X, bandwidth, n_seeds, kernel_function='gaussian', max_iterations=100, proximity_thresh=5):
    '''
    ---Parameters---
    X : data in form (samples, dims)
    bandwidth : radius of nearest neighbors
    n_seeds : number of seed points to start climbing from
    kernel_function : can be "gaussian" or "flat" or your own kernel callable
    proximity_thresh : minimum distance (in pixels) a new cluster must be away from previous ones

    ---Returns---
    cluster_centers : centers of the found clusters
    cluster_counts : how many pixels are within the neighborhood of each cluster
    '''
    import numpy as np
    from sklearn.neighbors import BallTree, NearestNeighbors
    from sklearn.utils import extmath
    from sklearn.metrics.pairwise import euclidean_distances
    from collections import defaultdict

    if kernel_function == 'gaussian':
        kernel_update_function = gaussian_kernel
    elif kernel_function == 'flat':
        kernel_update_function = flat_kernel
    else:
        kernel_update_function = kernel_function

    n_points, n_features = X.shape
    stop_thresh = 1e-2 * bandwidth  # when mean has converged
    cluster_centers = []
    cluster_counts = []
    # ball_tree = BallTree(X)  # to efficiently look up nearby points
    neighbors = NearestNeighbors(radius=bandwidth).fit(X)

    seeds = X[(np.random.uniform(0, X.shape[0], n_seeds)).astype(np.int)]

    # For each seed, climb gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            points_within = X[neighbors.radius_neighbors([weighted_mean], bandwidth, return_distance=False)[0]]
            old_mean = weighted_mean  # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
            if converged or completed_iterations == max_iterations:
                # Only add cluster if it's different enough from other centers
                if len(cluster_centers) > 0:
                    diff_from_prev = [np.linalg.norm(weighted_mean - cluster_centers[i], 2)
                                      for i in range(len(cluster_centers))]
                    if np.min(diff_from_prev) > proximity_thresh:
                        cluster_centers.append(weighted_mean)
                        cluster_counts.append(points_within.shape[0])
                else:
                    cluster_centers.append(weighted_mean)
                    cluster_counts.append(points_within.shape[0])
                break
            completed_iterations += 1

    return cluster_centers, cluster_counts
Developer: MerDane, Project: pyKinectTools, Lines: 59, Source: MeanShift.py
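One possible way to call the function above on synthetic 2-D data is sketched below. Because gaussian_kernel and flat_kernel are defined elsewhere in MeanShift.py, the sketch supplies its own kernel callable through the kernel_function hook; my_flat_kernel is purely illustrative and not part of the original project, and an older NumPy/scikit-learn stack is assumed (the function uses np.int and extmath.norm).

import numpy as np

def my_flat_kernel(old_mean, points, bandwidth):
    # illustrative flat kernel: unweighted mean of the points inside the bandwidth
    return np.mean(points, axis=0)

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(100, 2),
               rng.randn(100, 2) + [10.0, 10.0]])
centers, counts = mean_shift(X, bandwidth=2.0, n_seeds=20,
                             kernel_function=my_flat_kernel,
                             proximity_thresh=3)
print(len(centers), counts)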
Example 4: test_logistic_derivative_lipschitz_constant
def test_logistic_derivative_lipschitz_constant():
    # Tests Lipschitz-continuity of the derivative of the logistic loss
    rng = check_random_state(42)
    grad_weight = 2.08e-1
    lipschitz_constant = _logistic_derivative_lipschitz_constant(
        X, mask, grad_weight)
    for _ in range(20):
        x_1 = rng.rand((w.shape[0] + 1)) * rng.randint(1000)
        x_2 = rng.rand((w.shape[0] + 1)) * rng.randint(1000)
        gradient_difference = extmath.norm(
            _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_1, mask, grad_weight)
            - _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_2, mask, grad_weight))
        point_difference = extmath.norm(x_1 - x_2)
        assert_true(
            gradient_difference <= lipschitz_constant * point_difference)
Developer: CandyPythonFlow, Project: nilearn, Lines: 17, Source: test_graph_net.py
Example 5: _pa
def _pa(self, loss_t, x_t):
    denom = extmath.norm(x_t) ** 2.0
    # special case when the L_2 norm of x_t is zero (follows the libol
    # implementation)
    if denom == 0:
        return 1
    d = loss_t / denom
    return d
Developer: jsouza, Project: pamtl, Lines: 9, Source: partl_regression.py
Example 6: test__squared_loss_derivative_lipschitz_constant
def test__squared_loss_derivative_lipschitz_constant():
    # Tests Lipschitz-continuity of the derivative of the _squared_loss
    # function
    rng = check_random_state(42)
    grad_weight = 2.08e-1
    lipschitz_constant = _squared_loss_derivative_lipschitz_constant(
        X, mask, grad_weight)
    for _ in range(20):
        x_1 = rng.rand(*w.shape) * rng.randint(1000)
        x_2 = rng.rand(*w.shape) * rng.randint(1000)
        gradient_difference = extmath.norm(
            _squared_loss_and_spatial_grad_derivative(X, y, x_1, mask,
                                                      grad_weight)
            - _squared_loss_and_spatial_grad_derivative(X, y, x_2, mask,
                                                        grad_weight))
        point_difference = extmath.norm(x_1 - x_2)
        assert_true(
            gradient_difference <= lipschitz_constant * point_difference)
Developer: CandyPythonFlow, Project: nilearn, Lines: 18, Source: test_graph_net.py
Example 7: _reorth
def _reorth(basis, target, rows=None, alpha=0.5):
    """Reorthogonalize a vector using iterated Gram-Schmidt

    Parameters
    ----------
    basis : ndarray, shape (n_features, n_basis)
        The matrix whose rows are the basis vectors to reorthogonalize against

    target : ndarray, shape (n_features,)
        The target vector to be reorthogonalized

    rows : {array-like, None}, default None
        Indices of rows from basis to use. Use all if None

    alpha : float, default 0.5
        Parameter for determining whether to do a second reorthogonalization.

    Returns
    -------
    reorthed_target : ndarray, shape (n_features,)
        The reorthogonalized vector
    """
    if rows is not None:
        basis = basis[rows]
    norm_target = norm(target)

    norm_target_old = 0
    n_reorth = 0
    while norm_target < alpha * norm_target_old or n_reorth == 0:
        for row in basis:
            t = fast_dot(row, target)
            target = target - t * row

        norm_target_old = norm_target
        norm_target = norm(target)
        n_reorth += 1

        if n_reorth > 4:
            # target in span(basis) => accept target = 0
            target = np.zeros(basis.shape[0])
            break

    return target
Developer: amueller, Project: pca, Lines: 44, Source: tga.py
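The helper above relies on norm and fast_dot, presumably imported from sklearn.utils.extmath in the original tga.py (both exist only in older scikit-learn releases). A minimal sketch under that assumption: the call removes from a vector its components along two orthonormal basis rows.

import numpy as np
from sklearn.utils.extmath import norm, fast_dot  # older scikit-learn releases only

basis = np.array([[1.0, 0.0, 0.0],
                  [0.0, 1.0, 0.0]])   # rows form an orthonormal basis
target = np.array([1.0, 2.0, 3.0])

residual = _reorth(basis, target)
print(residual)        # ~[0., 0., 3.]: the components along the basis rows are removed
print(norm(residual))  # ~3.0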
Example 8: _bistochastic_normalize
def _bistochastic_normalize(X, max_iter=1000, tol=1e-5):
    """Normalize rows and columns of ``X`` simultaneously so that all
    rows sum to one constant and all columns sum to a different
    constant.
    """
    # According to paper, this can also be done more efficiently with
    # deviation reduction and balancing algorithms.
    X = make_nonnegative(X)
    X_scaled = X
    dist = None
    for _ in range(max_iter):
        X_new, _, _ = _scale_normalize(X_scaled)
        if issparse(X):
            dist = norm(X_scaled.data - X_new.data)
        else:
            dist = norm(X_scaled - X_new)
        X_scaled = X_new
        if dist is not None and dist < tol:
            break
    return X_scaled
Developer: VirgileFritsch, Project: scikit-learn, Lines: 21, Source: spectral.py
Example 9: test_tikhonov_regularization_vs_graph_net
def test_tikhonov_regularization_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: that is, with
    # l1_ratio = 0 (pure Smooth), we compare Graph-Net's performance
    # with the analytical solution for Tikhonov regularization
    # XXX A small dataset here (this test is very lengthy)
    G = get_gradient_matrix(w.size, mask)
    optimal_model = np.dot(sp.linalg.pinv(
        np.dot(X.T, X) + y.size * np.dot(G.T, G)), np.dot(X.T, y))
    graph_net = BaseSpaceNet(
        mask=mask_, alphas=1. * X.shape[0], l1_ratios=0., max_iter=400,
        fit_intercept=False,
        screening_percentile=100., standardize=False)
    graph_net.fit(X_, y.copy())
    coef_ = graph_net.coef_[0]
    graph_net_perf = 0.5 / y.size * extmath.norm(
        np.dot(X, coef_) - y) ** 2 \
        + 0.5 * extmath.norm(np.dot(G, coef_)) ** 2
    optimal_model_perf = 0.5 / y.size * extmath.norm(
        np.dot(X, optimal_model) - y) ** 2 \
        + 0.5 * extmath.norm(np.dot(G, optimal_model)) ** 2
    assert_almost_equal(graph_net_perf, optimal_model_perf, decimal=1)
Developer: CandyPythonFlow, Project: nilearn, Lines: 22, Source: test_graph_net.py
Example 10: test_lasso_vs_graph_net
def test_lasso_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: that is, with
    # l1_ratio = 1 (pure Lasso), we compare Graph-Net's performance with
    # scikit-learn's Lasso
    lasso = Lasso(max_iter=100, tol=1e-8, normalize=False)
    graph_net = BaseSpaceNet(mask=mask, alphas=1. * X_.shape[0],
                             l1_ratios=1, is_classif=False,
                             penalty="graph-net", max_iter=100)
    lasso.fit(X_, y)
    graph_net.fit(X, y)
    lasso_perf = 0.5 / y.size * extmath.norm(np.dot(
        X_, lasso.coef_) - y) ** 2 + np.sum(np.abs(lasso.coef_))
    graph_net_perf = 0.5 * ((graph_net.predict(X) - y) ** 2).mean()
    np.testing.assert_almost_equal(graph_net_perf, lasso_perf, decimal=3)
Developer: CandyPythonFlow, Project: nilearn, Lines: 14, Source: test_space_net.py
Example 11: f_regression_nosparse
def f_regression_nosparse(X, y, center=True):
    """Univariate linear regression tests

    Quick linear model for testing the effect of a single regressor,
    sequentially for many regressors.

    This is done in 3 steps:
    1. the regressor of interest and the data are orthogonalized
       with respect to constant regressors
    2. the cross correlation between data and regressors is computed
    3. it is converted to an F score then to a p-value

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = (n_samples, n_features)
        The set of regressors that will be tested sequentially.

    y : array of shape (n_samples,)
        The data matrix.

    center : bool, default=True
        If True, X and y will be centered.

    Returns
    -------
    F : array, shape=(n_features,)
        F values of features.

    pval : array, shape=(n_features,)
        p-values of F-scores.
    """
    X, y = check_arrays(X, y, dtype=np.float)
    y = y.ravel()
    if center:
        y = y - np.mean(y)
        X = X.copy('F')  # faster in fortran
        X -= X.mean(axis=0)

    # compute the correlation
    corr = np.dot(y, X)
    # XXX could use corr /= row_norms(X.T) here, but the test doesn't pass
    corr /= np.asarray(np.sqrt((X ** 2).sum(axis=0))).ravel()
    corr /= norm(y)

    # convert to p-value
    degrees_of_freedom = y.size - (2 if center else 1)
    F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
    pv = stats.f.sf(F, 1, degrees_of_freedom)
    return F, pv
Developer: CandyPythonFlow, Project: nilearn, Lines: 49, Source: sklearn_f_regression_nosparse.py
Example 12: mean_shift
def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=10):
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged
    cluster_centers = []
    ball_tree = BallTree(X)  # to efficiently look up nearby points

    # For each seed, climb gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            points_within = X[ball_tree.query_radius([weighted_mean], bandwidth * 3)[0]]
            old_mean = weighted_mean  # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
            if converged or completed_iterations == max_iterations:
                cluster_centers.append(weighted_mean)
                break
            completed_iterations += 1

    return cluster_centers
Developer: denizsokmen, Project: cvProjects, Lines: 20, Source: part2.py
Example 13: _iter
def _iter(X,
          weighted_mean,
          kernel_update_function,
          bandwidth,
          ball_tree,
          stop_thresh,
          max_iter):
    """Return the cluster center and the within-bandwidth points visited while iterating
    from the seed to the centroid. This code has been isolated so that it can be executed
    in parallel using joblib."""
    visited_points = set()
    completed_iterations = 0
    while True:
        within_idx = ball_tree.query_radius([weighted_mean], bandwidth * 3)[0]
        [visited_points.add(x) for x in within_idx]
        points_within = X[within_idx]
        old_mean = weighted_mean  # save the old mean
        weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
        converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
        if converged or completed_iterations == max_iter:
            return weighted_mean, visited_points
        completed_iterations += 1
Developer: arutaku, Project: mean_shift, Lines: 21, Source: mean_shift.py
Example 14: mean_shift
def mean_shift(X, bandwidth=None, seeds=None, kernel="flat",
               max_cluster_radius=-1., max_iterations=300):
    """Perform MeanShift Clustering of data using the specified kernel

    Parameters
    ----------
    X : array [n_samples, n_features]
        Input points to be clustered

    bandwidth : float
        Kernel bandwidth

    seeds : array [n_seeds, n_features], optional
        Points used as initial kernel locations.
        If not set, then use every point as a seed (which may
        be very slow --- consider using the `get_bin_seeds` function
        to create a reduced set of seeds).

    max_cluster_radius : float, default -1.
        Used only in post-processing.
        If negative, then each point is clustered into its nearest cluster.
        If positive, then those points that are not within `max_cluster_radius`
        of any cluster center are said to be 'orphans' that do not belong to
        any cluster. Orphans are given cluster label -1.

    Returns
    -------
    cluster_centers : array [n_clusters, n_features]
        Coordinates of cluster centers

    labels : array [n_samples]
        Cluster labels for each point

    Notes
    -----
    See examples/plot_meanshift.py for an example.
    """
    if seeds is None:
        seeds = X
    elif len(seeds) == 0:
        raise ValueError("If a list of seeds is provided it cannot be empty.")
    if not (kernel in KERNELS):
        valid_kernels = " ".join(KERNELS)
        raise ValueError("Kernel %s is not valid. Valid kernel choices are: %s" % (kernel, valid_kernels))

    # Set maximum neighbor query distance based on kernel
    if kernel in ["flat"]:
        query_distance = bandwidth
        kernel_update_function = flat_kernel_update
        print("Using flat kernel update")
    elif kernel in ["gaussian"]:
        query_distance = bandwidth * 3  # A bit arbitrary
        kernel_update_function = gaussian_kernel_update
        print("Using gaussian kernel update")
    else:
        raise ValueError("Kernel %s not implemented correctly" % kernel)

    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged
    center_intensity_dict = {}
    ball_tree = BallTree(X)  # to efficiently look up nearby points

    # For each seed, climb gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            # Find mean of points within bandwidth
            points_within = X[ball_tree.query_radius([weighted_mean], query_distance)[0]]
            if len(points_within) == 0:
                break  # Depending on seeding strategy this condition may occur
            old_mean = weighted_mean  # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            # If converged or at max_iterations, add the cluster
            if extmath.norm(weighted_mean - old_mean) < stop_thresh or \
                    completed_iterations == max_iterations:
                center_intensity_dict[tuple(weighted_mean)] = len(points_within)
                break
            completed_iterations += 1

    # POST PROCESSING: remove near duplicate points
    # If the distance between two kernels is less than the bandwidth,
    # then we have to remove one because it is a duplicate. Remove the
    # one with fewer points.
    print("%d clusters before removing duplicates" % len(center_intensity_dict))
    sorted_by_intensity = sorted(center_intensity_dict.items(),
                                 key=lambda tup: tup[1], reverse=True)
    sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
    unique = np.ones(len(sorted_centers), dtype=np.bool)
    cc_tree = BallTree(sorted_centers)
    for i, center in enumerate(sorted_centers):
        if unique[i]:
            neighbor_idxs = cc_tree.query_radius([center], bandwidth)[0]
            unique[neighbor_idxs] = 0
            unique[i] = 1  # leave the current point as unique
    cluster_centers = sorted_centers[unique]
    # ... (the rest of this example is omitted) ...
Developer: lemanou, Project: PCG_2016, Lines: 101, Source: clee_mean_shift.py
Example 15: variable_bw_mean_shift
def variable_bw_mean_shift(X, bandwidth_array, seeds=None, max_iterations=300):
    """Variable bandwidth mean shift with gaussian kernel

    Parameters
    ----------
    X : array-like, shape=[n_samples, n_features]
        Input data.

    bandwidth_array : array[float], shape=[n_samples]
        Kernel bandwidth for each sample.

    seeds : array[float, float], shape=(n_seeds, n_features), optional
        Points used as initial kernel locations. Default is
        setting each point in the input data as a seed.

    max_iterations : int, default 300
        Maximum number of iterations, per seed point, before the clustering
        operation terminates for that seed point if it has not converged yet.

    Returns
    -------
    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.

    labels : array, shape=[n_samples]
        Cluster labels for each point.

    Notes
    -----
    Code adapted from the scikit-learn library.
    """
    if not seeds:
        seeds = X

    n_points, n_features = X.shape
    stop_thresh = 1e-3 * np.mean(bandwidth_array)  # when mean has converged
    center_intensity_dict = {}
    cluster_centers = []
    ball_tree = BallTree(X)  # to efficiently look up nearby points

    def gaussian_kernel(x, points, bandwidth):
        distances = euclidean_distances(points, x)
        weights = np.exp(-1 * (distances ** 2 / bandwidth ** 2))
        return np.sum(points * weights, axis=0) / np.sum(weights)

    # For each seed, climb gradient until convergence or max_iterations
    for i, weighted_mean in enumerate(seeds):
        completed_iterations = 0
        while True:
            points_within = X[ball_tree.query_radius([weighted_mean], bandwidth_array[i])[0]]
            old_mean = weighted_mean  # save the old mean
            weighted_mean = gaussian_kernel(old_mean, points_within, bandwidth_array[i])
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
            if converged or completed_iterations == max_iterations:
                if completed_iterations == max_iterations:
                    print("reached max iterations")
                cluster_centers.append(weighted_mean)
                center_intensity_dict[tuple(weighted_mean)] = len(points_within)
                break
            completed_iterations += 1

    # POST PROCESSING: remove near duplicate points
    # If the distance between two kernels is less than the bandwidth,
    # then we have to remove one because it is a duplicate. Remove the
    # one with fewer points.
    sorted_by_intensity = sorted(center_intensity_dict.items(), key=lambda tup: tup[1], reverse=True)
    sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
    unique = np.ones(len(sorted_centers), dtype=np.bool)
    ball_tree = BallTree(sorted_centers)
    for i, center in enumerate(sorted_centers):
        if unique[i]:
            neighbor_idxs = ball_tree.query_radius([center], np.mean(bandwidth_array))[0]
            unique[neighbor_idxs] = 0
            unique[i] = 1  # leave the current point as unique
    cluster_centers = sorted_centers[unique]

    # ASSIGN LABELS: a point belongs to the cluster that it is closest to
    nbrs = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(cluster_centers)
    labels = np.zeros(n_points, dtype=np.int)
    distances, idxs = nbrs.kneighbors(X)
    labels = idxs.flatten()
    return cluster_centers, labels
Developer: rohanp, Project: LDFMap, Lines: 89, Source: cluster.py
Example 16: discretize
def discretize(vectors, copy=True, max_svd_restarts=30, n_iter_max=20,
               random_state=None):
    """Search for a partition matrix (clustering) which is closest to the
    eigenvector embedding.

    Parameters
    ----------
    vectors : array-like, shape: (n_samples, n_clusters)
        The embedding space of the samples.

    copy : boolean, optional, default: True
        Whether to copy vectors, or perform in-place normalization.

    max_svd_restarts : int, optional, default: 30
        Maximum number of attempts to restart SVD if convergence fails

    n_iter_max : int, optional, default: 20
        Maximum number of iterations to attempt in rotation and partition
        matrix search if machine precision convergence is not reached

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of
        the rotation matrix

    Returns
    -------
    labels : array of integers, shape: n_samples
        The labels of the clusters.

    References
    ----------
    - Multiclass spectral clustering, 2003
      Stella X. Yu, Jianbo Shi
      http://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf

    Notes
    -----
    The eigenvector embedding is used to iteratively search for the
    closest discrete partition. First, the eigenvector embedding is
    normalized to the space of partition matrices. An optimal discrete
    partition matrix closest to this normalized embedding multiplied by
    an initial rotation is calculated. Fixing this discrete partition
    matrix, an optimal rotation matrix is calculated. These two
    calculations are performed until convergence. The discrete partition
    matrix is returned as the clustering solution. Used in spectral
    clustering, this method tends to be faster and more robust to random
    initialization than k-means.
    """
    from scipy.sparse import csc_matrix
    from scipy.linalg import LinAlgError

    random_state = check_random_state(random_state)

    vectors = as_float_array(vectors, copy=copy)
    eps = np.finfo(float).eps
    n_samples, n_components = vectors.shape

    # Normalize the eigenvectors to an equal length of a vector of ones.
    # Reorient the eigenvectors to point in the negative direction with respect
    # to the first element. This may have to do with constraining the
    # eigenvectors to lie in a specific quadrant to make the discretization
    # search easier.
    norm_ones = np.sqrt(n_samples)
    for i in range(vectors.shape[1]):
        vectors[:, i] = (vectors[:, i] / norm(vectors[:, i])) \
            * norm_ones
        if vectors[0, i] != 0:
            vectors[:, i] = -1 * vectors[:, i] * np.sign(vectors[0, i])

    # Normalize the rows of the eigenvectors. Samples should lie on the unit
    # hypersphere centered at the origin. This transforms the samples in the
    # embedding space to the space of partition matrices.
    vectors = vectors / np.sqrt((vectors ** 2).sum(axis=1))[:, np.newaxis]

    svd_restarts = 0
    has_converged = False

    # If there is an exception we try to randomize and rerun SVD again
    # do this max_svd_restarts times.
    while (svd_restarts < max_svd_restarts) and not has_converged:
        # Initialize first column of rotation matrix with a row of the
        # eigenvectors
        rotation = np.zeros((n_components, n_components))
        rotation[:, 0] = vectors[random_state.randint(n_samples), :].T

        # To initialize the rest of the rotation matrix, find the rows
        # of the eigenvectors that are as orthogonal to each other as
        # possible
        c = np.zeros(n_samples)
        for j in range(1, n_components):
            # Accumulate c to ensure row is as orthogonal as possible to
            # previous picks as well as current one
            c += np.abs(np.dot(vectors, rotation[:, j - 1]))
            rotation[:, j] = vectors[c.argmin(), :].T
        # ... (the rest of this example is omitted) ...
Developer: arbelm2, Project: cbio, Lines: 101, Source: spectral.py
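This helper mirrors the discretization routine used internally by scikit-learn's spectral clustering. In scikit-learn itself the same strategy is reachable through the public API by passing assign_labels='discretize' to SpectralClustering; a minimal sketch of that public entry point (exact cluster assignments depend on the scikit-learn version):

import numpy as np
from sklearn.cluster import SpectralClustering

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2),
               rng.randn(50, 2) + 5.0])

model = SpectralClustering(n_clusters=2, affinity='rbf',
                           assign_labels='discretize', random_state=0)
labels = model.fit_predict(X)
print(np.bincount(labels))  # roughly two balanced clusters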
Example 17: mean_shift
def mean_shift(X, intensities=None, bandwidth=None, seeds=None,
               cluster_all=True, max_iterations=300, verbose=False, use_scipy=True):
    """mean_shift(X, intensities=None, bandwidth=None, seeds=None,
                  cluster_all=True, max_iterations=300, verbose=False, use_scipy=True)

    Mean shift algorithm

    Implementation taken from scikit-learn with two minor variants:

    - Use (by default) scipy KD-trees, which are faster in our case
    - weighted version of mean-shift using `intensities` as
      weights (i.e., we compute centers of mass rather than means)

    Parameters
    ----------
    X : array-like, shape=[n_samples, n_features]
        Input data.

    intensities : array-like, shape=[n_samples]
        Voxel intensities, used to weight the mean.

    bandwidth : float
        Kernel bandwidth.

    seeds : array-like, shape=[n_seeds, n_features]
        Points used as initial kernel locations.

    use_scipy : bool
        If true use cKDTree from scipy.spatial, otherwise
        use NearestNeighbors from sklearn.neighbors.

    Returns
    -------
    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.

    labels : array, shape=[n_samples]
        Cluster labels for each point.

    volumes : array, shape=[n_clusters]
        Volume of each cluster (# of points in the cluster).

    masses : array, shape=[n_clusters]
        Mass of each cluster (sum of intensities of points in the cluster).

    trajectories : list
        MS trajectories for debugging purposes.
    """
    if seeds is None:
        seeds = X
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth  # when mean has converged
    center_volume_dict = {}
    center_mass_dict = {}

    # tee.log('Fitting NearestNeighbors on', n_points, 'points')
    if use_scipy:
        kdtree = cKDTree(X)
    else:
        nbrs = NearestNeighbors(radius=bandwidth).fit(X)

    # For each seed, climb gradient until convergence or max_iterations
    trajectories = {}  # for each seed, a list of points
    tee.log('Moving kernels for', len(seeds), 'seeds')
    pbar = pb.ProgressBar(widgets=['Moving %d seeds: ' % len(seeds), pb.Percentage()],
                          maxval=len(seeds)).start()
    for seed_no, my_mean in enumerate(seeds):
        completed_iterations = 0
        seed = my_mean
        trajectories[seed_no] = []
        while True:
            # Find mean of points within bandwidth
            if use_scipy:
                i_nbrs = kdtree.query_ball_point(my_mean, r=bandwidth)
            else:
                i_nbrs = nbrs.radius_neighbors([my_mean], bandwidth,
                                               return_distance=False)[0]
            points_within = X[i_nbrs]
            if len(points_within) == 0:
                break  # Depending on seeding strategy this condition may occur
            my_old_mean = my_mean  # save the old mean
            if intensities is None:
                my_mean = np.mean(points_within, axis=0)
            else:
                my_mean = np.average(points_within, axis=0, weights=intensities[i_nbrs])
            # If converged or at max_iterations, add the cluster
            if extmath.norm(my_mean - my_old_mean) < stop_thresh or completed_iterations == max_iterations:
                center_volume_dict[tuple(my_mean)] = len(points_within)
                center_mass_dict[tuple(my_mean)] = sum(intensities[i_nbrs])
                break
            completed_iterations += 1
            trajectories[seed_no].append(my_mean)
        if verbose:
            print('seed', seed, '-->', my_mean,
                  center_volume_dict[tuple(my_mean)], center_mass_dict[tuple(my_mean)], completed_iterations)
        pbar.update(seed_no + 1)
    pbar.finish()

    # POST PROCESSING: remove near duplicate points
    # ... (the rest of this example is omitted) ...
Developer: paolo-f, Project: bcfind, Lines: 101, Source: mscd.py
Example 18: _fit
def _fit(self, X):
    """Fit the model on X

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.
    """
    self.nnzs = []
    X_orig = X.copy()
    if self.trim_proportion < 0 or self.trim_proportion > 0.5:
        raise ValueError('`trim_proportion` must be between 0 and 0.5,'
                         ' got %s.' % self.trim_proportion)

    lam = 1.0 / np.sqrt(np.max(X.shape))
    rng = check_random_state(self.random_state)
    X = check_array(X)
    self.obj = []
    n_samples, n_features = X.shape
    X = as_float_array(X, copy=self.copy)

    # Center data
    if self.centering == 'mean':
        self.center_ = np.mean(X, axis=0)
    elif self.centering == 'median':
        self.center_ = np.median(X, axis=0)
    else:
        raise ValueError("`centering` must be 'mean' or 'median', "
                         "got %s" % self.centering)
    X -= self.center_

    if self.n_components is None:
        n_components = X.shape[1]
    elif not 0 <= self.n_components <= n_features:
        raise ValueError("n_components=%r invalid for n_features=%d"
                         % (self.n_components, n_features))
    else:
        n_components = self.n_components

    self.components_ = np.empty((n_components, n_features))
    for k in range(n_components):
        # compute k'th principal component
        mu = rng.rand(n_features) - 0.5
        mu = mu / norm(mu)

        # initialize using a few EM iterations
        for i in range(3):
            dots = fast_dot(X, mu)
            mu = fast_dot(dots.T, X)
            mu = mu / norm(mu)

        # grassmann average
        for i in range(n_samples):
            prev_mu = mu
            dot_signs = np.sign(fast_dot(X, mu))
            mu = _trimmed_mean(X * dot_signs[:, np.newaxis],
                               self.trim_proportion)
            mu = mu / norm(mu)

            if np.max(np.abs(mu - prev_mu)) < self.tol:
                break

        # store the estimated vector and possibly re-orthonormalize
        if k > 0:
            mu = _reorth(self.components_[:k-1], mu)
            mu = mu / norm(mu)

        self.components_[k] = mu

        if k < n_components - 1:
            X = X - fast_dot(fast_dot(X, mu)[:, np.newaxis],
                             mu[np.newaxis, :])

        L = X + self.center_
        S = X_orig - L
        o = norm_(L, 'nuc') + lam * np.sum(np.abs(S))
        # print('TGA Objective = ', o)
        self.obj.append(o)
        self.nnzs.append(np.sum(S > 0))
Developer: amueller, Project: pca, Lines: 80, Source: tga.py
Example 19: _paii
def _paii(self, loss_t, x_t):
    return loss_t / (extmath.norm(x_t) ** 2.0 + 1.0 / (2.0 * self.C))
Developer: jsouza, Project: pamtl, Lines: 2, Source: partl_regression.py
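For reference, the PA-II step size from Crammer et al. (2006, "Online Passive-Aggressive Algorithms") is tau_t = loss_t / (||x_t||^2 + 1/(2C)), which is what the one-liner above computes with extmath.norm. A self-contained numeric check, using np.linalg.norm in place of the old extmath helper:

import numpy as np

def paii_step(loss_t, x_t, C):
    # PA-II step size: tau = loss / (||x||^2 + 1 / (2C))
    return loss_t / (np.linalg.norm(x_t) ** 2.0 + 1.0 / (2.0 * C))

print(paii_step(loss_t=2.0, x_t=np.array([3.0, 4.0]), C=0.5))  # 2 / (25 + 1) ≈ 0.0769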
Note: The sklearn.utils.extmath.norm examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation hosting platforms. The code snippets are selected from open-source projects contributed by many different authors, and copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.