本文整理汇总了Python中sklearn.utils.extmath._incremental_mean_and_var函数的典型用法代码示例。如果您正苦于以下问题:Python _incremental_mean_and_var函数的具体用法?Python _incremental_mean_and_var怎么用?Python _incremental_mean_and_var使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了_incremental_mean_and_var函数的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_incremental_variance_ddof
def test_incremental_variance_ddof():
# Test that degrees of freedom parameter for calculations are correct.
rng = np.random.RandomState(1999)
X = rng.randn(50, 10)
n_samples, n_features = X.shape
for batch_size in [11, 20, 37]:
steps = np.arange(0, X.shape[0], batch_size)
if steps[-1] != X.shape[0]:
steps = np.hstack([steps, n_samples])
for i, j in zip(steps[:-1], steps[1:]):
batch = X[i:j, :]
if i == 0:
incremental_means = batch.mean(axis=0)
incremental_variances = batch.var(axis=0)
# Assign this twice so that the test logic is consistent
incremental_count = batch.shape[0]
sample_count = batch.shape[0]
else:
result = _incremental_mean_and_var(
batch, incremental_means, incremental_variances,
sample_count)
(incremental_means, incremental_variances,
incremental_count) = result
sample_count += batch.shape[0]
calculated_means = np.mean(X[:j], axis=0)
calculated_variances = np.var(X[:j], axis=0)
assert_almost_equal(incremental_means, calculated_means, 6)
assert_almost_equal(incremental_variances,
calculated_variances, 6)
assert_equal(incremental_count, sample_count)
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:32,代码来源:test_extmath.py
示例2: test_incremental_mean_and_variance_ignore_nan
def test_incremental_mean_and_variance_ignore_nan():
old_means = np.array([535., 535., 535., 535.])
old_variances = np.array([4225., 4225., 4225., 4225.])
old_sample_count = np.array([2, 2, 2, 2], dtype=np.int32)
X = np.array([[170, 170, 170, 170],
[430, 430, 430, 430],
[300, 300, 300, 300]])
X_nan = np.array([[170, np.nan, 170, 170],
[np.nan, 170, 430, 430],
[430, 430, np.nan, 300],
[300, 300, 300, np.nan]])
X_means, X_variances, X_count = _incremental_mean_and_var(
X, old_means, old_variances, old_sample_count)
X_nan_means, X_nan_variances, X_nan_count = _incremental_mean_and_var(
X_nan, old_means, old_variances, old_sample_count)
assert_allclose(X_nan_means, X_means)
assert_allclose(X_nan_variances, X_variances)
assert_allclose(X_nan_count, X_count)
开发者ID:aniryou,项目名称:scikit-learn,代码行数:22,代码来源:test_extmath.py
示例3: test_incremental_variance_update_formulas
def test_incremental_variance_update_formulas():
# Test Youngs and Cramer incremental variance formulas.
# Doggie data from http://www.mathsisfun.com/data/standard-deviation.html
A = np.array([[600, 470, 170, 430, 300],
[600, 470, 170, 430, 300],
[600, 470, 170, 430, 300],
[600, 470, 170, 430, 300]]).T
idx = 2
X1 = A[:idx, :]
X2 = A[idx:, :]
old_means = X1.mean(axis=0)
old_variances = X1.var(axis=0)
old_sample_count = X1.shape[0]
final_means, final_variances, final_count = \
_incremental_mean_and_var(X2, old_means, old_variances,
old_sample_count)
assert_almost_equal(final_means, A.mean(axis=0), 6)
assert_almost_equal(final_variances, A.var(axis=0), 6)
assert_almost_equal(final_count, A.shape[0])
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:20,代码来源:test_extmath.py
示例4: test_incremental_variance_numerical_stability
def test_incremental_variance_numerical_stability():
# Test Youngs and Cramer incremental variance formulas.
def np_var(A):
return A.var(axis=0)
# Naive one pass variance computation - not numerically stable
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
def one_pass_var(X):
n = X.shape[0]
exp_x2 = (X ** 2).sum(axis=0) / n
expx_2 = (X.sum(axis=0) / n) ** 2
return exp_x2 - expx_2
# Two-pass algorithm, stable.
# We use it as a benchmark. It is not an online algorithm
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
def two_pass_var(X):
mean = X.mean(axis=0)
Y = X.copy()
return np.mean((Y - mean)**2, axis=0)
# Naive online implementation
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
# This works only for chunks for size 1
def naive_mean_variance_update(x, last_mean, last_variance,
last_sample_count):
updated_sample_count = (last_sample_count + 1)
samples_ratio = last_sample_count / float(updated_sample_count)
updated_mean = x / updated_sample_count + last_mean * samples_ratio
updated_variance = last_variance * samples_ratio + \
(x - last_mean) * (x - updated_mean) / updated_sample_count
return updated_mean, updated_variance, updated_sample_count
# We want to show a case when one_pass_var has error > 1e-3 while
# _batch_mean_variance_update has less.
tol = 200
n_features = 2
n_samples = 10000
x1 = np.array(1e8, dtype=np.float64)
x2 = np.log(1e-5, dtype=np.float64)
A0 = x1 * np.ones((n_samples // 2, n_features), dtype=np.float64)
A1 = x2 * np.ones((n_samples // 2, n_features), dtype=np.float64)
A = np.vstack((A0, A1))
# Older versions of numpy have different precision
# In some old version, np.var is not stable
if np.abs(np_var(A) - two_pass_var(A)).max() < 1e-6:
stable_var = np_var
else:
stable_var = two_pass_var
# Naive one pass var: >tol (=1063)
assert_greater(np.abs(stable_var(A) - one_pass_var(A)).max(), tol)
# Starting point for online algorithms: after A0
# Naive implementation: >tol (436)
mean, var, n = A0[0, :], np.zeros(n_features), n_samples // 2
for i in range(A1.shape[0]):
mean, var, n = \
naive_mean_variance_update(A1[i, :], mean, var, n)
assert_equal(n, A.shape[0])
# the mean is also slightly unstable
assert_greater(np.abs(A.mean(axis=0) - mean).max(), 1e-6)
assert_greater(np.abs(stable_var(A) - var).max(), tol)
# Robust implementation: <tol (177)
mean, var, n = A0[0, :], np.zeros(n_features), n_samples // 2
for i in range(A1.shape[0]):
mean, var, n = \
_incremental_mean_and_var(A1[i, :].reshape((1, A1.shape[1])),
mean, var, n)
assert_equal(n, A.shape[0])
assert_array_almost_equal(A.mean(axis=0), mean)
assert_greater(tol, np.abs(stable_var(A) - var).max())
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:76,代码来源:test_extmath.py
示例5: partial_fit
def partial_fit(self, X, y=None, check_input=True):
"""Incremental fit with X. All of X is processed as a single batch.
Parameters
----------
X: array-like, shape (n_samples, n_features)
Training data, where n_samples is the number of samples and
n_features is the number of features.
Returns
-------
self: object
Returns the instance itself.
"""
# ====== check the samples and cahces ====== #
if isinstance(X, Data):
X = X[:]
if check_input:
X = check_array(X, copy=self.copy, dtype=[np.float64, np.float32])
n_samples, n_features = X.shape
# check number of components
if self.n_components is None:
self.n_components_ = n_features
elif not 1 <= self.n_components <= n_features:
raise ValueError("n_components=%r invalid for n_features=%d, need "
"more rows than columns for IncrementalPCA "
"processing" % (self.n_components, n_features))
else:
self.n_components_ = self.n_components
# check the cache
if n_samples < n_features or self._nb_cached_samples > 0:
self._cache_batches.append(X)
self._nb_cached_samples += n_samples
# not enough samples yet
if self._nb_cached_samples < n_features:
return
else: # group mini batch into big batch
X = np.concatenate(self._cache_batches, axis=0)
self._cache_batches = []
self._nb_cached_samples = 0
n_samples = X.shape[0]
# ====== fit the model ====== #
if (self.components_ is not None) and (self.components_.shape[0] !=
self.n_components_):
raise ValueError("Number of input features has changed from %i "
"to %i between calls to partial_fit! Try "
"setting n_components to a fixed value." %
(self.components_.shape[0], self.n_components_))
# Update stats - they are 0 if this is the fisrt step
col_mean, col_var, n_total_samples = \
_incremental_mean_and_var(X, last_mean=self.mean_,
last_variance=self.var_,
last_sample_count=self.n_samples_seen_)
total_var = np.sum(col_var * n_total_samples)
if total_var == 0: # if variance == 0, make no sense to continue
return self
# Whitening
if self.n_samples_seen_ == 0:
# If it is the first step, simply whiten X
X -= col_mean
else:
col_batch_mean = np.mean(X, axis=0)
X -= col_batch_mean
# Build matrix of combined previous basis and new data
mean_correction = \
np.sqrt((self.n_samples_seen_ * n_samples) /
n_total_samples) * (self.mean_ - col_batch_mean)
X = np.vstack((self.singular_values_.reshape((-1, 1)) *
self.components_, X, mean_correction))
U, S, V = linalg.svd(X, full_matrices=False)
U, V = svd_flip(U, V, u_based_decision=False)
explained_variance = S ** 2 / n_total_samples
explained_variance_ratio = S ** 2 / total_var
self.n_samples_seen_ = n_total_samples
self.components_ = V[:self.n_components_]
self.singular_values_ = S[:self.n_components_]
self.mean_ = col_mean
self.var_ = col_var
self.explained_variance_ = explained_variance[:self.n_components_]
self.explained_variance_ratio_ = \
explained_variance_ratio[:self.n_components_]
if self.n_components_ < n_features:
self.noise_variance_ = \
explained_variance[self.n_components_:].mean()
else:
self.noise_variance_ = 0.
return self
开发者ID:imito,项目名称:odin,代码行数:89,代码来源:decompositions.py
注:本文中的sklearn.utils.extmath._incremental_mean_and_var函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论