本文整理汇总了Python中sklearn.datasets.samples_generator.make_low_rank_matrix函数的典型用法代码示例。如果您正苦于以下问题:Python make_low_rank_matrix函数的具体用法?Python make_low_rank_matrix怎么用?Python make_low_rank_matrix使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了make_low_rank_matrix函数的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_randomized_svd_infinite_rank
def test_randomized_svd_infinite_rank():
"""Check that extmath.randomized_svd can handle noisy matrices"""
n_samples = 100
n_features = 500
rank = 5
k = 10
# let us try again without 'low_rank component': just regularly but slowly
# decreasing singular values: the rank of the data matrix is infinite
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=1.0,
random_state=0)
assert_equal(X.shape, (n_samples, n_features))
# compute the singular values of X using the slow exact method
_, s, _ = linalg.svd(X, full_matrices=False)
# compute the singular values of X using the fast approximate method
# without the iterated power method
_, sa, _ = randomized_svd(X, k, n_iter=0)
# the approximation does not tolerate the noise:
assert_greater(np.abs(s[:k] - sa).max(), 0.1)
# compute the singular values of X using the fast approximate method with
# iterated power method
_, sap, _ = randomized_svd(X, k, n_iter=5)
# the iterated power method is still managing to get most of the structure
# at the requested rank
assert_almost_equal(s[:k], sap, decimal=3)
开发者ID:93sam,项目名称:scikit-learn,代码行数:31,代码来源:test_extmath.py
示例2: bench_b
def bench_b(power_list):
n_samples, n_features = 1000, 10000
data_params = {'n_samples': n_samples, 'n_features': n_features,
'tail_strength': .7, 'random_state': random_state}
dataset_name = "low rank matrix %d x %d" % (n_samples, n_features)
ranks = [10, 50, 100]
if enable_spectral_norm:
all_spectral = defaultdict(list)
all_frobenius = defaultdict(list)
for rank in ranks:
X = make_low_rank_matrix(effective_rank=rank, **data_params)
if enable_spectral_norm:
X_spectral_norm = norm_diff(X, norm=2, msg=False)
X_fro_norm = norm_diff(X, norm='fro', msg=False)
for n_comp in [np.int(rank/2), rank, rank*2]:
label = "rank=%d, n_comp=%d" % (rank, n_comp)
print(label)
for pi in power_list:
U, s, V, _ = svd_timing(X, n_comp, n_iter=pi, n_oversamples=2,
power_iteration_normalizer='LU')
if enable_spectral_norm:
A = U.dot(np.diag(s).dot(V))
all_spectral[label].append(norm_diff(X - A, norm=2) /
X_spectral_norm)
f = scalable_frobenius_norm_discrepancy(X, U, s, V)
all_frobenius[label].append(f / X_fro_norm)
if enable_spectral_norm:
title = "%s: spectral norm diff vs n power iteration" % (dataset_name)
plot_power_iter_vs_s(power_iter, all_spectral, title)
title = "%s: frobenius norm diff vs n power iteration" % (dataset_name)
plot_power_iter_vs_s(power_iter, all_frobenius, title)
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:35,代码来源:bench_plot_randomized_svd.py
示例3: test_randomized_svd_transpose_consistency
def test_randomized_svd_transpose_consistency():
"""Check that transposing the design matrix has limit impact"""
n_samples = 100
n_features = 500
rank = 4
k = 10
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.5,
random_state=0)
assert_equal(X.shape, (n_samples, n_features))
U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False,
random_state=0)
U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True,
random_state=0)
U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto',
random_state=0)
U4, s4, V4 = linalg.svd(X, full_matrices=False)
assert_almost_equal(s1, s4[:k], decimal=3)
assert_almost_equal(s2, s4[:k], decimal=3)
assert_almost_equal(s3, s4[:k], decimal=3)
assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
decimal=2)
assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
decimal=2)
# in this case 'auto' is equivalent to transpose
assert_almost_equal(s2, s3)
开发者ID:93sam,项目名称:scikit-learn,代码行数:31,代码来源:test_extmath.py
示例4: test_randomized_svd_power_iteration_normalizer
def test_randomized_svd_power_iteration_normalizer():
# randomized_svd with power_iteration_normalized='none' diverges for
# large number of power iterations on this dataset
rng = np.random.RandomState(42)
X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
X += 3 * rng.randint(0, 2, size=X.shape)
n_components = 50
# Check that it diverges with many (non-normalized) power iterations
U, s, V = randomized_svd(X, n_components, n_iter=2,
power_iteration_normalizer='none')
A = X - U.dot(np.diag(s).dot(V))
error_2 = linalg.norm(A, ord='fro')
U, s, V = randomized_svd(X, n_components, n_iter=20,
power_iteration_normalizer='none')
A = X - U.dot(np.diag(s).dot(V))
error_20 = linalg.norm(A, ord='fro')
assert_greater(np.abs(error_2 - error_20), 100)
for normalizer in ['LU', 'QR', 'auto']:
U, s, V = randomized_svd(X, n_components, n_iter=2,
power_iteration_normalizer=normalizer,
random_state=0)
A = X - U.dot(np.diag(s).dot(V))
error_2 = linalg.norm(A, ord='fro')
for i in [5, 10, 50]:
U, s, V = randomized_svd(X, n_components, n_iter=i,
power_iteration_normalizer=normalizer,
random_state=0)
A = X - U.dot(np.diag(s).dot(V))
error = linalg.norm(A, ord='fro')
assert_greater(15, np.abs(error_2 - error))
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:33,代码来源:test_extmath.py
示例5: test_randomized_svd_low_rank_with_noise
def test_randomized_svd_low_rank_with_noise():
"""Check that extmath.randomized_svd can handle noisy matrices"""
n_samples = 100
n_features = 500
rank = 5
k = 10
# generate a matrix X wity structure approximate rank `rank` and an
# important noisy component
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.5,
random_state=0)
assert_equal(X.shape, (n_samples, n_features))
# compute the singular values of X using the slow exact method
_, s, _ = linalg.svd(X, full_matrices=False)
# compute the singular values of X using the fast approximate method
# without the iterated power method
_, sa, _ = randomized_svd(X, k, n_iter=0)
# the approximation does not tolerate the noise:
assert_greater(np.abs(s[:k] - sa).max(), 0.05)
# compute the singular values of X using the fast approximate method with
# iterated power method
_, sap, _ = randomized_svd(X, k, n_iter=5)
# the iterated power method is helping getting rid of the noise:
assert_almost_equal(s[:k], sap, decimal=3)
开发者ID:93sam,项目名称:scikit-learn,代码行数:30,代码来源:test_extmath.py
示例6: test_fast_svd_low_rank
def test_fast_svd_low_rank():
"""Check that extmath.fast_svd is consistent with linalg.svd"""
n_samples = 100
n_features = 500
rank = 5
k = 10
# generate a matrix X of approximate effective rank `rank` and no noise
# component (very structured signal):
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.0, random_state=0)
assert_equal(X.shape, (n_samples, n_features))
# compute the singular values of X using the slow exact method
U, s, V = linalg.svd(X, full_matrices=False)
# compute the singular values of X using the fast approximate method
Ua, sa, Va = fast_svd(X, k)
assert_equal(Ua.shape, (n_samples, k))
assert_equal(sa.shape, (k,))
assert_equal(Va.shape, (k, n_features))
# ensure that the singular values of both methods are equal up to the real
# rank of the matrix
assert_almost_equal(s[:k], sa)
# check the singular vectors too (while not checking the sign)
assert_almost_equal(np.dot(U[:, :k], V[:k, :]), np.dot(Ua, Va))
# check the sparse matrix representation
X = sparse.csr_matrix(X)
# compute the singular values of X using the fast approximate method
Ua, sa, Va = fast_svd(X, k)
assert_almost_equal(s[:rank], sa[:rank])
开发者ID:forkloop,项目名称:scikit-learn,代码行数:35,代码来源:test_svd.py
示例7: benchmark
def benchmark(samples_range, features_range, rank=50, tolerance=1e-5):
timeset = defaultdict(lambda: [])
err = defaultdict(lambda: [])
for n_samples in samples_range:
for n_features in features_range:
print("%2d samples, %2d features" % (n_samples, n_features))
print('=======================')
X = np.abs(make_low_rank_matrix(n_samples, n_features,
effective_rank=rank, tail_strength=0.2))
gc.collect()
print("benchmarking nndsvd-nmf: ")
tstart = time()
m = NMF(n_components=30, tol=tolerance, init='nndsvd').fit(X)
tend = time() - tstart
timeset['nndsvd-nmf'].append(tend)
err['nndsvd-nmf'].append(m.reconstruction_err_)
report(m.reconstruction_err_, tend)
gc.collect()
print("benchmarking nndsvda-nmf: ")
tstart = time()
m = NMF(n_components=30, init='nndsvda',
tol=tolerance).fit(X)
tend = time() - tstart
timeset['nndsvda-nmf'].append(tend)
err['nndsvda-nmf'].append(m.reconstruction_err_)
report(m.reconstruction_err_, tend)
gc.collect()
print("benchmarking nndsvdar-nmf: ")
tstart = time()
m = NMF(n_components=30, init='nndsvdar',
tol=tolerance).fit(X)
tend = time() - tstart
timeset['nndsvdar-nmf'].append(tend)
err['nndsvdar-nmf'].append(m.reconstruction_err_)
report(m.reconstruction_err_, tend)
gc.collect()
print("benchmarking random-nmf")
tstart = time()
m = NMF(n_components=30, init='random', max_iter=1000,
tol=tolerance).fit(X)
tend = time() - tstart
timeset['random-nmf'].append(tend)
err['random-nmf'].append(m.reconstruction_err_)
report(m.reconstruction_err_, tend)
gc.collect()
print("benchmarking alt-random-nmf")
tstart = time()
W, H = alt_nnmf(X, r=30, init='random', tol=tolerance)
tend = time() - tstart
timeset['alt-random-nmf'].append(tend)
err['alt-random-nmf'].append(np.linalg.norm(X - np.dot(W, H)))
report(norm(X - np.dot(W, H)), tend)
return timeset, err
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:60,代码来源:bench_plot_nmf.py
示例8: compute_bench
def compute_bench(samples_range, features_range, q=3, rank=50):
it = 0
results = defaultdict(lambda: [])
max_it = len(samples_range) * len(features_range)
for n_samples in samples_range:
for n_features in features_range:
it += 1
print '===================='
print 'Iteration %03d of %03d' % (it, max_it)
print '===================='
X = make_low_rank_matrix(n_samples, n_features, effective_rank=rank,
tail_strength=0.2)
gc.collect()
print "benching scipy svd: "
tstart = time()
svd(X, full_matrices=False)
results['scipy svd'].append(time() - tstart)
gc.collect()
print "benching scikit-learn fast_svd: q=0"
tstart = time()
fast_svd(X, rank, q=0)
results['scikit-learn fast_svd (q=0)'].append(time() - tstart)
gc.collect()
print "benching scikit-learn fast_svd: q=%d " % q
tstart = time()
fast_svd(X, rank, q=q)
results['scikit-learn fast_svd (q=%d)' % q].append(time() - tstart)
return results
开发者ID:Yangqing,项目名称:scikit-learn,代码行数:35,代码来源:bench_plot_svd.py
示例9: test_randomized_svd_sparse_warnings
def test_randomized_svd_sparse_warnings():
# randomized_svd throws a warning for lil and dok matrix
rng = np.random.RandomState(42)
X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng)
n_components = 5
for cls in (sparse.lil_matrix, sparse.dok_matrix):
X = cls(X)
assert_warns_message(
sparse.SparseEfficiencyWarning,
"Calculating SVD of a {} is expensive. "
"csr_matrix is more efficient.".format(cls.__name__),
randomized_svd, X, n_components, n_iter=1,
power_iteration_normalizer='none')
开发者ID:aniryou,项目名称:scikit-learn,代码行数:13,代码来源:test_extmath.py
示例10: get_data
def get_data(dataset_name):
print("Getting dataset: %s" % dataset_name)
if dataset_name == 'lfw_people':
X = fetch_lfw_people().data
elif dataset_name == '20newsgroups':
X = fetch_20newsgroups_vectorized().data[:, :100000]
elif dataset_name == 'olivetti_faces':
X = fetch_olivetti_faces().data
elif dataset_name == 'rcv1':
X = fetch_rcv1().data
elif dataset_name == 'CIFAR':
if handle_missing_dataset(CIFAR_FOLDER) == "skip":
return
X1 = [unpickle("%sdata_batch_%d" % (CIFAR_FOLDER, i + 1))
for i in range(5)]
X = np.vstack(X1)
del X1
elif dataset_name == 'SVHN':
if handle_missing_dataset(SVHN_FOLDER) == 0:
return
X1 = sp.io.loadmat("%strain_32x32.mat" % SVHN_FOLDER)['X']
X2 = [X1[:, :, :, i].reshape(32 * 32 * 3) for i in range(X1.shape[3])]
X = np.vstack(X2)
del X1
del X2
elif dataset_name == 'low rank matrix':
X = make_low_rank_matrix(n_samples=500, n_features=np.int(1e4),
effective_rank=100, tail_strength=.5,
random_state=random_state)
elif dataset_name == 'uncorrelated matrix':
X, _ = make_sparse_uncorrelated(n_samples=500, n_features=10000,
random_state=random_state)
elif dataset_name == 'big sparse matrix':
sparsity = np.int(1e6)
size = np.int(1e6)
small_size = np.int(1e4)
data = np.random.normal(0, 1, np.int(sparsity/10))
data = np.repeat(data, 10)
row = np.random.uniform(0, small_size, sparsity)
col = np.random.uniform(0, small_size, sparsity)
X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
del data
del row
del col
else:
X = fetch_mldata(dataset_name).data
return X
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:48,代码来源:bench_plot_randomized_svd.py
示例11: compute_bench
def compute_bench(samples_range, features_range, n_iter=3, rank=50):
it = 0
results = defaultdict(lambda: [])
max_it = len(samples_range) * len(features_range)
for n_samples in samples_range:
for n_features in features_range:
it += 1
print('====================')
print('Iteration %03d of %03d' % (it, max_it))
print('====================')
X = make_low_rank_matrix(n_samples, n_features,
effective_rank=rank,
tail_strength=0.2)
gc.collect()
print("benchmarking scipy svd: ")
tstart = time()
svd(X, full_matrices=False)
results['scipy svd'].append(time() - tstart)
gc.collect()
print("benchmarking scikit-learn randomized_svd: n_iter=0")
tstart = time()
randomized_svd(X, rank, n_iter=0)
results['scikit-learn randomized_svd (n_iter=0)'].append(
time() - tstart)
gc.collect()
print("benchmarking scikit-learn randomized_svd: n_iter=%d "
% n_iter)
tstart = time()
randomized_svd(X, rank, n_iter=n_iter)
results['scikit-learn randomized_svd (n_iter=%d)'
% n_iter].append(time() - tstart)
return results
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:39,代码来源:bench_plot_svd.py
示例12: check_randomized_svd_low_rank
def check_randomized_svd_low_rank(dtype):
# Check that extmath.randomized_svd is consistent with linalg.svd
n_samples = 100
n_features = 500
rank = 5
k = 10
decimal = 5 if dtype == np.float32 else 7
dtype = np.dtype(dtype)
# generate a matrix X of approximate effective rank `rank` and no noise
# component (very structured signal):
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.0,
random_state=0).astype(dtype, copy=False)
assert_equal(X.shape, (n_samples, n_features))
# compute the singular values of X using the slow exact method
U, s, V = linalg.svd(X, full_matrices=False)
# Convert the singular values to the specific dtype
U = U.astype(dtype, copy=False)
s = s.astype(dtype, copy=False)
V = V.astype(dtype, copy=False)
for normalizer in ['auto', 'LU', 'QR']: # 'none' would not be stable
# compute the singular values of X using the fast approximate method
Ua, sa, Va = randomized_svd(
X, k, power_iteration_normalizer=normalizer, random_state=0)
# If the input dtype is float, then the output dtype is float of the
# same bit size (f32 is not upcast to f64)
# But if the input dtype is int, the output dtype is float64
if dtype.kind == 'f':
assert Ua.dtype == dtype
assert sa.dtype == dtype
assert Va.dtype == dtype
else:
assert Ua.dtype == np.float64
assert sa.dtype == np.float64
assert Va.dtype == np.float64
assert_equal(Ua.shape, (n_samples, k))
assert_equal(sa.shape, (k,))
assert_equal(Va.shape, (k, n_features))
# ensure that the singular values of both methods are equal up to the
# real rank of the matrix
assert_almost_equal(s[:k], sa, decimal=decimal)
# check the singular vectors too (while not checking the sign)
assert_almost_equal(np.dot(U[:, :k], V[:k, :]), np.dot(Ua, Va),
decimal=decimal)
# check the sparse matrix representation
X = sparse.csr_matrix(X)
# compute the singular values of X using the fast approximate method
Ua, sa, Va = \
randomized_svd(X, k, power_iteration_normalizer=normalizer,
random_state=0)
if dtype.kind == 'f':
assert Ua.dtype == dtype
assert sa.dtype == dtype
assert Va.dtype == dtype
else:
assert Ua.dtype.kind == 'f'
assert sa.dtype.kind == 'f'
assert Va.dtype.kind == 'f'
assert_almost_equal(s[:rank], sa[:rank], decimal=decimal)
开发者ID:BasilBeirouti,项目名称:scikit-learn,代码行数:70,代码来源:test_extmath.py
示例13: compute_bench
def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
it = 0
timeset = defaultdict(lambda: [])
err = defaultdict(lambda: [])
max_it = len(samples_range) * len(features_range)
for n_samples in samples_range:
for n_features in features_range:
it += 1
print('====================')
print('Iteration %03d of %03d' % (it, max_it))
print('====================')
X = np.abs(make_low_rank_matrix(n_samples, n_features,
effective_rank=rank, tail_strength=0.2))
gc.collect()
print("benching nndsvd-nmf: ")
tstart = time()
m = NMF(n_components=30, tol=tolerance, init='nndsvd').fit(X)
tend = time() - tstart
timeset['nndsvd-nmf'].append(tend)
err['nndsvd-nmf'].append(m.reconstruction_err_)
print(m.reconstruction_err_, tend)
gc.collect()
print("benching nndsvda-nmf: ")
tstart = time()
m = NMF(n_components=30, init='nndsvda',
tol=tolerance).fit(X)
tend = time() - tstart
timeset['nndsvda-nmf'].append(tend)
err['nndsvda-nmf'].append(m.reconstruction_err_)
print(m.reconstruction_err_, tend)
gc.collect()
print("benching nndsvdar-nmf: ")
tstart = time()
m = NMF(n_components=30, init='nndsvdar',
tol=tolerance).fit(X)
tend = time() - tstart
timeset['nndsvdar-nmf'].append(tend)
err['nndsvdar-nmf'].append(m.reconstruction_err_)
print(m.reconstruction_err_, tend)
gc.collect()
print("benching random-nmf")
tstart = time()
m = NMF(n_components=30, init=None, max_iter=1000,
tol=tolerance).fit(X)
tend = time() - tstart
timeset['random-nmf'].append(tend)
err['random-nmf'].append(m.reconstruction_err_)
print(m.reconstruction_err_, tend)
gc.collect()
print("benching alt-random-nmf")
tstart = time()
W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
tend = time() - tstart
timeset['alt-random-nmf'].append(tend)
err['alt-random-nmf'].append(np.linalg.norm(X - np.dot(W, H)))
print(np.linalg.norm(X - np.dot(W, H)), tend)
return timeset, err
开发者ID:Calvin-O,项目名称:scikit-learn,代码行数:64,代码来源:bench_plot_nmf.py
注:本文中的sklearn.datasets.samples_generator.make_low_rank_matrix函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论