本文整理汇总了Python中sklearn.utils.testing.assert_greater_equal函数的典型用法代码示例。如果您正苦于以下问题:Python assert_greater_equal函数的具体用法?Python assert_greater_equal怎么用?Python assert_greater_equal使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了assert_greater_equal函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_bayesian_mixture_predict_predict_proba
def test_bayesian_mixture_predict_predict_proba():
# this is the same test as test_gaussian_mixture_predict_predict_proba()
rng = np.random.RandomState(0)
rand_data = RandomData(rng)
for prior_type in PRIOR_TYPE:
for covar_type in COVARIANCE_TYPE:
X = rand_data.X[covar_type]
Y = rand_data.Y
bgmm = BayesianGaussianMixture(
n_components=rand_data.n_components,
random_state=rng,
weight_concentration_prior_type=prior_type,
covariance_type=covar_type)
# Check a warning message arrive if we don't do fit
assert_raise_message(NotFittedError,
"This BayesianGaussianMixture instance"
" is not fitted yet. Call 'fit' with "
"appropriate arguments before using "
"this method.", bgmm.predict, X)
bgmm.fit(X)
Y_pred = bgmm.predict(X)
Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1)
assert_array_equal(Y_pred, Y_pred_proba)
assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95)
开发者ID:mikebotazzo,项目名称:scikit-learn,代码行数:26,代码来源:test_bayesian_mixture.py
示例2: test_monotonic_likelihood
def test_monotonic_likelihood():
# We check that each step of the each step of variational inference without
# regularization improve monotonically the training set of the bound
rng = np.random.RandomState(0)
rand_data = RandomData(rng, scale=7)
n_components = rand_data.n_components
for covar_type in COVARIANCE_TYPE:
X = rand_data.X[covar_type]
bgmm = BayesianGaussianMixture(
n_components=2 * n_components,
covariance_type=covar_type,
warm_start=True,
max_iter=1,
random_state=rng,
tol=1e-4,
)
current_lower_bound = -np.infty
# Do one training iteration at a time so we can make sure that the
# training log likelihood increases after each iteration.
for _ in range(500):
prev_lower_bound = current_lower_bound
current_lower_bound = bgmm.fit(X).lower_bound_
assert_greater_equal(current_lower_bound, prev_lower_bound)
if bgmm.converged_:
break
assert bgmm.converged_
开发者ID:mannby,项目名称:scikit-learn,代码行数:28,代码来源:test_bayesian_mixture.py
示例3: test_min_weight_fraction_leaf
def test_min_weight_fraction_leaf():
"""Test if leaves contain at least min_weight_fraction_leaf of the
training set"""
X = np.asfortranarray(iris.data.astype(tree._tree.DTYPE))
y = iris.target
rng = np.random.RandomState(0)
weights = rng.rand(X.shape[0])
total_weight = np.sum(weights)
# test both DepthFirstTreeBuilder and BestFirstTreeBuilder
# by setting max_leaf_nodes
for max_leaf_nodes, name, frac in product((None, 1000),
ALL_TREES,
np.linspace(0, 0.5, 6)):
TreeEstimator = ALL_TREES[name]
est = TreeEstimator(min_weight_fraction_leaf=frac,
max_leaf_nodes=max_leaf_nodes,
random_state=0)
est.fit(X, y, sample_weight=weights)
out = est.tree_.apply(X)
node_weights = np.bincount(out, weights=weights)
# drop inner nodes
leaf_weights = node_weights[node_weights != 0]
assert_greater_equal(
np.min(leaf_weights),
total_weight * est.min_weight_fraction_leaf,
"Failed with {0} "
"min_weight_fraction_leaf={1}".format(
name, est.min_weight_fraction_leaf))
开发者ID:andosa,项目名称:scikit-learn,代码行数:29,代码来源:test_tree.py
示例4: check_min_weight_fraction_leaf
def check_min_weight_fraction_leaf(name, X, y):
"""Test if leaves contain at least min_weight_fraction_leaf of the
training set"""
ForestEstimator = FOREST_ESTIMATORS[name]
rng = np.random.RandomState(0)
weights = rng.rand(X.shape[0])
total_weight = np.sum(weights)
# test both DepthFirstTreeBuilder and BestFirstTreeBuilder
# by setting max_leaf_nodes
for max_leaf_nodes in (None, 1000):
for frac in np.linspace(0, 0.5, 6):
est = ForestEstimator(min_weight_fraction_leaf=frac,
max_leaf_nodes=max_leaf_nodes,
random_state=0)
if isinstance(est, (RandomForestClassifier,
RandomForestRegressor)):
est.bootstrap = False
est.fit(X, y, sample_weight=weights)
out = est.estimators_[0].tree_.apply(X)
node_weights = np.bincount(out, weights=weights)
# drop inner nodes
leaf_weights = node_weights[node_weights != 0]
assert_greater_equal(
np.min(leaf_weights),
total_weight * est.min_weight_fraction_leaf,
"Failed with {0} "
"min_weight_fraction_leaf={1}".format(
name, est.min_weight_fraction_leaf))
开发者ID:0x0all,项目名称:scikit-learn,代码行数:29,代码来源:test_forest.py
示例5: check_min_weight_fraction_leaf
def check_min_weight_fraction_leaf(name):
X, y = hastie_X, hastie_y
# Test if leaves contain at least min_weight_fraction_leaf of the
# training set
ForestEstimator = FOREST_ESTIMATORS[name]
rng = np.random.RandomState(0)
weights = rng.rand(X.shape[0])
total_weight = np.sum(weights)
# test both DepthFirstTreeBuilder and BestFirstTreeBuilder
# by setting max_leaf_nodes
for frac in np.linspace(0, 0.5, 6):
est = ForestEstimator(min_weight_fraction_leaf=frac, n_estimators=1,
random_state=0)
if "RandomForest" in name:
est.bootstrap = False
est.fit(X, y, sample_weight=weights)
out = est.estimators_[0].tree_.apply(X)
node_weights = np.bincount(out, weights=weights)
# drop inner nodes
leaf_weights = node_weights[node_weights != 0]
assert_greater_equal(
np.min(leaf_weights),
total_weight * est.min_weight_fraction_leaf,
"Failed with {0} "
"min_weight_fraction_leaf={1}".format(
name, est.min_weight_fraction_leaf))
开发者ID:henrywoo,项目名称:scikit-learn,代码行数:29,代码来源:test_forest.py
示例6: test_min_max_scaler_1d
def test_min_max_scaler_1d():
"""Test scaling of dataset along single axis"""
rng = np.random.RandomState(0)
X = rng.randn(5)
X_orig_copy = X.copy()
scaler = MinMaxScaler()
X_scaled = scaler.fit(X).transform(X)
assert_array_almost_equal(X_scaled.min(axis=0), 0.0)
assert_array_almost_equal(X_scaled.max(axis=0), 1.0)
# check inverse transform
X_scaled_back = scaler.inverse_transform(X_scaled)
assert_array_almost_equal(X_scaled_back, X_orig_copy)
# Test with 1D list
X = [0., 1., 2, 0.4, 1.]
scaler = MinMaxScaler()
X_scaled = scaler.fit(X).transform(X)
assert_array_almost_equal(X_scaled.min(axis=0), 0.0)
assert_array_almost_equal(X_scaled.max(axis=0), 1.0)
# Constant feature.
X = np.zeros(5)
scaler = MinMaxScaler()
X_scaled = scaler.fit(X).transform(X)
assert_greater_equal(X_scaled.min(), 0.)
assert_less_equal(X_scaled.max(), 1.)
开发者ID:0x0all,项目名称:scikit-learn,代码行数:28,代码来源:test_data.py
示例7: test_monotonic_likelihood
def test_monotonic_likelihood():
# We check that each step of the EM without regularization improve
# monotonically the training set likelihood
rng = np.random.RandomState(0)
rand_data = RandomData(rng, scale=7)
n_components = rand_data.n_components
for covar_type in COVARIANCE_TYPE:
X = rand_data.X[covar_type]
gmm = GaussianMixture(n_components=n_components,
covariance_type=covar_type, reg_covar=0,
warm_start=True, max_iter=1, random_state=rng,
tol=1e-7)
current_log_likelihood = -np.infty
with warnings.catch_warnings():
warnings.simplefilter("ignore", ConvergenceWarning)
# Do one training iteration at a time so we can make sure that the
# training log likelihood increases after each iteration.
for _ in range(600):
prev_log_likelihood = current_log_likelihood
try:
current_log_likelihood = gmm.fit(X).score(X)
except ConvergenceWarning:
pass
assert_greater_equal(current_log_likelihood,
prev_log_likelihood)
if gmm.converged_:
break
assert gmm.converged_
开发者ID:jerry-dumblauskas,项目名称:scikit-learn,代码行数:31,代码来源:test_gaussian_mixture.py
示例8: check_threshold
def check_threshold(birch_instance, threshold):
"""Use the leaf linked list for traversal"""
current_leaf = birch_instance.dummy_leaf_.next_leaf_
while current_leaf:
subclusters = current_leaf.subclusters_
for sc in subclusters:
assert_greater_equal(threshold, sc.radius)
current_leaf = current_leaf.next_leaf_
开发者ID:mbarnes1,项目名称:entity_resolution,代码行数:8,代码来源:test_birch.py
示例9: test_pearsonr_mat
def test_pearsonr_mat(self):
pear_mat = pearsonr_mat(self.mat)
assert_equal(pear_mat.shape, (10, 10))
pear_mat = pearsonr_mat(self.mat, self.w_mat)
assert_equal(pear_mat.shape, (10, 10))
assert_greater_equal(np.min(pear_mat), -1)
assert_less_equal(np.max(pear_mat), 1)
开发者ID:flaviassantos,项目名称:pyod,代码行数:9,代码来源:test_stat_models.py
示例10: test_calibration_multiclass
def test_calibration_multiclass():
"""Test calibration for multiclass """
# test multi-class setting with classifier that implements
# only decision function
clf = LinearSVC()
X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42,
centers=3, cluster_std=3.0)
# Use categorical labels to check that CalibratedClassifierCV supports
# them correctly
target_names = np.array(['a', 'b', 'c'])
y = target_names[y_idx]
X_train, y_train = X[::2], y[::2]
X_test, y_test = X[1::2], y[1::2]
clf.fit(X_train, y_train)
for method in ['isotonic', 'sigmoid']:
cal_clf = CalibratedClassifierCV(clf, method=method, cv=2)
cal_clf.fit(X_train, y_train)
probas = cal_clf.predict_proba(X_test)
assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test)))
# Check that log-loss of calibrated classifier is smaller than
# log-loss of naively turned OvR decision function to probabilities
# via softmax
def softmax(y_pred):
e = np.exp(-y_pred)
return e / e.sum(axis=1).reshape(-1, 1)
uncalibrated_log_loss = \
log_loss(y_test, softmax(clf.decision_function(X_test)))
calibrated_log_loss = log_loss(y_test, probas)
assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)
# Test that calibration of a multiclass classifier decreases log-loss
# for RandomForestClassifier
X, y = make_blobs(n_samples=100, n_features=2, random_state=42,
cluster_std=3.0)
X_train, y_train = X[::2], y[::2]
X_test, y_test = X[1::2], y[1::2]
clf = RandomForestClassifier(n_estimators=10, random_state=42)
clf.fit(X_train, y_train)
clf_probs = clf.predict_proba(X_test)
loss = log_loss(y_test, clf_probs)
for method in ['isotonic', 'sigmoid']:
cal_clf = CalibratedClassifierCV(clf, method=method, cv=3)
cal_clf.fit(X_train, y_train)
cal_clf_probs = cal_clf.predict_proba(X_test)
cal_loss = log_loss(y_test, cal_clf_probs)
assert_greater(loss, cal_loss)
开发者ID:abecadel,项目名称:scikit-learn,代码行数:53,代码来源:test_calibration.py
示例11: test_multiple_init
def test_multiple_init():
# Test that multiple inits does not much worse than a single one
rng = np.random.RandomState(0)
n_samples, n_features, n_components = 50, 5, 2
X = rng.randn(n_samples, n_features)
for cv_type in COVARIANCE_TYPE:
train1 = GaussianMixture(n_components=n_components,
covariance_type=cv_type,
random_state=0).fit(X).score(X)
train2 = GaussianMixture(n_components=n_components,
covariance_type=cv_type,
random_state=0, n_init=5).fit(X).score(X)
assert_greater_equal(train2, train1)
开发者ID:jerry-dumblauskas,项目名称:scikit-learn,代码行数:13,代码来源:test_gaussian_mixture.py
示例12: test_lda_preplexity
def test_lda_preplexity():
"""
Test LDA preplexity for batch training
preplexity should be lower after each iteration
"""
n_topics, alpha, eta, X = _build_sparse_mtx()
lda_1 = OnlineLDA(n_topics=n_topics, alpha=alpha, eta=eta, random_state=0)
lda_2 = OnlineLDA(n_topics=n_topics, alpha=alpha, eta=eta, random_state=0)
distr_1 = lda_1.fit_transform(X, max_iters=1)
prep_1 = lda_1.preplexity(X, distr_1, sub_sampling=False)
distr_2 = lda_2.fit_transform(X, max_iters=10)
prep_2 = lda_2.preplexity(X, distr_2, sub_sampling=False)
assert_greater_equal(prep_1, prep_2)
开发者ID:joewandy,项目名称:topicModels,代码行数:15,代码来源:test.py
示例13: test_lda_score
def test_lda_score():
# Test LDA score for batch training
# score should be higher after each iteration
n_topics, X = _build_sparse_mtx()
for method in ('online', 'batch'):
lda_1 = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, learning_method=method,
total_samples=100, random_state=0)
lda_2 = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, learning_method=method,
total_samples=100, random_state=0)
lda_1.fit_transform(X)
score_1 = lda_1.score(X)
lda_2.fit_transform(X)
score_2 = lda_2.score(X)
assert_greater_equal(score_2, score_1)
开发者ID:andaag,项目名称:scikit-learn,代码行数:15,代码来源:test_online_lda.py
示例14: test_arpack_eigsh_initialization
def test_arpack_eigsh_initialization():
# Non-regression test that shows null-space computation is better with
# initialization of eigsh from [-1,1] instead of [0,1]
random_state = check_random_state(42)
A = random_state.rand(50, 50)
A = np.dot(A.T, A) # create s.p.d. matrix
A = graph_laplacian(A) + 1e-7 * np.identity(A.shape[0])
k = 5
# Test if eigsh is working correctly
# New initialization [-1,1] (as in original ARPACK)
# Was [0,1] before, with which this test could fail
v0 = random_state.uniform(-1,1, A.shape[0])
w, _ = eigsh(A, k=k, sigma=0.0, v0=v0)
# Eigenvalues of s.p.d. matrix should be nonnegative, w[0] is smallest
assert_greater_equal(w[0], 0)
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:18,代码来源:test_utils.py
示例15: test_select_fdr_regression
def test_select_fdr_regression():
# Test that fdr heuristic actually has low FDR.
def single_fdr(alpha, n_informative, random_state):
X, y = make_regression(
n_samples=150,
n_features=20,
n_informative=n_informative,
shuffle=False,
random_state=random_state,
noise=10,
)
with warnings.catch_warnings(record=True):
# Warnings can be raised when no features are selected
# (low alpha or very noisy data)
univariate_filter = SelectFdr(f_regression, alpha=alpha)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(f_regression, mode="fdr", param=alpha).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
num_false_positives = np.sum(support[n_informative:] == 1)
num_true_positives = np.sum(support[:n_informative] == 1)
if num_false_positives == 0:
return 0.0
false_discovery_rate = num_false_positives / (num_true_positives + num_false_positives)
return false_discovery_rate
for alpha in [0.001, 0.01, 0.1]:
for n_informative in [1, 5, 10]:
# As per Benjamini-Hochberg, the expected false discovery rate
# should be lower than alpha:
# FDR = E(FP / (TP + FP)) <= alpha
false_discovery_rate = np.mean(
[single_fdr(alpha, n_informative, random_state) for random_state in range(100)]
)
assert_greater_equal(alpha, false_discovery_rate)
# Make sure that the empirical false discovery rate increases
# with alpha:
if false_discovery_rate != 0:
assert_greater(false_discovery_rate, alpha / 10)
开发者ID:nelson-liu,项目名称:scikit-learn,代码行数:43,代码来源:test_feature_select.py
示例16: test_lda_perplexity
def test_lda_perplexity():
# Test LDA perplexity for batch training
# perplexity should be lower after each iteration
n_topics, X = _build_sparse_mtx()
for method in ('online', 'batch'):
lda_1 = LatentDirichletAllocation(n_topics=n_topics, max_iter=1, learning_method=method,
total_samples=100, random_state=0)
lda_2 = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, learning_method=method,
total_samples=100, random_state=0)
distr_1 = lda_1.fit_transform(X)
perp_1 = lda_1.perplexity(X, distr_1, sub_sampling=False)
distr_2 = lda_2.fit_transform(X)
perp_2 = lda_2.perplexity(X, distr_2, sub_sampling=False)
assert_greater_equal(perp_1, perp_2)
perp_1_subsampling = lda_1.perplexity(X, distr_1, sub_sampling=True)
perp_2_subsampling = lda_2.perplexity(X, distr_2, sub_sampling=True)
assert_greater_equal(perp_1_subsampling, perp_2_subsampling)
开发者ID:andaag,项目名称:scikit-learn,代码行数:19,代码来源:test_online_lda.py
示例17: test_lda_perplexity
def test_lda_perplexity(method):
# Test LDA perplexity for batch training
# perplexity should be lower after each iteration
n_components, X = _build_sparse_mtx()
lda_1 = LatentDirichletAllocation(n_components=n_components,
max_iter=1, learning_method=method,
total_samples=100, random_state=0)
lda_2 = LatentDirichletAllocation(n_components=n_components,
max_iter=10, learning_method=method,
total_samples=100, random_state=0)
lda_1.fit(X)
perp_1 = lda_1.perplexity(X, sub_sampling=False)
lda_2.fit(X)
perp_2 = lda_2.perplexity(X, sub_sampling=False)
assert_greater_equal(perp_1, perp_2)
perp_1_subsampling = lda_1.perplexity(X, sub_sampling=True)
perp_2_subsampling = lda_2.perplexity(X, sub_sampling=True)
assert_greater_equal(perp_1_subsampling, perp_2_subsampling)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:20,代码来源:test_online_lda.py
示例18: check_min_weight_fraction_leaf
def check_min_weight_fraction_leaf(name, datasets, sparse=False):
"""Test if leaves contain at least min_weight_fraction_leaf of the
training set"""
if sparse:
X = DATASETS[datasets]["X_sparse"].astype(np.float32)
else:
X = DATASETS[datasets]["X"].astype(np.float32)
y = DATASETS[datasets]["y"]
weights = rng.rand(X.shape[0])
total_weight = np.sum(weights)
TreeEstimator = ALL_TREES[name]
# test both DepthFirstTreeBuilder and BestFirstTreeBuilder
# by setting max_leaf_nodes
for max_leaf_nodes, frac in product((None, 1000), np.linspace(0, 0.5, 6)):
est = TreeEstimator(min_weight_fraction_leaf=frac,
max_leaf_nodes=max_leaf_nodes,
random_state=0)
est.fit(X, y, sample_weight=weights)
if sparse:
out = est.tree_.apply(X.tocsr())
else:
out = est.tree_.apply(X)
node_weights = np.bincount(out, weights=weights)
# drop inner nodes
leaf_weights = node_weights[node_weights != 0]
assert_greater_equal(
np.min(leaf_weights),
total_weight * est.min_weight_fraction_leaf,
"Failed with {0} "
"min_weight_fraction_leaf={1}".format(
name, est.min_weight_fraction_leaf))
开发者ID:JeongSeonGyo,项目名称:EnergyData,代码行数:37,代码来源:test_tree.py
示例19: test_label_kfold
def test_label_kfold():
rng = np.random.RandomState(0)
# Parameters of the test
n_labels = 15
n_samples = 1000
n_folds = 5
# Construct the test data
tolerance = 0.05 * n_samples # 5 percent error allowed
labels = rng.randint(0, n_labels, n_samples)
folds = cval.LabelKFold(labels, n_folds=n_folds).idxs
ideal_n_labels_per_fold = n_samples // n_folds
# Check that folds have approximately the same size
assert_equal(len(folds), len(labels))
for i in np.unique(folds):
assert_greater_equal(tolerance,
abs(sum(folds == i) - ideal_n_labels_per_fold))
# Check that each label appears only in 1 fold
for label in np.unique(labels):
assert_equal(len(np.unique(folds[labels == label])), 1)
# Check that no label is on both sides of the split
labels = np.asarray(labels, dtype=object)
for train, test in cval.LabelKFold(labels, n_folds=n_folds):
assert_equal(len(np.intersect1d(labels[train], labels[test])), 0)
# Construct the test data
labels = ['Albert', 'Jean', 'Bertrand', 'Michel', 'Jean',
'Francis', 'Robert', 'Michel', 'Rachel', 'Lois',
'Michelle', 'Bernard', 'Marion', 'Laura', 'Jean',
'Rachel', 'Franck', 'John', 'Gael', 'Anna', 'Alix',
'Robert', 'Marion', 'David', 'Tony', 'Abel', 'Becky',
'Madmood', 'Cary', 'Mary', 'Alexandre', 'David', 'Francis',
'Barack', 'Abdoul', 'Rasha', 'Xi', 'Silvia']
labels = np.asarray(labels, dtype=object)
n_labels = len(np.unique(labels))
n_samples = len(labels)
n_folds = 5
tolerance = 0.05 * n_samples # 5 percent error allowed
folds = cval.LabelKFold(labels, n_folds=n_folds).idxs
ideal_n_labels_per_fold = n_samples // n_folds
# Check that folds have approximately the same size
assert_equal(len(folds), len(labels))
for i in np.unique(folds):
assert_greater_equal(tolerance,
abs(sum(folds == i) - ideal_n_labels_per_fold))
# Check that each label appears only in 1 fold
for label in np.unique(labels):
assert_equal(len(np.unique(folds[labels == label])), 1)
# Check that no label is on both sides of the split
for train, test in cval.LabelKFold(labels, n_folds=n_folds):
assert_equal(len(np.intersect1d(labels[train], labels[test])), 0)
# Should fail if there are more folds than labels
labels = np.array([1, 1, 1, 2, 2])
assert_raises(ValueError, cval.LabelKFold, labels, n_folds=3)
开发者ID:AppliedArtificialIntelligence,项目名称:scikit-learn,代码行数:63,代码来源:test_cross_validation.py
示例20: test_label_kfold
def test_label_kfold():
rng = np.random.RandomState(0)
# Parameters of the test
n_labels = 15
n_samples = 1000
n_folds = 5
X = y = np.ones(n_samples)
# Construct the test data
tolerance = 0.05 * n_samples # 5 percent error allowed
labels = rng.randint(0, n_labels, n_samples)
ideal_n_labels_per_fold = n_samples // n_folds
len(np.unique(labels))
# Get the test fold indices from the test set indices of each fold
folds = np.zeros(n_samples)
lkf = LabelKFold(n_folds=n_folds)
for i, (_, test) in enumerate(lkf.split(X, y, labels)):
folds[test] = i
# Check that folds have approximately the same size
assert_equal(len(folds), len(labels))
for i in np.unique(folds):
assert_greater_equal(tolerance,
abs(sum(folds == i) - ideal_n_labels_per_fold))
# Check that each label appears only in 1 fold
for label in np.unique(labels):
assert_equal(len(np.unique(folds[labels == label])), 1)
# Check that no label is on both sides of the split
labels = np.asarray(labels, dtype=object)
for train, test in lkf.split(X, y, labels):
assert_equal(len(np.intersect1d(labels[train], labels[test])), 0)
# Construct the test data
labels = np.array(['Albert', 'Jean', 'Bertrand', 'Michel', 'Jean',
'Francis', 'Robert', 'Michel', 'Rachel', 'Lois',
'Michelle', 'Bernard', 'Marion', 'Laura', 'Jean',
'Rachel', 'Franck', 'John', 'Gael', 'Anna', 'Alix',
'Robert', 'Marion', 'David', 'Tony', 'Abel', 'Becky',
'Madmood', 'Cary', 'Mary', 'Alexandre', 'David',
'Francis', 'Barack', 'Abdoul', 'Rasha', 'Xi', 'Silvia'])
n_labels = len(np.unique(labels))
n_samples = len(labels)
n_folds = 5
tolerance = 0.05 * n_samples # 5 percent error allowed
ideal_n_labels_per_fold = n_samples // n_folds
X = y = np.ones(n_samples)
# Get the test fold indices from the test set indices of each fold
folds = np.zeros(n_samples)
for i, (_, test) in enumerate(lkf.split(X, y, labels)):
folds[test] = i
# Check that folds have approximately the same size
assert_equal(len(folds), len(labels))
for i in np.unique(folds):
assert_greater_equal(tolerance,
abs(sum(folds == i) - ideal_n_labels_per_fold))
# Check that each label appears only in 1 fold
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
for label in np.unique(labels):
assert_equal(len(np.unique(folds[labels == label])), 1)
# Check that no label is on both sides of the split
labels = np.asarray(labels, dtype=object)
for train, test in lkf.split(X, y, labels):
assert_equal(len(np.intersect1d(labels[train], labels[test])), 0)
# Should fail if there are more folds than labels
labels = np.array([1, 1, 1, 2, 2])
X = y = np.ones(len(labels))
assert_raises_regexp(ValueError, "Cannot have number of folds.*greater",
next, LabelKFold(n_folds=3).split(X, y, labels))
开发者ID:absolutelyNoWarranty,项目名称:scikit-learn,代码行数:82,代码来源:test_split.py
注:本文中的sklearn.utils.testing.assert_greater_equal函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论