This article collects and summarizes typical usage examples of the Python function sklearn.utils.safe_indexing. If you have been wondering what exactly safe_indexing does, how to call it, or where to find concrete examples of it in use, the hand-picked code samples below should help.
The following presents 20 code examples of safe_indexing, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
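Before turning to the collected examples, the short sketch below (the sample data and the optional pandas import are illustrative assumptions, not part of any example on this page) shows the basic behaviour of safe_indexing: one call selects rows positionally from a plain list, a NumPy array, or a pandas DataFrame. Note that safe_indexing was deprecated in scikit-learn 0.22 and removed in a later release, so the examples below assume an older scikit-learn version.

import numpy as np
from sklearn.utils import safe_indexing  # deprecated in 0.22, removed later

X_list = [[1, 2], [3, 4], [5, 6]]
X_arr = np.asarray(X_list)
inds = np.array([0, 2])

# The same call selects rows from a list of lists and from an ndarray.
print(safe_indexing(X_list, inds))  # [[1, 2], [5, 6]]
print(safe_indexing(X_arr, inds))   # rows 0 and 2 of the array

try:
    import pandas as pd
    X_df = pd.DataFrame(X_arr, columns=["a", "b"])
    # DataFrames are sliced positionally (via .iloc) by the same call.
    print(safe_indexing(X_df, inds))
except ImportError:
    pass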
Example 1: sample_data

def sample_data(data, train_idx, test_idx):
    sample = bunch.Bunch(train=bunch.Bunch(), test=bunch.Bunch(), target_names=None)

    # sample.target_names = data.target_names
    # sample.train.data = safe_indexing(data.train.data, train_idx)
    sample.train.target = safe_indexing(data.train.target, train_idx)
    sample.train.bow = safe_indexing(data.train.bow, train_idx)
    sample.train.remaining = []
    sample.train.validation = []
    sample.train.revisit = []
    sample.train.snippets = safe_indexing(data.train.snippets, train_idx)
    sample.train.sizes = safe_indexing(data.train.sizes, train_idx)
    sample.train.snippet_cost = safe_indexing(data.train.snippet_cost, train_idx)

    if len(test_idx) > 0:  # if there are test indexes
        # sample.test.data = safe_indexing(data.train.target, test_idx)
        # index the held-out split with test_idx
        sample.test.target = safe_indexing(data.train.target, test_idx)
        sample.test.bow = safe_indexing(data.train.bow, test_idx)
        sample.test.snippets = safe_indexing(data.train.snippets, test_idx)
        sample.test.sizes = safe_indexing(data.train.sizes, test_idx)
        sample.test.snippet_cost = safe_indexing(data.train.snippet_cost, test_idx)
    else:
        sample.test = data.test

    return sample.train, sample.test

Developer ID: mramire8, Project: utility-based, Lines of code: 29, Source file: experimentutils.py
Example 2: _safe_split

def _safe_split(estimator, X, y, indices, train_indices=None):
    """Create subset of dataset and properly handle kernels."""
    if hasattr(estimator, 'kernel') and callable(estimator.kernel):
        # cannot compute the kernel values with custom function
        raise ValueError("Cannot use a custom kernel function. "
                         "Precompute the kernel matrix instead.")

    if not hasattr(X, "shape"):
        if getattr(estimator, "_pairwise", False):
            raise ValueError("Precomputed kernels or affinity matrices have "
                             "to be passed as arrays or sparse matrices.")
        X_subset = [X[idx] for idx in indices]
    else:
        if getattr(estimator, "_pairwise", False):
            # X is a precomputed square kernel matrix
            if X.shape[0] != X.shape[1]:
                raise ValueError("X should be a square kernel matrix")
            if train_indices is None:
                X_subset = X[np.ix_(indices, indices)]
            else:
                X_subset = X[np.ix_(indices, train_indices)]
        else:
            X_subset = safe_indexing(X, indices)

    if y is not None:
        y_subset = safe_indexing(y, indices)
    else:
        y_subset = None

    return X_subset, y_subset

Developer ID: vene, Project: ambra, Lines of code: 30, Source file: cross_validation.py
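The kernel branch of _safe_split above uses np.ix_ to cut a rectangular test-by-train block out of a precomputed kernel matrix. A tiny standalone illustration of that mechanism (the 4x4 Gram matrix is made up for demonstration):

import numpy as np

K = np.arange(16, dtype=float).reshape(4, 4)  # stand-in for a precomputed kernel
train_idx = np.array([0, 1, 2])
test_idx = np.array([3])

K_train = K[np.ix_(train_idx, train_idx)]  # (3, 3) block used for fitting
K_test = K[np.ix_(test_idx, train_idx)]    # (1, 3) block used for scoring
print(K_train.shape, K_test.shape)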
Example 3: _safe_split

def _safe_split(depthmaps, offset_points_projected, direction_vectors, true_joints, indices):
    depth_subset = safe_indexing(depthmaps, indices)
    offsets_subset = safe_indexing(offset_points_projected, indices)
    directions_subset = safe_indexing(direction_vectors, indices)
    truths_subset = safe_indexing(true_joints, indices)
    return depth_subset, offsets_subset, directions_subset, truths_subset

Developer ID: aoikaneko, Project: RandomTreeWalk, Lines of code: 7, Source file: grid_search.py
Example 4: _split_fit_score_trial

def _split_fit_score_trial(self, X, y, idx=0):
    """
    Splits the dataset, fits a clone of the estimator, then scores it
    according to the required metrics.

    The index of the split is added to the random_state if the
    random_state is not None; this ensures that every split is shuffled
    differently but in a deterministic fashion for testing purposes.
    """
    random_state = self.random_state
    if random_state is not None:
        random_state += idx

    splitter = self._check_cv(self.cv, random_state)

    for train_index, test_index in splitter.split(X, y):
        # Safe indexing handles multiple types of inputs including
        # DataFrames and structured arrays - required for generic splits.
        X_train = safe_indexing(X, train_index)
        y_train = safe_indexing(y, train_index)
        X_test = safe_indexing(X, test_index)
        y_test = safe_indexing(y, test_index)

        model = clone(self.estimator)
        model.fit(X_train, y_train)

        if hasattr(model, "predict_proba"):
            # Get the probabilities for the positive class
            y_scores = model.predict_proba(X_test)[:, 1]
        else:
            # Use the decision function to get the scores
            y_scores = model.decision_function(X_test)

        # Compute the curve metrics and thresholds
        curve_metrics = precision_recall_curve(y_test, y_scores)
        precision, recall, thresholds = curve_metrics

        # Compute the F1 score from precision and recall
        # Don't need to warn for F, precision/recall would have warned
        with np.errstate(divide='ignore', invalid='ignore'):
            beta = self.fbeta ** 2
            f_score = ((1 + beta) * precision * recall /
                       (beta * precision + recall))

        # Ensure thresholds ends at 1
        thresholds = np.append(thresholds, 1)

        # Compute the queue rate
        queue_rate = np.array([
            (y_scores >= threshold).mean()
            for threshold in thresholds
        ])

        yield {
            'thresholds': thresholds,
            'precision': precision,
            'recall': recall,
            'fscore': f_score,
            'queue_rate': queue_rate
        }

Developer ID: DistrictDataLabs, Project: yellowbrick, Lines of code: 60, Source file: threshold.py
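The queue rate computed above is simply the fraction of samples whose score clears each threshold, evaluated on the thresholds returned by precision_recall_curve (plus the appended 1). A small standalone check with arbitrary labels and scores makes the length bookkeeping concrete:

import numpy as np
from sklearn.metrics import precision_recall_curve

y_true = np.array([0, 0, 1, 1, 1, 0])
y_scores = np.array([0.1, 0.4, 0.35, 0.8, 0.65, 0.2])

precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
thresholds = np.append(thresholds, 1)  # same trick as above: match len(precision)

queue_rate = np.array([(y_scores >= t).mean() for t in thresholds])
print(len(precision), len(thresholds), len(queue_rate))  # all equal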
Example 5: test_safe_indexing_mock_pandas

def test_safe_indexing_mock_pandas():
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = MockDataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    X_indexed = safe_indexing(X_df, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)

Developer ID: allefpablo, Project: scikit-learn, Lines of code: 7, Source file: test_utils.py
Example 6: _fit_resample

def _fit_resample(self, X, y):
    random_state = check_random_state(self.random_state)

    idx_under = np.empty((0, ), dtype=int)

    for target_class in np.unique(y):
        if target_class in self.sampling_strategy_.keys():
            n_samples = self.sampling_strategy_[target_class]
            index_target_class = random_state.choice(
                range(np.count_nonzero(y == target_class)),
                size=n_samples,
                replace=self.replacement)
        else:
            index_target_class = slice(None)

        idx_under = np.concatenate(
            (idx_under,
             np.flatnonzero(y == target_class)[index_target_class]),
            axis=0)

    if self.return_indices:
        return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
                idx_under)
    else:
        return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

Developer ID: bodycat, Project: imbalanced-learn, Lines of code: 25, Source file: _random_under_sampler.py
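This _fit_resample is the core of imbalanced-learn's RandomUnderSampler: it draws per-class indices and hands them to safe_indexing. A rough usage sketch (class sizes invented for illustration; assumes an imbalanced-learn version that exposes fit_resample):

import numpy as np
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler

rng = np.random.RandomState(0)
X = rng.randn(120, 2)
y = np.array([0] * 100 + [1] * 20)  # deliberately imbalanced labels

rus = RandomUnderSampler(random_state=0)
X_res, y_res = rus.fit_resample(X, y)  # rows are selected via safe_indexing
print(Counter(y_res))  # both classes reduced to the minority count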
Example 7: _fit_resample

def _fit_resample(self, X, y):
    n_samples = X.shape[0]

    # convert y to z_score
    y_z = (y - y.mean()) / y.std()

    index0 = np.arange(n_samples)
    index_negative = index0[y_z > self.negative_thres]
    index_positive = index0[y_z <= self.positive_thres]
    index_unclassified = [x for x in index0
                          if x not in index_negative
                          and x not in index_positive]

    y_z[index_negative] = 0
    y_z[index_positive] = 1
    y_z[index_unclassified] = -1

    ros = RandomOverSampler(
        sampling_strategy=self.sampling_strategy,
        random_state=self.random_state,
        ratio=self.ratio)
    _, _ = ros.fit_resample(X, y_z)
    sample_indices = ros.sample_indices_

    print("Before sampler: %s. Total after: %s"
          % (Counter(y_z), sample_indices.shape))

    self.sample_indices_ = np.array(sample_indices)

    if self.return_indices:
        return (safe_indexing(X, sample_indices),
                safe_indexing(y, sample_indices),
                sample_indices)
    return (safe_indexing(X, sample_indices),
            safe_indexing(y, sample_indices))

Developer ID: bgruening, Project: galaxytools, Lines of code: 35, Source file: preprocessors.py
Example 8: test_safe_indexing

def test_safe_indexing():
    X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    inds = np.array([1, 2])
    X_inds = safe_indexing(X, inds)
    X_arrays = safe_indexing(np.array(X), inds)
    assert_array_equal(np.array(X_inds), X_arrays)
    assert_array_equal(np.array(X_inds), np.array(X)[inds])

Developer ID: allefpablo, Project: scikit-learn, Lines of code: 7, Source file: test_utils.py
Example 9: generate_train_set

def generate_train_set(self, train_size=None, test_size=None, rand_state=None):
    """
    :param test_size:
    :param rand_state:
    :param train_size: float or int (default=20)
        If float, should be between 0.0 and 1.0 and represent the
        proportion of the dataset to include in the train split. If
        int, represents the absolute number of train samples.
    :return:
    """
    # self.probe.clear()
    # self.gallery.clear()
    if train_size is None and test_size is None:
        self.probe.files_train, self.probe.files_test = [], self.probe.files
        self.gallery.files_train, self.gallery.files_test = [], self.gallery.files
        self.train_indexes, self.test_indexes = [], list(range(0, len(self.probe.files)))
    else:
        n_samples = len(self.probe.files)
        cv = ShuffleSplit(n_samples, test_size=test_size, train_size=train_size, random_state=rand_state)
        train_indexes, test_indexes = next(iter(cv))
        arrays = [self.probe.files, self.gallery.files]
        self.probe.files_train, self.probe.files_test, self.gallery.files_train, self.gallery.files_test = \
            list(chain.from_iterable((safe_indexing(a, train_indexes),
                                      safe_indexing(a, test_indexes)) for a in arrays))
        self.train_indexes, self.test_indexes = train_indexes, test_indexes

    self.train_size = len(self.train_indexes)
    self.test_size = len(self.test_indexes)

Developer ID: AShedko, Project: PyReID, Lines of code: 32, Source file: dataset.py
Example 10: _sample

def _sample(self, X, y):
    """Resample the dataset.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like, shape (n_samples,)
        Corresponding label for each sample in X.

    Returns
    -------
    X_resampled : {ndarray, sparse matrix}, shape (n_samples_new, n_features)
        The array containing the resampled data.

    y_resampled : ndarray, shape (n_samples_new,)
        The corresponding label of `X_resampled`

    """
    self._validate_estimator()

    if self.voting == 'auto':
        if sparse.issparse(X):
            self.voting_ = 'hard'
        else:
            self.voting_ = 'soft'
    else:
        if self.voting in VOTING_KIND:
            self.voting_ = self.voting
        else:
            raise ValueError("'voting' needs to be one of {}. Got {}"
                             " instead.".format(VOTING_KIND, self.voting))

    X_resampled, y_resampled = [], []
    for target_class in np.unique(y):
        if target_class in self.ratio_.keys():
            n_samples = self.ratio_[target_class]
            self.estimator_.set_params(**{'n_clusters': n_samples})
            self.estimator_.fit(X[y == target_class])
            X_new, y_new = self._generate_sample(
                X, y, self.estimator_.cluster_centers_, target_class)
            X_resampled.append(X_new)
            y_resampled.append(y_new)
        else:
            target_class_indices = np.flatnonzero(y == target_class)
            X_resampled.append(safe_indexing(X, target_class_indices))
            y_resampled.append(safe_indexing(y, target_class_indices))

    if sparse.issparse(X):
        X_resampled = sparse.vstack(X_resampled)
    else:
        X_resampled = np.vstack(X_resampled)
    y_resampled = np.hstack(y_resampled)

    return X_resampled, np.array(y_resampled)

Developer ID: glemaitre, Project: imbalanced-learn, Lines of code: 57, Source file: cluster_centroids.py
Example 11: test_safe_indexing_pandas

def test_safe_indexing_pandas():
    try:
        import pandas as pd
    except ImportError:
        raise SkipTest("Pandas not found")
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    X_df = pd.DataFrame(X)
    inds = np.array([1, 2])
    X_df_indexed = safe_indexing(X_df, inds)
    X_indexed = safe_indexing(X_df, inds)
    assert_array_equal(np.array(X_df_indexed), X_indexed)

Developer ID: Flushot, Project: scikit-learn, Lines of code: 11, Source file: test_utils.py
Example 12: generator

def generator(X, y, sample_weight, indices, batch_size):
    while True:
        for index in range(0, len(indices), batch_size):
            X_res = safe_indexing(X, indices[index:index + batch_size])
            y_res = safe_indexing(y, indices[index:index + batch_size])
            if issparse(X_res) and not keep_sparse:
                X_res = X_res.toarray()
            if sample_weight is None:
                yield X_res, y_res
            else:
                sw_res = safe_indexing(sample_weight,
                                       indices[index:index + batch_size])
                yield X_res, y_res, sw_res

Developer ID: chkoar, Project: imbalanced-learn, Lines of code: 13, Source file: _generator.py
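The batching pattern in this generator can be reproduced standalone. The short sketch below (with made-up data and no Keras involved) shows how safe_indexing slices shuffled positional indices into mini-batches:

import numpy as np
from sklearn.utils import safe_indexing

X = np.arange(20, dtype=float).reshape(10, 2)
y = np.arange(10)
indices = np.random.RandomState(0).permutation(10)
batch_size = 4

for start in range(0, len(indices), batch_size):
    batch_idx = indices[start:start + batch_size]
    X_batch = safe_indexing(X, batch_idx)  # rows in shuffled order
    y_batch = safe_indexing(y, batch_idx)
    print(X_batch.shape, y_batch.shape)  # (4, 2) (4,) for full batches, (2, 2) (2,) last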
Example 13: _sample

def _sample(self, X, y):
    """Resample the dataset.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like, shape (n_samples,)
        Corresponding label for each sample in X.

    Returns
    -------
    X_resampled : {ndarray, sparse matrix}, shape (n_samples_new, n_features)
        The array containing the resampled data.

    y_resampled : ndarray, shape (n_samples_new,)
        The corresponding label of `X_resampled`

    idx_under : ndarray, shape (n_samples, )
        If `return_indices` is `True`, an array will be returned
        containing a boolean for each sample to represent whether
        that sample was selected or not.

    """
    random_state = check_random_state(self.random_state)

    idx_under = np.empty((0, ), dtype=int)

    for target_class in np.unique(y):
        if target_class in self.ratio_.keys():
            n_samples = self.ratio_[target_class]
            index_target_class = random_state.choice(
                range(np.count_nonzero(y == target_class)),
                size=n_samples,
                replace=self.replacement)
        else:
            index_target_class = slice(None)

        idx_under = np.concatenate(
            (idx_under, np.flatnonzero(y == target_class)[
                index_target_class]), axis=0)

    if self.return_indices:
        return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
                idx_under)
    else:
        return safe_indexing(X, idx_under), safe_indexing(y, idx_under)

Developer ID: glemaitre, Project: imbalanced-learn, Lines of code: 49, Source file: random_under_sampler.py
Example 14: _sample

def _sample(self, X, y):
    # FIXME: uncomment in version 0.6
    # self._validate_estimator()

    X_resampled = X.copy()
    y_resampled = y.copy()

    for class_sample, n_samples in self.sampling_strategy_.items():
        if n_samples == 0:
            continue
        target_class_indices = np.flatnonzero(y == class_sample)
        X_class = safe_indexing(X, target_class_indices)

        self.nn_k_.fit(X_class)
        nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
        X_new, y_new = self._make_samples(X_class, y.dtype, class_sample,
                                          X_class, nns, n_samples, 1.0)

        if sparse.issparse(X_new):
            X_resampled = sparse.vstack([X_resampled, X_new])
            sparse_func = 'tocsc' if X.format == 'csc' else 'tocsr'
            X_resampled = getattr(X_resampled, sparse_func)()
        else:
            X_resampled = np.vstack((X_resampled, X_new))
        y_resampled = np.hstack((y_resampled, y_new))

    return X_resampled, y_resampled

Developer ID: scikit-learn-contrib, Project: imbalanced-learn, Lines of code: 27, Source file: _smote.py
Example 15: _index_param_value

def _index_param_value(X, v, indices):
    """Private helper function for parameter value indexing."""
    if not _is_arraylike(v) or _num_samples(v) != _num_samples(X):
        # pass through: skip indexing
        return v
    if sp.issparse(v):
        v = v.tocsr()
    return safe_indexing(v, indices)

Developer ID: Meyenhofer, Project: pattern-recognition-2016, Lines of code: 8, Source file: _validation.py
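In cross-validation code, this helper decides whether a fit parameter should be sliced along with X (for example a per-sample sample_weight) or passed through untouched (for example a scalar option). A rough illustration of that distinction, using only safe_indexing and invented data:

import numpy as np
from sklearn.utils import safe_indexing

X = np.zeros((5, 3))
sample_weight = np.array([1.0, 2.0, 3.0, 4.0, 5.0])  # aligned with the rows of X
train_idx = np.array([0, 2, 4])

# A sample-aligned parameter gets the same row selection as X ...
print(safe_indexing(sample_weight, train_idx))  # [1. 3. 5.]
# ... whereas a scalar (e.g. an epoch count) fails the array-like/length check
# in _index_param_value above and is returned unchanged.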
Example 16: _fit_resample

def _fit_resample(self, X, y):
    # check for deprecated random_state
    if self.random_state is not None:
        deprecate_parameter(self, '0.4', 'random_state')

    # Find the nearest neighbour of every point
    nn = NearestNeighbors(n_neighbors=2, n_jobs=self.n_jobs)
    nn.fit(X)
    nns = nn.kneighbors(X, return_distance=False)[:, 1]

    links = self.is_tomek(y, nns, self.sampling_strategy_)
    idx_under = np.flatnonzero(np.logical_not(links))

    if self.return_indices:
        return (safe_indexing(X, idx_under), safe_indexing(y, idx_under),
                idx_under)
    else:
        return (safe_indexing(X, idx_under), safe_indexing(y, idx_under))

Developer ID: bodycat, Project: imbalanced-learn, Lines of code: 18, Source file: _tomek_links.py
Example 17: extract_param

def extract_param(self, key, x, n):
    if self.cache is not None and (n, key) in self.cache:
        return self.cache[n, key]

    out = safe_indexing(x, self.splits[n][0]) if _is_arraylike(x) else x

    if self.cache is not None:
        self.cache[n, key] = out
    return out

Developer ID: dask, Project: dask-learn, Lines of code: 9, Source file: methods.py
Example 18: _sample

def _sample(self, X, y):
    """Resample the dataset.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like, shape (n_samples,)
        Corresponding label for each sample in X.

    Returns
    -------
    X_resampled : {ndarray, sparse matrix}, shape (n_samples_new, n_features)
        The array containing the resampled data.

    y_resampled : ndarray, shape (n_samples_new,)
        The corresponding label of `X_resampled`

    idx_under : ndarray, shape (n_samples, )
        If `return_indices` is `True`, a boolean array will be returned
        indicating which samples have been selected.

    """
    # check for deprecated random_state
    if self.random_state is not None:
        deprecate_parameter(self, '0.4', 'random_state')

    # Find the nearest neighbour of every point
    nn = NearestNeighbors(n_neighbors=2, n_jobs=self.n_jobs)
    nn.fit(X)
    nns = nn.kneighbors(X, return_distance=False)[:, 1]

    links = self.is_tomek(y, nns, self.ratio_)
    idx_under = np.flatnonzero(np.logical_not(links))

    if self.return_indices:
        return (safe_indexing(X, idx_under),
                safe_indexing(y, idx_under),
                idx_under)
    else:
        return (safe_indexing(X, idx_under),
                safe_indexing(y, idx_under))

Developer ID: glemaitre, Project: imbalanced-learn, Lines of code: 44, Source file: tomek_links.py
Example 19: __getitem__

def __getitem__(self, index):
    X_resampled = safe_indexing(
        self.X, self.indices_[index * self.batch_size:
                              (index + 1) * self.batch_size])
    y_resampled = safe_indexing(
        self.y, self.indices_[index * self.batch_size:
                              (index + 1) * self.batch_size])

    if issparse(X_resampled) and not self.keep_sparse:
        X_resampled = X_resampled.toarray()

    if self.sample_weight is not None:
        sample_weight_resampled = safe_indexing(
            self.sample_weight,
            self.indices_[index * self.batch_size:
                          (index + 1) * self.batch_size])

    if self.sample_weight is None:
        return X_resampled, y_resampled
    else:
        return X_resampled, y_resampled, sample_weight_resampled

Developer ID: bodycat, Project: imbalanced-learn, Lines of code: 19, Source file: _generator.py
Example 20: _extract

def _extract(self, X, y, n, is_x=True, is_train=True):
    if self.cache is not None and (n, is_x, is_train) in self.cache:
        return self.cache[n, is_x, is_train]

    inds = self.splits[n][0] if is_train else self.splits[n][1]
    result = safe_indexing(X if is_x else y, inds)

    if self.cache is not None:
        self.cache[n, is_x, is_train] = result
    return result

Developer ID: dask, Project: dask-learn, Lines of code: 10, Source file: methods.py
Note: The sklearn.utils.safe_indexing examples in this article were compiled by 纯净天空 from source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and any redistribution or use should follow the license of the corresponding project. Do not reproduce without permission.