This article collects typical usage examples of the Python function sklearn.utils.check_arrays. If you are wondering what check_arrays does, how to call it, or what real-world usage looks like, the curated examples below should help.
20 code examples of check_arrays are shown below, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python samples.
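Before diving in, a minimal self-contained sketch of the API itself may help (check_arrays existed only in older scikit-learn releases and was later replaced by check_array and check_X_y, so this snippet assumes such an old version; the data is made up):

import numpy as np
from sklearn.utils import check_arrays  # old scikit-learn only

X = [[0, 1], [2, 3], [4, 5]]
y = [0, 1, 0]

# Checks that all inputs have consistent first dimensions and converts
# lists to ndarrays; it always returns one output per input.
X_checked, y_checked = check_arrays(X, y, dtype=np.float64)

# A single input still comes back as a 1-tuple, hence the trailing comma
# seen throughout the examples below.
X_only, = check_arrays(X, sparse_format='csr')

Example 10 below exercises most of the remaining keyword arguments (copy, check_ccontiguous, allow_lists).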
Example 1: __init__
def __init__(self, X, y, n_classes, batch_size):
    # validation only: the converted arrays are discarded, so this just
    # raises on inconsistent or unconvertible input
    check_arrays(X, dtype=np.float32)
    check_arrays(y, dtype=None)
    self.X = X
    self.y = y
    self.n_classes = n_classes
    self.batch_size = batch_size
Developer: dmollaaliod, Project: skflow, Lines: 7, Source: __init__.py
Example 2: fit
def fit(self, X, y=None):
    """Fit the model to the data X.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        Training data.

    Returns
    -------
    self : BernoulliRBM
        The fitted model.
    """
    X, = check_arrays(X, sparse_format='csr', dtype=np.float)
    n_samples = X.shape[0]
    rng = check_random_state(self.random_state)

    self.components_ = np.asarray(
        rng.normal(0, 0.01, (self.n_components, X.shape[1])),
        order='fortran')
    self.intercept_hidden_ = np.zeros(self.n_components, )
    self.intercept_visible_ = np.zeros(X.shape[1], )
    self.h_samples_ = np.zeros((self.batch_size, self.n_components))

    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                        n_batches, n_samples))

    for iteration in xrange(1, self.n_iter + 1):
        for batch_slice in batch_slices:
            self._fit(X[batch_slice], rng)

    return self
Developer: Markusjsommer, Project: classnotes, Lines: 32, Source: rbms.py
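A hedged usage sketch for the fit above, assuming the surrounding class takes the same constructor arguments as scikit-learn's BernoulliRBM (n_components, batch_size, n_iter, random_state):

import numpy as np

rng = np.random.RandomState(0)
X = (rng.rand(30, 8) > 0.5).astype(float)   # binary visible units

rbm = BernoulliRBM(n_components=4, batch_size=10, n_iter=5, random_state=0)
rbm.fit(X)
print(rbm.components_.shape)  # (4, 8): one weight row per hidden unit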
Example 3: predict
def predict(self, X):
    """Predict regression target for X.

    The predicted regression target of an input sample is computed as the
    mean predicted regression targets of the estimators in the ensemble.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted values.
    """
    # Check data
    X, = check_arrays(X)

    # Parallel loop
    n_jobs, n_estimators, starts = _partition_estimators(self)

    all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
        delayed(_parallel_predict_regression)(
            self.estimators_[starts[i]:starts[i + 1]],
            self.estimators_features_[starts[i]:starts[i + 1]],
            X)
        for i in range(n_jobs))

    # Reduce
    y_hat = sum(all_y_hat) / self.n_estimators

    return y_hat
Developer: orazaro, Project: kgml, Lines: 33, Source: bag.py
Example 4: benchmark
def benchmark(clf, X, y, cv=None):
    X, y = check_arrays(X, y, sparse_format='csr', allow_lists=True)
    cv = check_cv(cv, X, y, classifier=is_classifier(clf))
    # learning_curve_ = learning_curve(clf, X_all, y_all, cv=cv)
    train_times = []
    test_times = []
    confusion_matrices = []
    confusion_matrix_indices = []
    coefs = []
    for train, test in cv:
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]

        t0 = time()
        clf.fit(X_train, y_train)
        train_times.append(time() - t0)

        t0 = time()
        y_pred = clf.predict(X_test)
        test_times.append(time() - t0)

        confusion_matrices.append(confusion_matrix(y_test, y_pred))
        confusion_matrix_indices.append(np.array([
            [test[pred] for pred in true]
            for true in confusion_matrix_instances(y_test, y_pred)]))
        coefs.append(clf.coef_)
    return dict(
        train_times=np.array(train_times),
        test_times=np.array(test_times),
        confusion_matrices=np.array(confusion_matrices),
        confusion_matrix_indices=np.array(confusion_matrix_indices),
        coefs=np.array(coefs),
    )
Developer: EdwardBetts, Project: twitter-sentiment, Lines: 35, Source: evaluation.py
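A hedged usage sketch for benchmark (the classifier and random data are illustrative assumptions; the call also relies on the source module's own imports, e.g. time and confusion_matrix, plus the project-local confusion_matrix_instances helper, which is not shown above):

import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
X = rng.rand(100, 20)          # 100 samples, 20 features
y = rng.randint(2, size=100)   # binary labels

# Any estimator that exposes coef_ after fit() will do.
results = benchmark(SGDClassifier(), X, y, cv=5)
print(results['train_times'].mean(), results['test_times'].mean())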
Example 5: mean_absolute_percentage_error
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = check_arrays(y_true, y_pred)

    ## Note: does not handle mixed 1d representation
    #if _is_1d(y_true):
    #    y_true, y_pred = _check_1d_array(y_true, y_pred)

    return round(np.mean(np.abs((y_true - y_pred) / y_true)) * 100, 2)
Developer: martin1, Project: thesis, Lines: 8, Source: mape.py
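For instance (the arrays are made up; note the division by y_true means any zero in y_true yields inf):

import numpy as np

y_true = np.array([100.0, 200.0, 300.0])
y_pred = np.array([110.0, 190.0, 330.0])

# relative errors 0.10, 0.05, 0.10 -> mean 0.0833 -> 8.33%
print(mean_absolute_percentage_error(y_true, y_pred))  # 8.33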
Example 6: fit
def fit(self, X, y):
    """Fit MLP Classifier according to X, y

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples
        and n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_classes]
        Target values. It determines the problem type.

        *binary*
            If y is a vector of integers with two unique values.

        *multiclass*
            If y is a vector of integers with three or more values, or if
            y is a two-dimensional array of integers with exactly one
            non-zero element per row.

        *multiclass-multioutput*
            If y is a two-dimensional array of integers with two unique
            values and more than one non-zero element per row.

        *continuous*
            If y is a vector of floats.

        *continuous-multioutput*
            If y is a two-dimensional array of floats.

    Returns
    -------
    self : object
        Returns self.
    """
    X, = check_arrays(X, sparse_format='dense')
    n_samples, self.input_size_ = X.shape
    y = np.atleast_1d(y)
    self.type_of_target_ = type_of_target(y)
    if self.verbose > 0:
        print("The inferred type of y is %s" % self.type_of_target_)
    if self.type_of_y is not None:
        if self.type_of_y != self.type_of_target_:
            # the original raised a plain string, which is not a valid
            # exception; raise a real one with the same message
            raise ValueError("Passed type of y is %s, inferred type is %s"
                             % (self.type_of_y, self.type_of_target_))

    self.check_type_implemented()
    y = self._get_output(y)
    X, y = self._scale(X, y)
    self._inst_mlp()
    self._fit_mlp(X, y)
    if self.dropout and self.type_of_target_ in ['continuous',
                                                 'continuous-multioutput']:
        self._lineregress(X, y)
    return self
Developer: JakeMick, Project: graymatter, Lines: 57, Source: mlp.py
Example 7: fit_all
def fit_all(self, X, y, n_shop, last_obs_plan):
    # if not warm-starting, clear the estimator state
    if not self.warm_start:
        self._clear_state()

    # check input
    X, = check_arrays(X, dtype=DTYPE, sparse_format="dense")
    y = column_or_1d(y, warn=True)
    n_samples, n_features = X.shape
    self.n_features = n_features
    random_state = check_random_state(self.random_state)
    self._check_params()

    if not self._is_initialized():
        if self.verbose:
            print 'Initializing gradient boosting...'
        # init state
        self._init_state()

        # fit initial model
        if not self.fix_history:
            idx = get_truncated_shopping_indices(n_shop)
        else:
            idx = np.arange(len(n_shop))

        # init predictions by averaging over the shopping histories
        y_pred = self.init_.predict(last_obs_plan[idx])
        print 'First training accuracy:', accuracy_score(y, y_pred.argmax(axis=1))
        begin_at_stage = 0
    else:
        # add more estimators to fitted model
        # invariant: warm_start = True
        if self.n_estimators < self.estimators_.shape[0]:
            raise ValueError('n_estimators=%d must be larger or equal to '
                             'estimators_.shape[0]=%d when '
                             'warm_start==True'
                             % (self.n_estimators,
                                self.estimators_.shape[0]))
        begin_at_stage = self.estimators_.shape[0]
        y_pred = self.decision_function(X)
        self._resize_state()

    # fit the boosting stages
    n_stages = self._fit_stages(X, y, y_pred, random_state,
                                begin_at_stage, n_shop)

    # change shape of arrays after fit (early-stopping or additional tests)
    if n_stages != self.estimators_.shape[0]:
        self.estimators_ = self.estimators_[:n_stages]
        self.train_score_ = self.train_score_[:n_stages]
        if hasattr(self, 'oob_improvement_'):
            self.oob_improvement_ = self.oob_improvement_[:n_stages]
        if hasattr(self, '_oob_score_'):
            self._oob_score_ = self._oob_score_[:n_stages]

    return self
Developer: brandonckelly, Project: allstate, Lines: 54, Source: boosted_truncated_history.py
Example 8: fit
def fit(self, X, y):
    self._validate()
    X, y = check_arrays(X, y, sparse_format="csc")
    n_samples, n_features = X.shape

    # discretize continuous features
    if np.issubdtype(X.dtype, float):
        X_new = X.astype(np.int)
        if np.any(X_new != X):
            raise ValueError('X could not safely be converted to integers.'
                             ' MRMR does not support continuous values.')
        X = X_new
    if np.issubdtype(y.dtype, float):
        y_new = y.astype(np.int)
        if np.any(y_new != y):
            raise ValueError('y could not safely be converted to integers.'
                             ' MRMR does not support continuous values.')
        y = y_new

    if self.k is None:
        k = n_features // 2
    else:
        k = self.k

    X_classes = np.array(list(set(X.reshape((n_samples * n_features,)))))
    y_classes = np.array(list(set(y.reshape((n_samples,)))))

    if len(X_classes) > self.warn_limit:
        print('Warning: X contains {} discrete values. MRMR may'
              ' run slow'.format(len(X_classes)))
    if len(y_classes) > self.warn_limit:
        print('Warning: y contains {} discrete values. MRMR may'
              ' run slow'.format(len(y_classes)))

    method = self.methods[self.method]
    idxs, _ = _mrmr(n_samples, n_features, y.astype(np.long),
                    X.astype(np.long), y_classes.astype(np.long),
                    X_classes.astype(np.long), y_classes.shape[0],
                    X_classes.shape[0], k, method, self.normalize)

    support_ = np.zeros(n_features, dtype=np.bool)
    ranking_ = np.ones(n_features, dtype=np.int) + k
    support_[idxs] = True
    for i, idx in enumerate(idxs, start=1):
        ranking_[idx] = i

    self.n_features_ = support_.sum()
    self.support_ = support_
    self.ranking_ = ranking_
    self.selected_ = np.argsort(self.ranking_)[:self.n_features_]
    return self
Developer: kemaleren, Project: sklmrmr, Lines: 54, Source: mrmr.py
Example 9: precision_recall_fscore_support
def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None):
    y_true, y_pred = check_arrays(y_true, y_pred)
    assert beta > 0
    if labels is None:
        labels = unique_labels(y_true, y_pred)
    else:
        labels = np.asarray(labels, dtype=np.int)

    n_labels = labels.size
    true_pos = np.zeros(n_labels, dtype=np.double)
    false_pos = np.zeros(n_labels, dtype=np.double)
    false_neg = np.zeros(n_labels, dtype=np.double)
    support = np.zeros(n_labels, dtype=np.long)

    for i, label_i in enumerate(labels):
        true_pos[i] = np.sum(y_pred[y_true == label_i] == label_i)
        false_pos[i] = np.sum(y_pred[y_true != label_i] == label_i)
        false_neg[i] = np.sum(y_pred[y_true == label_i] != label_i)
        support[i] = np.sum(y_true == label_i)

    try:
        # oddly, we may get an "invalid" rather than a "divide" error here
        old_err_settings = np.seterr(divide='ignore', invalid='ignore')

        # precision and recall (micro-averaged)
        precision = true_pos.sum() / (true_pos.sum() + false_pos.sum())
        recall = true_pos.sum() / (true_pos.sum() + false_neg.sum())

        # fbeta score
        beta2 = beta ** 2
        fscore = (1 + beta2) * (precision * recall) / (
            beta2 * precision + recall)

        # handle division by 0.0 in fscore
        if (precision + recall) == 0.0:
            fscore = 0.0
    finally:
        np.seterr(**old_err_settings)

    return precision, recall, fscore, support
Developer: YuanhaoSun, Project: PPLearn, Lines: 53, Source: ml_metrics.py
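A usage sketch with made-up labels (recall that this variant micro-averages, so precision, recall, and fscore come back as scalars while support stays per-label):

import numpy as np

y_true = np.array([0, 1, 1, 2, 2, 2])
y_pred = np.array([0, 1, 2, 2, 2, 1])

p, r, f, support = precision_recall_fscore_support(y_true, y_pred)
print(p, r, f)   # 0.666..., 0.666..., 0.666... (micro-averaged)
print(support)   # [1 2 3], one count per label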
Example 10: test_check_arrays
def test_check_arrays():
    # check that an error is raised on different-length inputs
    X = [0, 1]
    Y = np.arange(3)
    assert_raises(ValueError, check_arrays, X, Y)

    # check error for sparse matrix and array
    X = sp.csc_matrix(np.arange(4))
    assert_raises(ValueError, check_arrays, X, Y)

    # check the y=None pattern
    X = [0, 1, 2]
    X_, Y_, Z_ = check_arrays(X, Y, None)
    assert_true(Z_ is None)

    # check that lists are converted
    X_, Y_ = check_arrays(X, Y)
    assert_true(isinstance(X_, np.ndarray))
    assert_true(isinstance(Y_, np.ndarray))

    # check that Y was not copied:
    assert_true(Y_ is Y)

    # check copying
    X_, Y_ = check_arrays(X, Y, copy=True)
    assert_false(Y_ is Y)

    # check forcing dtype
    X_, Y_ = check_arrays(X, Y, dtype=np.int)
    assert_equal(X_.dtype, np.int)
    assert_equal(Y_.dtype, np.int)

    X_, Y_ = check_arrays(X, Y, dtype=np.float)
    assert_equal(X_.dtype, np.float)
    assert_equal(Y_.dtype, np.float)

    # test check_ccontiguous
    Y = np.arange(6).reshape(3, 2).copy('F')
    # if we don't specify it, it is not changed
    X_, Y_ = check_arrays(X, Y)
    assert_true(Y_.flags['F_CONTIGUOUS'])
    assert_false(Y_.flags['C_CONTIGUOUS'])

    X_, Y_ = check_arrays(X, Y, check_ccontiguous=True)
    assert_true(Y_.flags['C_CONTIGUOUS'])
    assert_false(Y_.flags['F_CONTIGUOUS'])

    # check that lists are passed through if allow_lists is true
    X_, Y_ = check_arrays(X, Y, allow_lists=True)
    assert_true(isinstance(X_, list))
Developer: Big-Data, Project: scikit-learn, Lines: 50, Source: test_validation.py
Example 11: predict_log_proba
def predict_log_proba(self, X):
    """Predict class log-probabilities for X.

    The predicted class log-probabilities of an input sample is computed
    as the log of the mean predicted class probabilities of the base
    estimators in the ensemble.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    p : array of shape = [n_samples, n_classes]
        The class log-probabilities of the input samples. Classes are
        ordered by arithmetical order.
    """
    if hasattr(self.base_estimator_, "predict_log_proba"):
        # Check data
        X, = check_arrays(X)

        if self.n_features_ != X.shape[1]:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is {0} "
                             "and input n_features is {1} "
                             "".format(self.n_features_, X.shape[1]))

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(self)

        all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_predict_log_proba)(
                self.estimators_[starts[i]:starts[i + 1]],
                self.estimators_features_[starts[i]:starts[i + 1]],
                X,
                self.n_classes_)
            for i in range(n_jobs))

        # Reduce
        log_proba = all_log_proba[0]
        for j in range(1, len(all_log_proba)):
            log_proba = logaddexp(log_proba, all_log_proba[j])
        log_proba -= np.log(self.n_estimators)

        return log_proba
    else:
        return np.log(self.predict_proba(X))
Developer: orazaro, Project: kgml, Lines: 51, Source: bag.py
Example 12: fit
def fit(self, X, winnerTakeAll, plList, y=None):
    """Fit the model to the data X.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        Training data.

    Returns
    -------
    self : BernoulliRBM
        The fitted model.
    """
    X, = check_arrays(X, sparse_format='csr', dtype=np.float)
    n_samples = X.shape[0]
    rng = check_random_state(self.random_state)

    self.components_ = np.asarray(
        rng.normal(0, 0.001, (self.n_components, X.shape[1])),
        order='fortran')
    self.intercept_hidden_ = np.zeros(self.n_components, )
    self.intercept_visible_ = np.zeros(X.shape[1], )
    self.h_samples_ = np.zeros((self.batch_size, self.n_components))

    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(self.gen_even_slices(n_batches * self.batch_size,
                                             n_batches, n_samples))
    verbose = self.verbose
    for iteration in xrange(self.n_iter):
        pl = 0.
        if verbose:
            begin = time.time()
        batch_index = 0
        for batch_slice in batch_slices:
            if batch_index + 1 != n_batches - 1:
                #next_batch = batch_slice
                next_h_pos_mean_hidden = self._mean_hiddens(X[batch_index + 1])
            pl_batch = self._fit(X[batch_slice], rng, winnerTakeAll)
            if verbose:
                pl += pl_batch.sum()
            #self.printOutWeight()
            batch_index = batch_index + 1
        if verbose:
            pl /= n_samples
            end = time.time()
            print("Iteration %d, pseudo-likelihood = %.2f, time = %.2fs"
                  % (iteration, pl, end - begin))
        plList[iteration] = pl
    return self
Developer: jiajunshen, Project: convRBM, Lines: 51, Source: scikitRBM.py
Example 13: f_regression_cov
def f_regression_cov(X, y, C):
    """Univariate linear regression tests

    Quick linear model for testing the effect of a single regressor,
    sequentially for many regressors.

    This is done in 3 steps:
    1. the regressor of interest and the data are orthogonalized
       wrt constant regressors
    2. the cross correlation between data and regressors is computed
    3. it is converted to an F score then to a p-value

    Parameters
    ----------
    X : {array-like, sparse matrix} shape = (n_samples, n_features)
        The set of regressors that will be tested sequentially.
    y : array of shape (n_samples,)
        The data matrix.
    C : {array-like, sparse matrix} shape = (n_samples, n_covariates)
        The set of covariates.

    Returns
    -------
    F : array, shape=(n_features,)
        F values of features.
    pval : array, shape=(n_features,)
        p-values of F-scores.
    """
    X, C, y = check_arrays(X, C, y, dtype=np.float)
    y = y.ravel()

    # project out the covariates
    assert C.shape[1] < C.shape[0]
    cpinv = np.linalg.pinv(C)
    X -= np.dot(C, np.dot(cpinv, X))
    y -= np.dot(C, np.dot(cpinv, y))

    # compute the correlation
    corr = np.dot(y, X)
    corr /= np.asarray(np.sqrt(safe_sqr(X).sum(axis=0))).ravel()
    corr /= np.asarray(np.sqrt(safe_sqr(y).sum())).ravel()

    # convert to p-value
    dof = X.shape[0] - 1 - C.shape[1]  # (df_fm / (df_rm - df_fm))
    F = corr ** 2 / (1 - corr ** 2) * dof
    pv = stats.f.sf(F, 1, dof)
    return F, pv
Developer: xiaofeng007, Project: FaST-LMM, Lines: 51, Source: linear_regression.py
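A quick synthetic-data sketch (random numbers, shapes only; it assumes the source module's own imports, e.g. scipy.stats as stats and sklearn's safe_sqr, are in scope):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 5)           # 5 candidate regressors
y = X[:, 0] + rng.randn(50)    # y depends on the first regressor
C = rng.randn(50, 2)           # 2 covariates to project out

F, pv = f_regression_cov(X, y, C)
print(F.shape, pv.shape)       # (5,) (5,); F[0] should be the largest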
Example 14: partial_fit
def partial_fit(self, X, y=None, weights=None):
    """Update k means estimate on a single mini-batch X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Coordinates of the data points to cluster.
    """
    X = check_arrays(X, sparse_format="csr", copy=False)[0]
    n_samples, n_features = X.shape
    if hasattr(self.init, '__array__'):
        self.init = np.ascontiguousarray(self.init, dtype=np.float64)

    if n_samples == 0:
        return self

    x_squared_norms = _squared_norms(X)
    self.random_state_ = check_random_state(self.random_state)
    if (not hasattr(self, 'counts_')
            or not hasattr(self, 'cluster_centers_')):
        # this is the first call to partial_fit on this object:
        # initialize the cluster centers
        self.cluster_centers_ = _init_centroids(
            X, self.n_clusters, self.init,
            random_state=self.random_state_,
            x_squared_norms=x_squared_norms, init_size=self.init_size,
            weights=weights)
        self.initial_cluster_centers_ = self.cluster_centers_.copy()
        self.counts_ = np.zeros(self.n_clusters, dtype=np.int32)
        random_reassign = False
    else:
        # The lower the minimum count is, the more we do random
        # reassignment; however, we don't want to do random reassignment
        # too often, to allow for building up counts
        random_reassign = self.random_state_.randint(
            10 * (1 + self.counts_.min())) == 0

    _mini_batch_step(X, x_squared_norms, self.cluster_centers_,
                     self.counts_, np.zeros(0, np.double), 0,
                     random_reassign=random_reassign,
                     random_state=self.random_state_,
                     reassignment_ratio=self.reassignment_ratio,
                     verbose=self.verbose, weights=weights,
                     sphered=self.sphered)

    if self.compute_labels:
        self.labels_, self.inertia_ = _labels_inertia(
            X, x_squared_norms, self.cluster_centers_, weights=weights)

    return self
Developer: mthomure, Project: glimpse-project, Lines: 51, Source: kmeans.py
Example 15: _check_clf_targets
def _check_clf_targets(y_true, y_pred):
    """Check that y_true and y_pred belong to the same classification task.

    This converts multiclass or binary types to a common shape, and raises
    a ValueError for a mix of multilabel and multiclass targets, a mix of
    multilabel formats, for the presence of continuous-valued or
    multioutput targets, or for targets of different lengths.

    Column vectors are squeezed to 1d.

    Parameters
    ----------
    y_true : array-like
    y_pred : array-like

    Returns
    -------
    type_true : one of {'multilabel-indicator', 'multilabel-sequences', \
                        'multiclass', 'binary'}
        The type of the true target data, as output by
        ``utils.multiclass.type_of_target``
    y_true : array or indicator matrix or sequence of sequences
    y_pred : array or indicator matrix or sequence of sequences
    """
    y_true, y_pred = check_arrays(y_true, y_pred, allow_lists=True)
    type_true = type_of_target(y_true)
    type_pred = type_of_target(y_pred)

    y_type = set([type_true, type_pred])
    if y_type == set(["binary", "multiclass"]):
        y_type = set(["multiclass"])

    if len(y_type) > 1:
        raise ValueError("Can't handle mix of {0} and {1}"
                         "".format(type_true, type_pred))

    # We can't have more than one value in y_type => the set is no longer needed
    y_type = y_type.pop()

    # No metrics support "multiclass-multioutput" format
    if y_type not in ["binary", "multiclass", "multilabel-indicator",
                      "multilabel-sequences"]:
        raise ValueError("{0} is not supported".format(y_type))

    if y_type in ["binary", "multiclass"]:
        y_true = column_or_1d(y_true)
        y_pred = column_or_1d(y_pred)

    return y_type, y_true, y_pred
Developer: DjalelBBZ, Project: SOS14_practical_session, Lines: 51, Source: SOS_tools.py
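For example, a binary vector paired with a multiclass vector is promoted to the common 'multiclass' type rather than raising:

import numpy as np

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 2, 1, 0])   # a third class appears here

y_type, y_true, y_pred = _check_clf_targets(y_true, y_pred)
print(y_type)  # 'multiclass'

Mixing, say, a multilabel indicator matrix with a plain multiclass vector would instead raise the "Can't handle mix of ..." ValueError.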
Example 16: fit
def fit(self, X):
    """Fit the model to the data X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data.

    Returns
    -------
    self : convolutionRBM
        The fitted model.
    """
    X, = check_arrays(X, sparse_format='csr', dtype=np.float)
    n_samples = X.shape[0]
    rng = check_random_state(self.random_state)

    self.components_ = np.asarray(
        rng.normal(0, 0.001,
                   (self.n_groups, self.window_size * self.window_size)),
        order='fortran')
    self.intercept_hidden_ = np.zeros(self.n_groups)
    self.intercept_visible_ = 0
    self.h_samples_ = np.zeros((self.batch_size, self.n_components))

    n_batches = int(np.ceil(float(n_samples) / self.batch_size))
    batch_slices = list(self.gen_even_slices(n_batches * self.batch_size,
                                             n_batches, n_samples))
    verbose = self.verbose
    begin = time.time()  # initialize the timer used by the verbose branch
    for iteration in xrange(1, self.n_iter + 1):
        reconstructError = 0
        for batch_slice in batch_slices:
            if not self.use_theano:
                reconstructError += self._fit(X[batch_slice], rng)
            else:
                reconstructError += self._fit_theano(X[batch_slice], rng)
        print "step:", iteration, "reconstruct error:", reconstructError
        if verbose:
            end = time.time()
            print("[%s] Iteration %d, pseudo-likelihood = %.2f,"
                  " time = %.2fs"
                  % (type(self).__name__, iteration,
                     self.score_samples(X).mean(), end - begin))
            begin = end
    return self
Developer: jiajunshen, Project: convRBM, Lines: 50, Source: convRBM.py
Example 17: _fit_transform
def _fit_transform(self, X):
    self.nbrs_ = NearestNeighbors(self.n_neighbors,
                                  algorithm=self.neighbors_algorithm)
    random_state = check_random_state(self.random_state)
    X, = check_arrays(X, sparse_format='dense')
    self.nbrs_.fit(X)
    self.embedding_, self.reconstruction_error_ = \
        locally_linear_embedding(
            self.nbrs_, self.n_neighbors, self.n_components,
            eigen_solver=self.eigen_solver, tol=self.tol,
            max_iter=self.max_iter, method=self.method,
            hessian_tol=self.hessian_tol, modified_tol=self.modified_tol,
            random_state=random_state)
Developer: bernease, Project: Mmani, Lines: 14, Source: locally_linear.py
Example 18: cross_validate
def cross_validate(self, k=10):
    """Performs a k-fold cross validation of our training data.

    Args:
        k: The number of folds for cross validation.
    """
    self.scores = []
    X, y = check_arrays(self.feature_vector,
                        self.classification_vector,
                        sparse_format='csr')
    cv = cross_validation.check_cv(
        k, self.feature_vector, self.classification_vector,
        classifier=True)
    for train, test in cv:
        self.classifier1.fit(self.feature_vector[train],
                             self.classification_vector[train])
        self.classifier2.fit(self.feature_vector[train],
                             self.classification_vector[train])
        self.classifier3.fit(self.feature_vector[train],
                             self.classification_vector[train])
        classification1 = self.classifier1.predict(
            self.feature_vector[test])
        classification2 = self.classifier2.predict(
            self.feature_vector[test])
        classification3 = self.classifier3.predict(
            self.feature_vector[test])

        # majority vote over the three classifiers, falling back to the
        # first prediction on a three-way tie
        classification = []
        for predictions in zip(classification1, classification2,
                               classification3):
            neutral_count = predictions.count(0)
            positive_count = predictions.count(1)
            negative_count = predictions.count(-1)
            if (neutral_count == negative_count and
                    negative_count == positive_count):
                classification.append(predictions[0])
            elif (neutral_count > positive_count and
                    neutral_count > negative_count):
                classification.append(0)
            elif (positive_count > neutral_count and
                    positive_count > negative_count):
                classification.append(1)
            elif (negative_count > neutral_count and
                    negative_count > positive_count):
                classification.append(-1)
        classification = numpy.array(classification)
        self.scores.append(self.score_func(y[test], classification))
Developer: AlinaKay, Project: sentiment-analyzer, Lines: 50, Source: train.py
Example 19: f_regression_nosparse
def f_regression_nosparse(X, y, center=True):
    """Univariate linear regression tests

    Quick linear model for testing the effect of a single regressor,
    sequentially for many regressors.

    This is done in 3 steps:
    1. the regressor of interest and the data are orthogonalized
       with respect to constant regressors
    2. the cross correlation between data and regressors is computed
    3. it is converted to an F score then to a p-value

    Parameters
    ----------
    X : {array-like, sparse matrix} shape = (n_samples, n_features)
        The set of regressors that will be tested sequentially.
    y : array of shape (n_samples,)
        The data matrix.
    center : bool, True by default
        If true, X and y will be centered.

    Returns
    -------
    F : array, shape=(n_features,)
        F values of features.
    pval : array, shape=(n_features,)
        p-values of F-scores.
    """
    X, y = check_arrays(X, y, dtype=np.float)
    y = y.ravel()
    if center:
        y = y - np.mean(y)
        X = X.copy('F')  # faster in fortran
        X -= X.mean(axis=0)

    # compute the correlation
    corr = np.dot(y, X)
    # XXX could use corr /= row_norms(X.T) here, but the test doesn't pass
    corr /= np.asarray(np.sqrt((X ** 2).sum(axis=0))).ravel()
    corr /= norm(y)

    # convert to p-value
    degrees_of_freedom = y.size - (2 if center else 1)
    F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
    pv = stats.f.sf(F, 1, degrees_of_freedom)
    return F, pv
Developer: CandyPythonFlow, Project: nilearn, Lines: 49, Source: sklearn_f_regression_nosparse.py
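Usage mirrors Example 13 but without covariates (made-up data; norm and stats here are the source module's scipy imports):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(40, 3)
y = 2.0 * X[:, 1] + rng.randn(40)

F, pv = f_regression_nosparse(X, y)
print(F.argmax())   # 1 -- the informative feature should get the largest F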
Example 20: transform
def transform(self, X):
    """Compute the hidden layer activation probabilities, P(h=1|v=X).

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        The data to be transformed.

    Returns
    -------
    h : array, shape (n_samples, n_components)
        Latent representations of the data.
    """
    X, = check_arrays(X, sparse_format='csr', dtype=np.float)
    return self._mean_hiddens(X)
Developer: jiajunshen, Project: convRBM, Lines: 15, Source: convRBM.py
Note: the sklearn.utils.check_arrays examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Please do not reproduce without permission.