This article collects typical usage examples of the Python class sklearn.preprocessing.Scaler. If you have been wondering what exactly the Scaler class does and how to use it, the hand-picked class examples below should help.
Listed below are 20 code examples of the Scaler class, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
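Before the examples, a minimal sketch of the typical workflow (note: Scaler is the legacy name of this estimator; in scikit-learn 0.13 and later it is exposed as StandardScaler). The array values below are made up for illustration:

import numpy as np
from sklearn.preprocessing import Scaler

X_train = np.array([[1., 2.], [3., 4.], [5., 6.]])
X_test = np.array([[2., 3.]])

scaler = Scaler()                                # centers to zero mean, scales to unit variance
X_train_scaled = scaler.fit_transform(X_train)   # statistics (mean_, std_) are learned here
X_test_scaled = scaler.transform(X_test)         # the same statistics are reused on new data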
Example 1: run_svm

def run_svm(svc, X):
    X = X.copy()
    scaler = Scaler()
    X = scaler.fit_transform(X)   # note: fits the scaler on the data being predicted
    y_predict = svc.predict(X)
    return y_predict

Developer: ajrichards | Project: cytostream | Lines of code: 7 | Source file: SupervisedLearning.py
Example 2: test_scaler_1d

def test_scaler_1d():
    """Test scaling of dataset along single axis"""
    rng = np.random.RandomState(0)
    X = rng.randn(5)
    X_orig_copy = X.copy()

    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=False)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 1.0)

    # check inverse transform
    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_array_almost_equal(X_scaled_back, X_orig_copy)

    # Test with 1D list
    X = [0., 1., 2, 0.4, 1.]

    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=False)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 1.0)

    X_scaled = scale(X)
    assert_array_almost_equal(X_scaled.mean(axis=0), 0.0)
    assert_array_almost_equal(X_scaled.std(axis=0), 1.0)

Developer: AlexLerman | Project: scikit-learn | Lines of code: 25 | Source file: test_preprocessing.py
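The test above exercises both APIs: the Scaler estimator, which remembers mean_ and std_ for later transform and inverse_transform calls, and the stateless scale helper, which standardizes in one shot when the statistics never need to be reused. A minimal sketch:

from sklearn.preprocessing import scale

X_scaled = scale([0., 1., 2., 0.4, 1.])   # zero mean, unit variance, no fitted state kept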
Example 3: test_scaler_without_centering

def test_scaler_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero

    scaler = Scaler(with_mean=False)
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert not np.any(np.isnan(X_scaled))

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    # A copy was made: X itself is untouched
    assert X_scaled is not X

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert X_scaled_back is not X
    assert X_scaled_back is not X_scaled
    assert_array_almost_equal(X_scaled_back, X)

    X_scaled = scale(X, with_mean=False)
    assert not np.any(np.isnan(X_scaled))

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    # A copy was made: X itself is untouched
    assert X_scaled is not X

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert X_scaled_back is not X
    assert X_scaled_back is not X_scaled
    assert_array_almost_equal(X_scaled_back, X)

Developer: Yangqing | Project: scikit-learn | Lines of code: 33 | Source file: test_preprocessing.py
Example 4: _pre_fit

def _pre_fit(self, X, y):
    random_state = check_random_state(self.random_state)

    if self.scale_y:
        self.y_scaler_ = Scaler(copy=True).fit(y)
        y = self.y_scaler_.transform(y)

    if self.metric == "precomputed":
        self.components_ = None
        n_components = X.shape[1]
    else:
        if self.init_components is None:
            if self.verbose: print "Selecting components..."
            self.components_ = select_components(X, y,
                                                 self.n_components,
                                                 random_state=random_state)
        else:
            self.components_ = self.init_components
        n_components = self.components_.shape[0]

    n_nonzero_coefs = self.n_nonzero_coefs
    if 0 < n_nonzero_coefs and n_nonzero_coefs <= 1:
        # interpret a fraction in (0, 1] as a proportion of the components
        n_nonzero_coefs = int(n_nonzero_coefs * n_components)
    n_nonzero_coefs = int(n_nonzero_coefs)

    if n_nonzero_coefs > n_components:
        raise AttributeError("n_nonzero_coefs cannot be bigger than "
                             "n_components.")

    if self.verbose: print "Computing dictionary..."
    start = time.time()
    K = pairwise_kernels(X, self.components_, metric=self.metric,
                         filter_params=True, n_jobs=self.n_jobs,
                         **self._kernel_params())
    if self.verbose: print "Done in", time.time() - start, "seconds"

    if self.scale:
        if self.verbose: print "Scaling dictionary"
        start = time.time()
        copy = True if self.metric == "precomputed" else False
        self.scaler_ = Scaler(copy=copy)
        K = self.scaler_.fit_transform(K)
        if self.verbose: print "Done in", time.time() - start, "seconds"

    # FIXME: this allocates a lot of intermediary memory
    norms = np.sqrt(np.sum(K ** 2, axis=0))

    return n_nonzero_coefs, K, y, norms

Developer: nagyistge | Project: lightning | Lines of code: 50 | Source file: kmp.py
Example 5: test_scale_sparse_with_mean_raise_exception

def test_scale_sparse_with_mean_raise_exception():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X_csr = sp.csr_matrix(X)

    # check scaling and fit with direct calls on sparse data
    assert_raises(ValueError, scale, X_csr, with_mean=True)
    assert_raises(ValueError, Scaler(with_mean=True).fit, X_csr)

    # check transform and inverse_transform after a fit on a dense array
    scaler = Scaler(with_mean=True).fit(X)
    assert_raises(ValueError, scaler.transform, X_csr)

    X_transformed_csr = sp.csr_matrix(scaler.transform(X))
    assert_raises(ValueError, scaler.inverse_transform, X_transformed_csr)

Developer: AlexLerman | Project: scikit-learn | Lines of code: 15 | Source file: test_preprocessing.py
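Centering a sparse matrix would turn its implicit zeros into explicit nonzero entries and densify it, which is why with_mean=True is rejected on CSR input. The supported sparse path scales the variance only, as in this small sketch:

import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import Scaler

X_csr = sp.csr_matrix(np.random.randn(4, 5))
scaler = Scaler(with_mean=False)                   # no centering: sparsity is preserved
X_csr_scaled = scaler.fit(X_csr).transform(X_csr, copy=True)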
Example 6: data_to_kernels

def data_to_kernels(tr_data, te_data):
    scaler = Scaler(copy=False)
    scaler.fit_transform(tr_data)   # standardize in place, statistics from train data only
    #tr_data, mu, sigma = standardize(tr_data)
    tr_data = power_normalize(tr_data, 0.5)
    tr_data = L2_normalize(tr_data)

    #te_data, _, _ = standardize(te_data, mu, sigma)
    scaler.transform(te_data)       # reuse the train statistics on the test data
    te_data = power_normalize(te_data, 0.5)
    te_data = L2_normalize(te_data)

    tr_kernel = np.dot(tr_data, tr_data.T)
    te_kernel = np.dot(te_data, tr_data.T)

    return tr_kernel, te_kernel

Developer: danoneata | Project: test | Lines of code: 16 | Source file: per_video.py
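The two matrices returned above are linear Gram matrices: tr_kernel compares training samples with each other and te_kernel compares test samples against the training set. A hedged sketch of how such kernels are typically consumed (the SVC step and the label variables are assumptions, not shown in the source):

from sklearn.svm import SVC

tr_kernel, te_kernel = data_to_kernels(tr_data, te_data)  # tr_data/te_data assumed given
clf = SVC(kernel='precomputed')
clf.fit(tr_kernel, tr_labels)      # tr_labels: hypothetical training labels
y_pred = clf.predict(te_kernel)    # rows index test samples, columns index training samples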
Example 7: process_data

def process_data(self):
    test = pandas.read_csv("test.csv")
    testMat = test.as_matrix()
    train = pandas.read_csv("train.csv")
    trainMat = train.as_matrix()
    trainResult = trainMat[:, 0]
    trainMat = trainMat[:, 1:]
    # trainInd = np.where(trainResult == 0)[0]
    # how_many = (trainResult == 1).sum() - len(trainInd)
    # np.random.shuffle(trainInd)
    # addedResult = trainResult[trainInd[:how_many],:]
    # addedData = trainMat[trainInd[:how_many],:]
    # trainResult = np.append(trainResult,addedResult)
    # trainMat = np.vstack((trainMat,addedData))
    cv = StratifiedKFold(trainResult, 2)
    # cv = KFold(n=trainResult.shape[0],k=2)
    reduceFeatures = ExtraTreesClassifier(
        compute_importances=True, random_state=1234, n_jobs=self.cpus,
        n_estimators=1000, criterion="gini"
    )
    reduceFeatures.fit(trainMat, trainResult)
    trainScaler = Scaler()
    self.cv_data = []
    self.cv_data_nonreduced = []
    for train, test in cv:
        # trainResult is 1-D, so it is indexed with the fold indices only
        X_train, X_test = trainMat[train, :], trainMat[test, :]
        Y_train, Y_test = trainResult[train], trainResult[test]
        X_train = trainScaler.fit_transform(X_train)
        X_test = trainScaler.transform(X_test)
        self.cv_data_nonreduced.append((X_train, X_test, Y_train, Y_test))
        X_train = reduceFeatures.transform(X_train)
        X_test = reduceFeatures.transform(X_test)
        self.cv_data.append((X_train, X_test, Y_train, Y_test))
    testMat = trainScaler.transform(testMat)
    self.testMat_nonreduced = testMat
    self.testMat = reduceFeatures.transform(testMat)
    allData = self.testMat, self.cv_data, self.testMat_nonreduced, self.cv_data_nonreduced
    data_handle = open("allData.pkl", "wb")  # binary mode for pickle
    pickle.dump(allData, data_handle)
    data_handle.close()

Developer: JakeMick | Project: kaggle | Lines of code: 47 | Source file: holistic.py
Example 8: get_sl_test_data

def get_sl_test_data(fileEvents, fileLabels, includedChannels, useMeans=False, parentIndices=None):
    ## declare variables
    X = fileEvents[:, includedChannels].copy()
    scaler = Scaler()
    X = scaler.fit_transform(X)
    #if parentIndices != None:
    #    X = X[parentIndices,:]
    #X = (X - X.mean(axis=0)) / X.std(axis=0)

    if useMeans == True:
        clusterIds, X = get_mean_matrix(X, fileLabels)
        #X = (X - X.mean(axis=0)) / X.std(axis=0)
        return clusterIds, X

    return X

Developer: ajrichards | Project: cytostream | Lines of code: 17 | Source file: SupervisedLearning.py
Example 9: test_center_kernel

def test_center_kernel():
    """Test that KernelCenterer is equivalent to Scaler in feature space"""
    X_fit = np.random.random((5, 4))
    scaler = Scaler(with_std=False)
    scaler.fit(X_fit)
    X_fit_centered = scaler.transform(X_fit)
    K_fit = np.dot(X_fit, X_fit.T)

    # center fit time matrix
    centerer = KernelCenterer()
    K_fit_centered = np.dot(X_fit_centered, X_fit_centered.T)
    K_fit_centered2 = centerer.fit_transform(K_fit)
    assert_array_almost_equal(K_fit_centered, K_fit_centered2)

    # center predict time matrix
    X_pred = np.random.random((2, 4))
    K_pred = np.dot(X_pred, X_fit.T)
    X_pred_centered = scaler.transform(X_pred)
    K_pred_centered = np.dot(X_pred_centered, X_fit_centered.T)
    K_pred_centered2 = centerer.transform(K_pred)
    assert_array_almost_equal(K_pred_centered, K_pred_centered2)

Developer: Yangqing | Project: scikit-learn | Lines of code: 21 | Source file: test_preprocessing.py
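The equivalence this test checks is the standard double-centering identity: centering samples in feature space with H = I - (1/n)*11^T turns the Gram matrix K = XX^T into HKH, which is exactly what KernelCenterer computes at fit time. A self-contained numpy sketch of the same fact:

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(5, 4)
K = X.dot(X.T)                                   # uncentered Gram matrix
n = K.shape[0]
H = np.eye(n) - np.ones((n, n)) / n              # centering matrix
Xc = X - X.mean(axis=0)                          # centering in feature space
assert np.allclose(H.dot(K).dot(H), Xc.dot(Xc.T))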
Example 10: run_svm_validation

def run_svm_validation(X1, y1, X2, y2, gammaRange=[0.5], cRange=[0.005], useLinear=False):
    #X_train,y_train,X_test,y_test = split_train_test(X1,y1,X2,y2)
    X = np.vstack((X1, X2))
    Y = np.hstack((y1, y2))
    scaler = Scaler()
    X = scaler.fit_transform(X)

    #if useLinear == True:
    #    svc = svm.SVC(kernel='linear')  # class_weight={1: 10
    #    #svc = svm.SVC(kernel='poly',degree=3,C=1.0)
    #    svc.fit(X, Y)
    #    return svc

    C_range = 10.0 ** np.arange(-2, 9)
    gamma_range = 10.0 ** np.arange(-5, 4)
    param_grid = dict(gamma=gamma_range, C=C_range)
    grid = GridSearchCV(SVC(class_weight={1: 100}), param_grid=param_grid,
                        cv=StratifiedKFold(y=Y, k=2))
    grid.fit(X, Y)

    print("The best classifier is: ", grid.best_estimator_)
    return grid.best_estimator_

Developer: ajrichards | Project: cytostream | Lines of code: 24 | Source file: SupervisedLearning.py
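One caveat with the function above: the scaler is fit on the pooled X before cross-validation, so each fold's statistics leak into the others. A way to avoid that (a sketch, not part of the source, using the module paths of that scikit-learn era) is to wrap the scaler and the SVC in a Pipeline so the scaler is refit on each training fold:

from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import Scaler
from sklearn.svm import SVC

pipe = Pipeline([('scale', Scaler()), ('svc', SVC(class_weight={1: 100}))])
param_grid = dict(svc__gamma=gamma_range, svc__C=C_range)  # ranges as defined above
grid = GridSearchCV(pipe, param_grid=param_grid, cv=StratifiedKFold(y=Y, k=2))
grid.fit(X, Y)  # X here should be the unscaled stacked data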
Example 11: test_scaler_2d_arrays

def test_scaler_2d_arrays():
    """Test scaling of 2d array along first axis"""
    rng = np.random.RandomState(0)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero

    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    assert_array_almost_equal(X_scaled.mean(axis=0), 5 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # A copy was made: X itself is untouched
    assert_true(X_scaled is not X)

    # check inverse transform
    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_scaled = scale(X, axis=1, with_std=False)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=1), 4 * [0.0])

    X_scaled = scale(X, axis=1, with_std=True)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=1), 4 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=1), 4 * [1.0])
    # Check that the original data has not been modified
    assert_true(X_scaled is not X)

    X_scaled = scaler.fit(X).transform(X, copy=False)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=0), 5 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # No copy was made: the array was scaled in place
    assert_true(X_scaled is X)

    X = rng.randn(4, 5)
    X[:, 0] = 1.0  # first feature is a constant, non-zero feature
    scaler = Scaler()
    X_scaled = scaler.fit(X).transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))
    assert_array_almost_equal(X_scaled.mean(axis=0), 5 * [0.0])
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])
    # A copy was made: X itself is untouched
    assert_true(X_scaled is not X)

Developer: AlexLerman | Project: scikit-learn | Lines of code: 47 | Source file: test_preprocessing.py
Example 12: SVM_fit

def SVM_fit(X_in, y_in, X_out, gamma, C):
    M = len(X_in[0])  # number of features
    seed(time())

    # To prevent data snooping, break the input set into train, cross-validation
    # and test sets, with sizes proportional to 8-1-1.
    # First put aside 10% of the data for the tests.
    test_indices, train_indices = split_indices(len(X_in), int(round(0.1 * len(X_in))))
    shuffle(X_in, y_in)
    X_test = [X_in[i] for i in test_indices]
    y_test = [y_in[i] for i in test_indices]
    X_in = [X_in[i] for i in train_indices]
    y_in = [y_in[i] for i in train_indices]

    # Scale data first
    scaler = Scaler(copy=False)  # in-place modification
    # Normalize the data; the mean and standard deviation are stored as inner
    # parameters of the scaler. To avoid data snooping, normalization is fit on
    # the held-out subset only and then applied to the remaining data.
    scaler.fit(X_test, y_test)
    X_in = scaler.transform(X_in)
    X_test = scaler.transform(X_test)
    X_out = scaler.transform(X_out)  # uses the same transformation (same mean_ and std_) fit before

    std_test = X_test.std(axis=0)
    f_indices = [j for j in range(M) if std_test[j] > 1e-7]
    # Remove features with null variance
    X_in = [[X_in[i][j] for j in f_indices] for i in range(len(X_in))]
    X_test = [[X_test[i][j] for j in f_indices] for i in range(len(X_test))]
    X_out = [[X_out[i][j] for j in f_indices] for i in range(len(X_out))]
    M = len(f_indices)

    svc = svm.SVC(kernel='rbf', C=C, gamma=gamma, verbose=False, cache_size=4092, tol=1e-5)
    svc.fit(X_in, y_in)
    y_out = svc.predict(X_out)
    return y_out

Developer: atul2512 | Project: Quora | Lines of code: 42 | Source file: answer_classifier.py
Example 13: test_scaler_without_centering

def test_scaler_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero
    X_csr = sp.csr_matrix(X)

    scaler = Scaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = Scaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that the inputs have not been modified (copies were made)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_scaled_back, X)

Developer: AlexLerman | Project: scikit-learn | Lines of code: 38 | Source file: test_preprocessing.py
Example 14: Logistic_train

def Logistic_train(X_in, y_in, X_out, cs, file_log=None):
    if file_log:
        file_log.writelines('# of Samples: {}, # of Features: {}\n'.format(len(X_in), len(X_in[0])))
    M = len(X_in[0])  # number of features
    seed(time())

    # To prevent data snooping, break the input set into train, cross-validation
    # and scaling sets, with sizes proportional to 8-1-1.
    # First put aside 10% of the data.
    test_indices, train_indices = split_indices(len(X_in), int(round(0.1 * len(X_in))))
    X_scaler = [X_in[i] for i in test_indices]
    y_scaler = [y_in[i] for i in test_indices]
    X_in = [X_in[i] for i in train_indices]
    y_in = [y_in[i] for i in train_indices]

    # Scale data first
    scaler = Scaler(copy=False)  # in-place modification
    # Normalize the data; the mean and standard deviation are stored as inner
    # parameters of the scaler. To avoid data snooping, normalization is fit on
    # the held-out subset only and then applied to the remaining data.
    scaler.fit(X_scaler, y_scaler)
    X_scaler = scaler.transform(X_scaler)
    X_in = scaler.transform(X_in)
    X_out = scaler.transform(X_out)  # uses the same transformation (same mean_ and std_) fit before

    std_test = X_scaler.std(axis=0)
    f_indices = [j for j in range(M) if std_test[j] > 1e-7]
    # Remove features with null variance
    X_in = [[X_in[i][j] for j in f_indices] for i in range(len(X_in))]
    X_scaler = [[X_scaler[i][j] for j in f_indices] for i in range(len(X_scaler))]
    X_out = [[X_out[i][j] for j in f_indices] for i in range(len(X_out))]
    M = len(X_in[0])

    # Then, on the remaining data, perform a ten-fold cross-validation over
    # the regularization parameters considered
    best_cv_accuracy = 0.
    best_c = 0.
    for c in cs:
        kfold = cross_validation.StratifiedKFold(y_in, k=10)
        lrc = LogisticRegression(C=c, tol=1e-5)
        in_accuracy = 0.
        cv_accuracy = 0.
        for t_indices, cv_indices in kfold:
            X_train = array([X_in[i][:] for i in t_indices])
            y_train = [y_in[i] for i in t_indices]
            X_cv = array([X_in[i][:] for i in cv_indices])
            y_cv = [y_in[i] for i in cv_indices]

            lrc.fit(X_train, y_train)
            in_accuracy += lrc.score(X_train, y_train)
            cv_accuracy += lrc.score(X_cv, y_cv)

        in_accuracy /= kfold.k
        cv_accuracy /= kfold.k

        if file_log:
            file_log.writelines('C: {}\n'.format(c))
            file_log.writelines('\tEin= {}\n'.format(1. - in_accuracy))
            file_log.writelines('\tEcv= {}\n'.format(1. - cv_accuracy))

        if cv_accuracy > best_cv_accuracy:
            best_c = c
            best_cv_accuracy = cv_accuracy

    # Now test the out-of-sample error
    if file_log:
        file_log.writelines('\nBEST result: E_cv={}, C={}\n'.format(1. - best_cv_accuracy, best_c))

    lrc = LogisticRegression(C=best_c, tol=1e-5)
    lrc.fit(X_in, y_in)
    if file_log:
        file_log.writelines('Ein= {}\n'.format(1. - lrc.score(X_in, y_in)))
        file_log.writelines('Etest= {}\n'.format(1. - lrc.score(X_scaler, y_scaler)))

    y_out = lrc.predict(X_out)
    return y_out

Developer: atul2512 | Project: Quora | Lines of code: 85 | Source file: answer_classifier.py
Example 15: SVM_train

def SVM_train(X_in, y_in, X_out, gammas, cs, file_log=None):
    if file_log:
        file_log.writelines('# of Samples: {}, # of Features: {}\n'.format(len(X_in), len(X_in[0])))
    M = len(X_in[0])  # number of features
    seed(time())

    # To prevent data snooping, break the input set into train, cross-validation
    # and scale sets, with sizes proportional to 8-1-1.
    # First put aside 10% of the data.
    scale_set_indices, train_indices = split_indices(len(X_in), int(round(0.1 * len(X_in))))
    # shuffle(X_in, y_in)
    X_scale = [X_in[i] for i in scale_set_indices]
    y_scale = [y_in[i] for i in scale_set_indices]
    X_in = [X_in[i] for i in train_indices]
    y_in = [y_in[i] for i in train_indices]

    # Scale data first
    scaler = Scaler(copy=False)  # WARNING: copy=False => in-place modification
    # Normalize the data; the mean and standard deviation are stored as inner
    # parameters of the scaler. To avoid data snooping, normalization is
    # computed on a separate subset only, and then applied to the data.
    scaler.fit(X_scale, y_scale)
    X_scale = scaler.transform(X_scale)
    X_in = scaler.transform(X_in)
    X_out = scaler.transform(X_out)  # uses the same transformation (same mean_ and std_) fit before

    std_test = X_scale.std(axis=0)
    f_indices = [j for j in range(M) if std_test[j] > 1e-7]
    # Remove features with null variance
    X_in = [[X_in[i][j] for j in f_indices] for i in range(len(X_in))]
    X_scale = [[X_scale[i][j] for j in f_indices] for i in range(len(X_scale))]
    X_out = [[X_out[i][j] for j in f_indices] for i in range(len(X_out))]
    if file_log:
        file_log.writelines('Initial features: {}, Features used: {}\n'.format(M, len(X_in[0])))
    M = len(f_indices)

    best_cv_accuracy = 0.
    best_gamma = 0.
    best_c = 0.
    # Then, on the remaining data, perform a ten-fold cross-validation over
    # the (C, gamma) grid
    for c in cs:
        for g in gammas:
            # Balanced cross-validation (keeps the ratio of the two classes as
            # constant as possible across the k folds).
            kfold = cross_validation.StratifiedKFold(y_in, k=10)
            svc = svm.SVC(kernel='rbf', C=c, gamma=g, verbose=False, cache_size=4092, tol=1e-5)
            in_accuracy = 0.
            cv_accuracy = 0.
            for t_indices, cv_indices in kfold:
                X_train = array([X_in[i][:] for i in t_indices])
                y_train = [y_in[i] for i in t_indices]
                X_cv = array([X_in[i][:] for i in cv_indices])
                y_cv = [y_in[i] for i in cv_indices]

                svc.fit(X_train, y_train)
                in_accuracy += svc.score(X_train, y_train)
                cv_accuracy += svc.score(X_cv, y_cv)

            in_accuracy /= kfold.k
            cv_accuracy /= kfold.k

            if file_log:
                file_log.writelines('C:{}, gamma:{}\n'.format(c, g))
                file_log.writelines('\tEin= {}\n'.format(1. - in_accuracy))
                file_log.writelines('\tEcv= {}\n'.format(1. - cv_accuracy))

            if cv_accuracy > best_cv_accuracy:
                best_gamma = g
                best_c = c
                best_cv_accuracy = cv_accuracy

    if file_log:
        file_log.writelines('\nBEST result: E_cv={}, C={}, gamma={}\n'.format(1. - best_cv_accuracy, best_c, best_gamma))

    svc = svm.SVC(kernel='rbf', C=best_c, gamma=best_gamma, verbose=False, cache_size=4092, tol=1e-5)
    svc.fit(X_in, y_in)
    if file_log:
        file_log.writelines('Ein= {}\n'.format(1. - svc.score(X_in, y_in)))
        file_log.writelines('Etest= {}\n'.format(1. - svc.score(X_scale, y_scale)))

    y_out = svc.predict(X_out)
    # DEBUG: output = ['{} {:+}\n'.format(id_out[i], int(y_scale[i])) for i in range(len(X_out))]
    # DEBUG: file_log.writelines('------------------------')
    return y_out

Developer: atul2512 | Project: Quora | Lines of code: 94 | Source file: answer_classifier.py
Example 16: StratifiedKFold

if folding == "stratified":
    cv = StratifiedKFold(y, k=n_folds)
elif folding == "kfolding":
    cv = KFold(n=y.shape[0], k=n_folds)
elif folding == "leaveoneout":
    n_folds[0] = y.shape[0]
    cv = LeaveOneOut(n=y.shape[0])
else:
    print("unknown crossvalidation method!")

# -- classifier
clf = svm.SVC(kernel="linear", probability=True, C=svm_C)
# -- normalizer
scaler = Scaler()
# -- feature selection
fs = SelectPercentile(f_classif, percentile=fs_n)

print("INITIALIZE RESULTS")
if compute_predict:
    # 0.0 ** np.nan evaluates to nan, so these expressions build NaN-filled arrays
    predict = np.zeros([n_splits, n_samples, n_dims, n_dims_tg]) ** np.nan
    predictg = np.zeros([n_splits, n_samplesg, n_dimsg, n_dimsg_tg, n_folds]) ** np.nan
else:
    predict = []
    predictg = []
if compute_probas:
    probas = np.zeros([n_splits, n_samples, n_dims, n_dims_tg, n_classes]) ** np.nan
    probasg = np.zeros([n_splits, n_samplesg, n_dimsg, n_dimsg_tg, n_classes, n_folds]) ** np.nan

Developer: kingjr | Project: natmeg_arhus | Lines of code: 31 | Source file: skl_king.py
Example 17: StratifiedKFold

if folding == 'stratified':
    cv = StratifiedKFold(y, k=n_folds)
elif folding == 'kfolding':
    cv = KFold(n=y.shape[0], k=n_folds)
elif folding == 'leaveoneout':
    n_folds[0] = y.shape[0]
    cv = LeaveOneOut(n=y.shape[0])
else:
    print("unknown crossvalidation method!")

#-- classifier
clf = svm.SVC(kernel='linear', probability=True, C=svm_C)
#-- normalizer
scaler = Scaler()
#-- feature selection
fs = SelectPercentile(f_classif, percentile=fs_n)
#-- grid search
#parameters = {'svm__C': (1e-6, 1e-3, 1e-1, .4)}
#clf = GridSearchCV(svm, parameters, n_jobs=1)

#-- initialize results (0.0 ** np.nan evaluates to nan, so these are NaN-filled
#   arrays; np.empty(...) ** 0 is an array of ones)
predict = np.zeros([n_splits, n_samples, n_dims]) ** np.nan
probas = np.zeros([n_splits, n_samples, n_dims, n_classes]) ** np.nan
predictg = np.zeros([n_splits, n_samplesg, n_dimsg, n_folds]) ** np.nan
probasg = np.zeros([n_splits, n_samplesg, n_dimsg, n_classes, n_folds]) ** np.nan
coef = np.empty([n_splits, n_folds, n_dims, n_classes * (n_classes - 1) / 2, n_features]) ** 0
all_folds = np.zeros([n_splits, n_folds, n_samples]) ** np.nan
y_shfl = np.copy(y)

Developer: SherazKhan | Project: natmeg_arhus | Lines of code: 31 | Source file: skl_svm.py
Example 18: Scaler

k = 10
records = data[:, 1:]
labels = data[:, 0]
n_train = 35000
#n_val = n - n_train
n_val = 7000
trainset = records[:n_train, :]
trainlabels = labels[:n_train]
#valset = records[n_train:,:]
#vallabels = labels[n_train:,:]
valset = records[n_train:n_train + n_val, :]
vallabels = labels[n_train:n_train + n_val]
n, dim = trainset.shape

# mean centering, stdev normalization and whitening
scaler = Scaler()
scaler.fit(trainset)
trainset = scaler.transform(trainset)
valset = scaler.transform(valset)
pca = PCA(n_components=dim, whiten=True)
pca.fit(trainset)
trainset = pca.transform(trainset)
valset = pca.transform(valset)

config = Train_config()
config.iterations = 10
config.nonlinearity = 'tanh'
config.batchsize = 50
config.learning_rate = 0.2
config.momentum = 0.7

log = open('log.txt', 'w')

Developer: hendrik-p | Project: neural_net | Lines of code: 31 | Source file: net.py
Example 19: main

def main():
    X = []
    Y = []
    featuresDB = Base(os.getcwd() + "\\Databases\\features.db")
    featuresDB.open()
    print "features open"

    for rec in featuresDB:
        vec = []
        vec.append(rec.f1)
        vec.append(rec.f3)
        vec.append(rec.f4)
        vec.append(rec.f5)
        vec.append(rec.f6)
        vec.append(rec.f7)
        vec.append(rec.f10)
        vec.append(rec.f11)
        vec.append(rec.f12)
        vec.append(rec.f13)
        vec.append(rec.f14)
        vec.append(rec.f15)
        vec.append(rec.f16)
        vec.append(rec.f17)
        vec.append(rec.f18)
        vec.append(rec.f19)
        vec.append(rec.f20)
        vec.append(rec.f21)
        vec.append(rec.f22)
        vec.append(rec.f23)
        X.append(vec)
        Y.append(rec.score)

    print "building classifier"
    Y = np.array(Y)
    ybar = Y.mean()
    # binarize the scores around their mean
    for i in range(len(Y)):
        if Y[i] < ybar:
            Y[i] = 1
        else:
            Y[i] = 2

    scaler = Scaler().fit(X)
    X = scaler.transform(X)
    X = np.array(X)
    Y = np.array(Y)

    skf = cross_validation.StratifiedKFold(Y, k=2)
    for train, test in skf:
        X_train, X_test = X[train], X[test]
        y_train, y_test = Y[train], Y[test]

    clf = ExtraTreesClassifier(n_estimators=8, max_depth=None, min_split=1,
                               random_state=0, compute_importances=True)
    scores = cross_validation.cross_val_score(clf, X_train, y_train, cv=5)
    clf.fit_transform(X_train, y_train)
    print "Accuracy: %0.4f (+/- %0.2f)" % (scores.mean(), scores.std() / 2)
    print clf.feature_importances_

    y_pred = clf.predict(X_test)
    print classification_report(y_test, y_pred)

    model = (scaler, clf)
    joblib.dump(model, 'AestheticModel\\aestheticModel.pkl')
    print "Done"

Developer: Perchik | Project: RoD2 | Lines of code: 66 | Source file: AestheticModel.py
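The (scaler, clf) pair persisted above can be restored later and applied in the same order; a short sketch (the load path mirrors the dump call, the joblib import path is one common choice of that era, and X_new stands in for hypothetical new feature rows):

from sklearn.externals import joblib

scaler, clf = joblib.load('AestheticModel\\aestheticModel.pkl')
X_new_scaled = scaler.transform(X_new)  # X_new: hypothetical unseen feature matrix
y_new = clf.predict(X_new_scaled)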
Example 20: load_kernels

def load_kernels(
        dataset, tr_norms=['std', 'sqrt', 'L2'], te_norms=['std', 'sqrt', 'L2'],
        analytical_fim=False, pi_derivatives=False, sqrt_nr_descs=False,
        only_train=False, verbose=0, do_plot=False, outfile=None):

    tr_outfile = outfile % "train" if outfile is not None else outfile
    # Load sufficient statistics.
    samples, _ = dataset.get_data('train')
    tr_data, tr_counts, tr_labels = load_video_data(
        dataset, samples, outfile=tr_outfile, analytical_fim=analytical_fim,
        pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs, verbose=verbose)

    if verbose > 0:
        print "Train data: %dx%d" % tr_data.shape

    if do_plot:
        plot_fisher_vector(tr_data[0], 'before')

    scalers = []
    for norm in tr_norms:
        if norm == 'std':
            scaler = Scaler()
            tr_data = scaler.fit_transform(tr_data)
            scalers.append(scaler)
        elif norm == 'sqrt':
            tr_data = power_normalize(tr_data, 0.5)
        elif norm == 'sqrt_cnt':
            tr_data = approximate_signed_sqrt(
                tr_data, tr_counts, pi_derivatives=pi_derivatives)
        elif norm == 'L2':
            tr_data = L2_normalize(tr_data)
        if do_plot:
            plot_fisher_vector(tr_data[0], 'after_%s' % norm)

    tr_kernel = np.dot(tr_data, tr_data.T)

    if only_train:
        return tr_kernel, tr_labels, scalers, tr_data

    te_outfile = outfile % "test" if outfile is not None else outfile
    # Load sufficient statistics.
    samples, _ = dataset.get_data('test')
    te_data, te_counts, te_labels = load_video_data(
        dataset, samples, outfile=te_outfile, analytical_fim=analytical_fim,
        pi_derivatives=pi_derivatives, sqrt_nr_descs=sqrt_nr_descs, verbose=verbose)

    if verbose > 0:
        print "Test data: %dx%d" % te_data.shape

    ii = 0
    for norm in te_norms:
        if norm == 'std':
            # reuse the scaler fitted on the corresponding train-side 'std' step
            te_data = scalers[ii].transform(te_data)
            ii += 1
        elif norm == 'sqrt':
            te_data = power_normalize(te_data, 0.5)
        elif norm == 'sqrt_cnt':
            te_data = approximate_signed_sqrt(
                te_data, te_counts, pi_derivatives=pi_derivatives)
        elif norm == 'L2':
            te_data = L2_normalize(te_data)

    te_kernel = np.dot(te_data, tr_data.T)

    return tr_kernel, tr_labels, te_kernel, te_labels

Developer: danoneata | Project: approx_norm_fv | Lines of code: 67 | Source file: load_data.py
Note: The sklearn.preprocessing.Scaler class examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various authors; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not reproduce without permission.