本文整理汇总了Python中sklearn.preprocessing.data.StandardScaler类的典型用法代码示例。如果您正苦于以下问题:Python StandardScaler类的具体用法?Python StandardScaler怎么用?Python StandardScaler使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了StandardScaler类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_scaler_1d
def test_scaler_1d():
    """Check that one-dimensional input is standardized along its only axis."""

    def _check_standardized(values):
        # A standardized sample has zero mean and unit standard deviation.
        assert_array_almost_equal(values.mean(axis=0), 0.0)
        assert_array_almost_equal(values.std(axis=0), 1.0)

    random_state = np.random.RandomState(0)
    X = random_state.randn(5)
    # The transform below runs with copy=False, so keep an untouched copy of
    # the input for the round-trip comparison.
    pristine = X.copy()

    fitted = StandardScaler()
    standardized = fitted.fit(X).transform(X, copy=False)
    _check_standardized(standardized)

    # inverse_transform must recover the original values
    restored = fitted.inverse_transform(standardized)
    assert_array_almost_equal(restored, pristine)

    # Same checks when the input is a plain 1D Python list
    X = [0., 1., 2, 0.4, 1.]
    fitted = StandardScaler()
    standardized = fitted.fit(X).transform(X, copy=False)
    _check_standardized(standardized)

    # The scale() convenience function must agree with the estimator
    _check_standardized(scale(X))

    # With centering disabled, an all-ones vector must come back unchanged
    X = np.ones(5)
    assert_array_equal(scale(X, with_mean=False), X)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:28,代码来源:test_data.py
示例2: read_file
def read_file():
    """Load ``train.csv`` / ``test.csv`` and return vectorized feature matrices.

    Steps performed:
      1. Read both CSV files from the current working directory.
      2. Drop the ``Id`` column (and ``Response`` for the training set).
      3. Replace missing values with the sentinel ``-1``.
      4. Standardize the non-categorical columns with ``StandardScaler``,
         fitted on the concatenation of training and test rows.
      5. Vectorize the rows with a single ``DictVectorizer`` fitted on the
         training rows, so both matrices share the same feature columns.

    Returns:
        tuple: ``(train_datas, train_lables, test_ids, test_datas)`` where the
        two ``*_datas`` entries are dense arrays, ``train_lables`` holds the
        ``Response`` values and ``test_ids`` the ``Id`` values of the test set.
    """
    file_content = pd.read_csv('train.csv')
    test_file = pd.read_csv('test.csv')

    exc_cols = [u'Id', u'Response']
    # Plain column selection replaces the removed ``.ix`` indexer.
    train_datas = file_content[[c for c in file_content.columns if c not in exc_cols]]
    train_lables = file_content['Response'].values
    test_ids = test_file['Id'].values
    test_datas = test_file[[c for c in test_file.columns if c != u'Id']]

    # Missing values are replaced by the sentinel -1 (not by a column mean).
    test_datas = test_datas.fillna(-1)
    train_datas = train_datas.fillna(-1)
    n_train = train_datas.shape[0]
    all_datas = pd.concat([train_datas, test_datas], axis=0)

    # Split the columns into categorical and numerical groups.
    categoricalVariables = [
        "Product_Info_1", "Product_Info_2", "Product_Info_3",
        "Product_Info_5", "Product_Info_6", "Product_Info_7",
        "Employment_Info_2", "Employment_Info_3", "Employment_Info_5",
        "InsuredInfo_1", "InsuredInfo_2", "InsuredInfo_3", "InsuredInfo_4",
        "InsuredInfo_5", "InsuredInfo_6", "InsuredInfo_7",
        "Insurance_History_1", "Insurance_History_2", "Insurance_History_3",
        "Insurance_History_4", "Insurance_History_7", "Insurance_History_8",
        "Insurance_History_9", "Family_Hist_1",
        "Medical_History_2", "Medical_History_3", "Medical_History_4",
        "Medical_History_5", "Medical_History_6", "Medical_History_7",
        "Medical_History_8", "Medical_History_9", "Medical_History_10",
        "Medical_History_11", "Medical_History_12", "Medical_History_13",
        "Medical_History_14", "Medical_History_16", "Medical_History_17",
        "Medical_History_18", "Medical_History_19", "Medical_History_20",
        "Medical_History_21", "Medical_History_22", "Medical_History_23",
        "Medical_History_25", "Medical_History_26", "Medical_History_27",
        "Medical_History_28", "Medical_History_29", "Medical_History_30",
        "Medical_History_31", "Medical_History_33", "Medical_History_34",
        "Medical_History_35", "Medical_History_36", "Medical_History_37",
        "Medical_History_38", "Medical_History_39", "Medical_History_40",
        "Medical_History_41",
    ]
    categorical = set(categoricalVariables)
    all_file_data = all_datas[[c for c in all_datas.columns if c not in categorical]]
    all_file_cate = all_datas[categoricalVariables]

    # Standardize the numerical columns. fit_transform returns a new array
    # and leaves its input untouched, so the result has to be stored back;
    # the original code discarded it and the data was never scaled.
    scalar_this = StandardScaler()
    all_file_data = pd.DataFrame(
        scalar_this.fit_transform(all_file_data),
        columns=all_file_data.columns,
        index=all_file_data.index,
    )

    # Re-assemble the training and test frames from the two column groups.
    train_datas = pd.concat([all_file_data[:n_train], all_file_cate[:n_train]], axis=1)
    test_datas = pd.concat([all_file_data[n_train:], all_file_cate[n_train:]], axis=1)

    # Vectorize. One vectorizer is fitted on the training rows and reused for
    # the test rows so both matrices have identical columns in the same order.
    vectorizer = DictVectorizer()
    train_datas = vectorizer.fit_transform(train_datas.to_dict(orient='records')).toarray()
    test_datas = vectorizer.transform(test_datas.to_dict(orient='records')).toarray()
    return (train_datas, train_lables, test_ids, test_datas)
开发者ID:xuerenlv,项目名称:PaperWork,代码行数:34,代码来源:kaggle_homework.py
示例3: test_scalar
def test_scalar():
    """Standardize every training feature column of the train and test CSVs.

    For each column in ``TOTAL_TRAINING_FEATURE_COLUMNS`` the scaler is
    fitted on the (first 200000 rows of the) training data and the same
    fitted parameters are then applied to the test data. The scaled frames
    are local to this function; nothing is returned.
    """
    # Import from the public package; ``sklearn.preprocessing.data`` is a
    # private module path.
    from sklearn.preprocessing import StandardScaler

    scalar = StandardScaler()
    training = pd.read_csv(TRAIN_FEATURES_CSV, nrows=200000)
    test = pd.read_csv(TEST_FEATURES_CSV)

    # normalize the values
    for column in TOTAL_TRAINING_FEATURE_COLUMNS:
        # The scaler expects 2D input of shape (n_samples, n_features):
        # select the column as a one-column frame and flatten the result
        # before writing it back.
        training[column] = scalar.fit_transform(training[[column]]).ravel()
        test[column] = scalar.transform(test[[column]]).ravel()
开发者ID:testing32,项目名称:bimbo,代码行数:11,代码来源:analyze_data.py
示例4: test_scale_sparse_with_mean_raise_exception
def test_scale_sparse_with_mean_raise_exception():
    """Check that centering sparse input is rejected with a ValueError."""
    random_state = np.random.RandomState(42)
    dense = random_state.randn(4, 5)
    dense_as_csr = sparse.csr_matrix(dense)

    # Direct calls on sparse data: both scale() and fit() must refuse
    assert_raises(ValueError, scale, dense_as_csr, with_mean=True)
    unfitted = StandardScaler(with_mean=True)
    assert_raises(ValueError, unfitted.fit, dense_as_csr)

    # After fitting on the dense array, transforming sparse input must
    # still raise, in both directions
    fitted = StandardScaler(with_mean=True).fit(dense)
    assert_raises(ValueError, fitted.transform, dense_as_csr)
    transformed_as_csr = sparse.csr_matrix(fitted.transform(dense))
    assert_raises(ValueError, fitted.inverse_transform, transformed_as_csr)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:15,代码来源:test_data.py
示例5: test_center_kernel
def test_center_kernel():
    """Check that KernelCenterer matches mean removal in feature space.

    The linear kernel of mean-centered data must equal the centered linear
    kernel of the raw data, both at fit time and at predict time.
    """
    random_state = np.random.RandomState(0)
    train_points = random_state.random_sample((5, 4))

    # Reference: remove the mean explicitly (no rescaling), then build the
    # linear kernel from the centered points
    mean_remover = StandardScaler(with_std=False)
    mean_remover.fit(train_points)
    train_centered = mean_remover.transform(train_points)
    expected_fit_kernel = np.dot(train_centered, train_centered.T)

    # Fit-time kernel matrix centered by KernelCenterer
    raw_fit_kernel = np.dot(train_points, train_points.T)
    kernel_centerer = KernelCenterer()
    actual_fit_kernel = kernel_centerer.fit_transform(raw_fit_kernel)
    assert_array_almost_equal(expected_fit_kernel, actual_fit_kernel)

    # Predict-time kernel matrix between new points and the training points
    new_points = random_state.random_sample((2, 4))
    raw_pred_kernel = np.dot(new_points, train_points.T)
    new_centered = mean_remover.transform(new_points)
    expected_pred_kernel = np.dot(new_centered, train_centered.T)
    actual_pred_kernel = kernel_centerer.transform(raw_pred_kernel)
    assert_array_almost_equal(expected_pred_kernel, actual_pred_kernel)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:23,代码来源:test_data.py
示例6: train
def train(self, dataset_filename,
          scale=True,
          feature_selector=None,
          feature_selection_params=None,
          feature_selection_threshold=.25,
          learning_params=None,
          optimize=True,
          optimization_params=None,
          scorers=None,
          attribute_set=None,
          class_name=None,
          metaresults_prefix="./0-",
          **kwargs):
    """Load a dataset, optionally scale it, select features and fit the learner.

    Args:
        dataset_filename: passed to ``dataset_to_instances`` to load the data.
        scale: when true, standardize the data with ``StandardScaler`` and
            keep the fitted scaler in ``self.scaler``.
        feature_selector: forwarded to ``self.run_feature_selection``.
        feature_selection_params: dict of parameters for feature selection,
            or a string that is evaluated into one. Defaults to ``{}``.
        feature_selection_threshold: forwarded to ``self.run_feature_selection``.
        learning_params: forwarded to ``self.initialize_learning_method``.
            Defaults to ``{}``.
        optimize: forwarded to ``self.initialize_learning_method``.
        optimization_params: forwarded to ``self.initialize_learning_method``.
            Defaults to ``{}``.
        scorers: forwarded to ``self.initialize_learning_method``.
            Defaults to ``['f1_score']``.
        attribute_set: remembered on the instance and used to load the data.
        class_name: remembered on the instance and used to load the data.
        metaresults_prefix: prefix of the feature-selection plot filename.
        **kwargs: forwarded to ``dataset_to_instances``.

    Returns:
        The metadata object returned by ``self.run_feature_selection``.
    """
    # Build the default containers per call: a mutable default argument is
    # shared between calls, so a mutation downstream would leak into every
    # later call.
    if feature_selection_params is None:
        feature_selection_params = {}
    if learning_params is None:
        learning_params = {}
    if optimization_params is None:
        optimization_params = {}
    if scorers is None:
        scorers = ['f1_score']

    plot_filename = "{}{}".format(metaresults_prefix, "featureselection.pdf")
    data, labels = dataset_to_instances(dataset_filename, attribute_set, class_name, **kwargs)
    learner = self.learner

    # the class must remember the attribute_set and the class_name in order
    # to reproduce the vectors
    self.attribute_set = attribute_set
    self.class_name = class_name

    # scale data to the mean
    if scale:
        log.info("Scaling datasets...")
        log.debug("Data shape before scaling: {}".format(data.shape))
        self.scaler = StandardScaler()
        data = self.scaler.fit_transform(data)
        log.debug("Data shape after scaling: {}".format(data.shape))
        # Newer scikit-learn releases expose the per-feature scale as
        # ``scale_`` and no longer provide ``std_``; look up both so that a
        # debug message can never abort training.
        scaler_std = getattr(self.scaler, "scale_", None)
        if scaler_std is None:
            scaler_std = getattr(self.scaler, "std_", None)
        log.debug("Mean: {} , Std: {}".format(self.scaler.mean_, scaler_std))

    # avoid any NaNs and Infs that may have occurred due to the scaling
    data = np.nan_to_num(data)

    # feature selection
    if isinstance(feature_selection_params, basestring):
        # SECURITY NOTE(review): eval() executes arbitrary code. Only pass
        # strings from trusted configuration here; consider
        # ast.literal_eval if the parameters are always plain literals.
        feature_selection_params = eval(feature_selection_params)
    self.featureselector, data, metadata = self.run_feature_selection(
        data, labels, feature_selector, feature_selection_params,
        feature_selection_threshold, plot_filename)

    # initialize learning method and scoring functions and optimize
    self.learner, self.scorers = self.initialize_learning_method(
        learner, data, labels, learning_params, optimize,
        optimization_params, scorers)
    log.info("Data shape before fitting: {}".format(data.shape))
    self.learner.fit(data, labels)
    # Flag marking the instance as trained.
    self.fit = True
    return metadata
开发者ID:lefterav,项目名称:qualitative,代码行数:48,代码来源:ranking.py
示例7: test_scaler_2d_arrays
def test_scaler_2d_arrays():
    """Check standardization of 2D arrays along the first axis."""

    def _check_no_nan(arr):
        assert_false(np.any(np.isnan(arr)))

    def _check_column_scaling(arr):
        # Every column is centered; the constant first column keeps a zero
        # spread while the other four end up with unit standard deviation.
        _check_no_nan(arr)
        assert_array_almost_equal(arr.mean(axis=0), 5 * [0.0])
        assert_array_almost_equal(arr.std(axis=0), [0., 1., 1., 1., 1.])

    random_state = np.random.RandomState(0)
    X = random_state.randn(4, 5)
    X[:, 0] = 0.0  # the first feature is constantly zero

    scaler = StandardScaler()
    scaled = scaler.fit(X).transform(X, copy=True)
    _check_column_scaling(scaled)
    # copy=True: the result must be a different object than the input
    assert_true(scaled is not X)

    # Round trip: inverse_transform returns a new array equal to the input
    restored = scaler.inverse_transform(scaled)
    assert_true(restored is not X)
    assert_true(restored is not scaled)
    assert_array_almost_equal(restored, X)

    # Row-wise centering only
    scaled = scale(X, axis=1, with_std=False)
    _check_no_nan(scaled)
    assert_array_almost_equal(scaled.mean(axis=1), 4 * [0.0])

    # Row-wise centering and scaling
    scaled = scale(X, axis=1, with_std=True)
    _check_no_nan(scaled)
    assert_array_almost_equal(scaled.mean(axis=1), 4 * [0.0])
    assert_array_almost_equal(scaled.std(axis=1), 4 * [1.0])
    # scale() must hand back a new object, not its input
    assert_true(scaled is not X)

    # copy=False: the transform must return the very same object
    scaled = scaler.fit(X).transform(X, copy=False)
    _check_column_scaling(scaled)
    assert_true(scaled is X)

    # Same checks with a constant, non-zero first feature
    X = random_state.randn(4, 5)
    X[:, 0] = 1.0
    scaler = StandardScaler()
    scaled = scaler.fit(X).transform(X, copy=True)
    _check_column_scaling(scaled)
    # copy=True again: the input must have been copied
    assert_true(scaled is not X)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:47,代码来源:test_data.py
示例8: test_scaler_int
def test_scaler_int():
    """Check that StandardScaler handles integer input, dense and sparse.

    The scaler is fitted without centering on a dense integer array and on
    its CSR and CSC counterparts; the three fits must agree, the scaled
    output must contain no NaN and inverse_transform must recover the input.
    """
    rng = np.random.RandomState(42)
    X = rng.randint(20, size=(4, 5))
    X[:, 0] = 0  # first feature is always zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # With centering and scaling both disabled the stored data must be
    # unchanged in both directions.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    # Warnings raised while fitting are recorded and thereby kept out of the
    # test output (presumably the integer-to-float conversion warning).
    with warnings.catch_warnings(record=True):
        X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    with warnings.catch_warnings(record=True):
        scaler = StandardScaler(with_mean=False).fit(X)
        X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    with warnings.catch_warnings(record=True):
        scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
        X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    with warnings.catch_warnings(record=True):
        scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
        # Use the scaler fitted on the CSC matrix: transforming with
        # scaler_csr here would leave scaler_csc.transform untested.
        X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    # All three fits must have learned the same statistics
    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    # No centering was requested, so the column means are not zero
    assert_array_almost_equal(
        X_scaled.mean(axis=0),
        [0., 1.109, 1.856, 21., 1.559], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    # np.float64 is the explicit spelling of the removed ``np.float`` alias
    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(
        X_csr_scaled.astype(np.float64))
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # Round trip of the CSC data through the scaler fitted on CSC input
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:65,代码来源:test_data.py
示例9: test_scaler_without_centering
def test_scaler_without_centering():
    """Check StandardScaler(with_mean=False) on dense, CSR and CSC input.

    Fitting with the default centering on sparse input must raise; without
    centering the three fits must agree, the scaled output must contain no
    NaN and inverse_transform must recover the input.
    """
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    # The default scaler centers the data, which is rejected for sparse input
    assert_raises(ValueError, StandardScaler().fit, X_csr)

    # With centering and scaling both disabled the stored data must be
    # unchanged in both directions.
    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    scaler = StandardScaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
    # Use the scaler fitted on the CSC matrix: transforming with scaler_csr
    # here would leave scaler_csc.transform untested.
    X_csc_scaled = scaler_csc.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    # All three fits must have learned the same statistics
    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    # No centering was requested, so the column means are not zero
    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    # Round trip of the CSC data through the scaler fitted on CSC input
    X_csc_scaled_back = scaler_csc.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:59,代码来源:test_data.py
示例10: SkRanker
class SkRanker(Ranker, SkLearner):
'''
Basic ranker wrapping scikit-learn functions
'''
def train(self, dataset_filename,
          scale=True,
          feature_selector=None,
          feature_selection_params=None,
          feature_selection_threshold=.25,
          learning_params=None,
          optimize=True,
          optimization_params=None,
          scorers=None,
          attribute_set=None,
          class_name=None,
          metaresults_prefix="./0-",
          **kwargs):
    """Load a dataset, optionally scale it, select features and fit the learner.

    Args:
        dataset_filename: passed to ``dataset_to_instances`` to load the data.
        scale: when true, standardize the data with ``StandardScaler`` and
            keep the fitted scaler in ``self.scaler``.
        feature_selector: forwarded to ``self.run_feature_selection``.
        feature_selection_params: dict of parameters for feature selection,
            or a string that is evaluated into one. Defaults to ``{}``.
        feature_selection_threshold: forwarded to ``self.run_feature_selection``.
        learning_params: forwarded to ``self.initialize_learning_method``.
            Defaults to ``{}``.
        optimize: forwarded to ``self.initialize_learning_method``.
        optimization_params: forwarded to ``self.initialize_learning_method``.
            Defaults to ``{}``.
        scorers: forwarded to ``self.initialize_learning_method``.
            Defaults to ``['f1_score']``.
        attribute_set: remembered on the instance and used to load the data.
        class_name: remembered on the instance and used to load the data.
        metaresults_prefix: prefix of the feature-selection plot filename.
        **kwargs: forwarded to ``dataset_to_instances``.

    Returns:
        The metadata object returned by ``self.run_feature_selection``.
    """
    # Build the default containers per call: a mutable default argument is
    # shared between calls, so a mutation downstream would leak into every
    # later call.
    if feature_selection_params is None:
        feature_selection_params = {}
    if learning_params is None:
        learning_params = {}
    if optimization_params is None:
        optimization_params = {}
    if scorers is None:
        scorers = ['f1_score']

    plot_filename = "{}{}".format(metaresults_prefix, "featureselection.pdf")
    data, labels = dataset_to_instances(dataset_filename, attribute_set, class_name, **kwargs)
    learner = self.learner

    # the class must remember the attribute_set and the class_name in order
    # to reproduce the vectors
    self.attribute_set = attribute_set
    self.class_name = class_name

    # scale data to the mean
    if scale:
        log.info("Scaling datasets...")
        log.debug("Data shape before scaling: {}".format(data.shape))
        self.scaler = StandardScaler()
        data = self.scaler.fit_transform(data)
        log.debug("Data shape after scaling: {}".format(data.shape))
        # Newer scikit-learn releases expose the per-feature scale as
        # ``scale_`` and no longer provide ``std_``; look up both so that a
        # debug message can never abort training.
        scaler_std = getattr(self.scaler, "scale_", None)
        if scaler_std is None:
            scaler_std = getattr(self.scaler, "std_", None)
        log.debug("Mean: {} , Std: {}".format(self.scaler.mean_, scaler_std))

    # avoid any NaNs and Infs that may have occurred due to the scaling
    data = np.nan_to_num(data)

    # feature selection
    if isinstance(feature_selection_params, basestring):
        # SECURITY NOTE(review): eval() executes arbitrary code. Only pass
        # strings from trusted configuration here; consider
        # ast.literal_eval if the parameters are always plain literals.
        feature_selection_params = eval(feature_selection_params)
    self.featureselector, data, metadata = self.run_feature_selection(
        data, labels, feature_selector, feature_selection_params,
        feature_selection_threshold, plot_filename)

    # initialize learning method and scoring functions and optimize
    self.learner, self.scorers = self.initialize_learning_method(
        learner, data, labels, learning_params, optimize,
        optimization_params, scorers)
    log.info("Data shape before fitting: {}".format(data.shape))
    self.learner.fit(data, labels)
    # Flag marking the instance as trained.
    self.fit = True
    return metadata
def get_model_description(self):
    """Return a dict describing the fitted model.

    The result depends on what the learner exposes:

    * learner with ``kernel == "rbf"``: the scaler parameters (if a scaler
      is set) plus ``gamma``, ``C`` and one ``n_<i>`` entry per element of
      ``n_support_``;
    * learner with ``kernel == "linear"``: a mapping from the pairwise
      attribute names to the first row of ``coef_``;
    * any other learner offering ``get_params()``: an ``OrderedDict`` of
      those parameters whose value can be converted to ``float``;
    * otherwise an empty dict.
    """
    params = {}
    # The scaler attribute may be missing on the instance (e.g. when it was
    # never assigned), so read it defensively.
    scaler = getattr(self, "scaler", None)
    if scaler:
        params = scaler.get_params(deep=True)

    try:  # these are for SVC
        if self.learner.kernel == "rbf":
            params["gamma"] = self.learner.gamma
            params["C"] = self.learner.C
            for i, n_support in enumerate(self.learner.n_support_):
                params["n_{}".format(i)] = n_support
            log.debug(len(self.learner.dual_coef_))
            return params
        elif self.learner.kernel == "linear":
            coefficients = self.learner.coef_
            names = self.attribute_set.get_names_pairwise()
            return dict(zip(names, coefficients[0]))
    except AttributeError:
        # The learner has no kernel; fall through to the generic description.
        pass

    try:  # adaboost etc
        learner_params = self.learner.get_params()
    except Exception as exc:
        # Describing the model is best effort: report nothing rather than
        # fail, but leave a trace of the reason in the log.
        log.debug("Could not read learner parameters: {}".format(exc))
        return {}

    numeric_params = OrderedDict()
    # items() works on both Python 2 and Python 3 dictionaries
    for key, value in learner_params.items():
        try:
            numeric_params[key] = float(value)
        except (TypeError, ValueError):
            # float(None) raises TypeError and float("text") raises
            # ValueError; such parameters are skipped.
            continue
    return numeric_params
def get_ranked_sentence(self, parallelsentence, critical_attribute="rank_predicted",
new_rank_name="rank_hard",
del_orig_class_att=False,
bidirectional_pairs=False,
ties=True,
reconstruct='hard'):
"""
"""
if type(self.learner) == str:
if self.classifier:
#.........这里部分代码省略.........
开发者ID:lefterav,项目名称:qualitative,代码行数:101,代码来源:ranking.py
注:本文中的sklearn.preprocessing.data.StandardScaler类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论