This article collects typical usage examples of the Python class sklearn.preprocessing.RobustScaler. If you are wondering what RobustScaler does, how to use it, or what real-world code built on it looks like, the curated class examples below may help.
Twenty code examples of the RobustScaler class are shown below, ordered by popularity by default.
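Before the examples, here is a minimal sketch (on made-up data, not taken from any of the projects below) of the basic pattern almost every snippet on this page relies on: RobustScaler centers each feature on its median and scales it by the interquartile range, so a single extreme value barely moves the learned statistics.

import numpy as np
from sklearn.preprocessing import RobustScaler

# Small synthetic matrix with an outlier in the second column
X = np.array([[1.0, 2.0],
              [2.0, 3.0],
              [3.0, 4.0],
              [4.0, 100.0]])

scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

print(scaler.center_)  # per-feature medians
print(scaler.scale_)   # per-feature interquartile ranges
print(X_scaled)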
Example 1: scale_feature_matrix
def scale_feature_matrix(feature_M, linear=False, outliers=False):
    from sklearn.preprocessing import StandardScaler, RobustScaler
    import numpy as np

    binary_fields = [col for col in feature_M.columns if len(set(feature_M[col])) == 2]

    if outliers:
        # Scale to zero median and unit interquartile range
        scaler_obj = RobustScaler()
        print('centering around median')
    else:
        # Scale to zero mean and unit variance
        scaler_obj = StandardScaler()
        print('centering around mean')

    print('found these binaries')
    print('-' * 10)
    print('\n'.join(binary_fields))

    X_scaled = scaler_obj.fit_transform(feature_M.drop(binary_fields, axis=1))
    X_scaled_w_cats = np.c_[X_scaled, feature_M[binary_fields].to_numpy()]
    return X_scaled_w_cats, scaler_obj
Author: asharma567 | Project: cool_tools | Lines: 25 | Source: utils_preprocessing.py
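Example 1 switches between StandardScaler and RobustScaler depending on whether outliers are expected; a quick sketch on made-up numbers (not part of the original project) of why that matters:

import numpy as np
from sklearn.preprocessing import RobustScaler, StandardScaler

x = np.array([[1.0], [2.0], [3.0], [4.0], [1000.0]])  # one extreme outlier

# The outlier inflates the mean and standard deviation, so the inliers
# end up squashed together near the same value.
print(StandardScaler().fit_transform(x).ravel())

# Median/IQR statistics are barely affected by the outlier, so the
# inliers keep a sensible spread and only the outlier lands far away.
print(RobustScaler().fit_transform(x).ravel())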
Example 2: _robust_scaler
def _robust_scaler(self, input_df):
    """Uses scikit-learn's RobustScaler to scale the features using statistics that are robust to outliers

    Parameters
    ----------
    input_df: pandas.DataFrame {n_samples, n_features+['class', 'group', 'guess']}
        Input DataFrame to scale

    Returns
    -------
    scaled_df: pandas.DataFrame {n_samples, n_features + ['guess', 'group', 'class']}
        Returns a DataFrame containing the scaled features
    """
    training_features = input_df.loc[input_df['group'] == 'training'].drop(['class', 'group', 'guess'], axis=1)

    if len(training_features.columns.values) == 0:
        return input_df.copy()

    # The scaler must be fit on only the training data
    scaler = RobustScaler()
    scaler.fit(training_features.values.astype(np.float64))
    scaled_features = scaler.transform(input_df.drop(['class', 'group', 'guess'], axis=1).values.astype(np.float64))

    for col_num, column in enumerate(input_df.drop(['class', 'group', 'guess'], axis=1).columns.values):
        input_df.loc[:, column] = scaled_features[:, col_num]

    return input_df.copy()
Author: vsolano | Project: tpot | Lines: 28 | Source: tpot.py
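The comment in Example 2 ("The scaler must be fit on only the training data") is the usual guard against information leaking from the test split; a minimal sketch of that pattern on random data (independent of the TPOT code above):

import numpy as np
from sklearn.preprocessing import RobustScaler

rng = np.random.default_rng(0)
X_train = rng.normal(size=(100, 3))
X_test = rng.normal(size=(20, 3))

scaler = RobustScaler().fit(X_train)   # medians/IQRs come from the training split only
X_train_scl = scaler.transform(X_train)
X_test_scl = scaler.transform(X_test)  # test data reuses the training statistics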
Example 3: processing
def processing(df):
    dummies_df = pd.get_dummies(df["City Group"])

    def add_CG(name):
        return "CG_" + name

    dummies_df = dummies_df.rename(columns=add_CG)
    # print(dummies_df.head())
    df = pd.concat([df, dummies_df.iloc[:, 0]], axis=1)

    dummies_df = pd.get_dummies(df["Type"])

    def add_Type(name):
        return "Type_" + name

    dummies_df = dummies_df.rename(columns=add_Type)
    df = pd.concat([df, dummies_df.iloc[:, 0:3]], axis=1)

    # try to put in age as a column
    def add_Age(string):
        age = datetime.datetime.now() - datetime.datetime.strptime(string, "%m/%d/%Y")
        return age.days

    df["Age"] = df["Open Date"].map(add_Age)
    df = df.drop(["Id", "Open Date", "City", "City Group", "Type", "revenue"], axis=1)

    # scaler = StandardScaler().fit(df)
    scaler = RobustScaler().fit(df)
    df = scaler.transform(df)
    return df
Author: dtamayo | Project: MachineLearning | Lines: 31 | Source: svm.py
Example 4: ica_analysis
def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## ICA
    ##
    ica = FastICA(n_components=X_train_scl.shape[1])
    X_ica = ica.fit_transform(X_train_scl)

    ##
    ## Plots
    ##
    ph = plot_helper()

    kurt = kurtosis(X_ica)
    print(kurt)

    title = 'Kurtosis (FastICA) for ' + data_set_name
    name = data_set_name.lower() + '_ica_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'

    ph.plot_simple_bar(np.arange(1, len(kurt)+1, 1),
                       kurt,
                       np.arange(1, len(kurt)+1, 1).astype('str'),
                       'Feature Index',
                       'Kurtosis',
                       title,
                       filename)
Author: rbaxter1 | Project: CS7641 | Lines: 30 | Source: part2.py
Example 5: best_ica_wine
def best_ica_wine(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ica = FastICA(n_components=X_train_scl.shape[1])
    X_train_transformed = ica.fit_transform(X_train_scl, y_train)
    X_test_transformed = ica.transform(X_test_scl)

    ## top 2
    kurt = kurtosis(X_train_transformed)
    i = kurt.argsort()[::-1]
    X_train_transformed_sorted = X_train_transformed[:, i]
    X_train_transformed = X_train_transformed_sorted[:, 0:2]

    kurt = kurtosis(X_test_transformed)
    i = kurt.argsort()[::-1]
    X_test_transformed_sorted = X_test_transformed[:, i]
    X_test_transformed = X_test_transformed_sorted[:, 0:2]

    # save
    filename = './' + self.save_dir + '/wine_ica_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_ica_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_ica_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_ica_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Author: rbaxter1 | Project: CS7641 | Lines: 35 | Source: part2.py
Example 6: best_rp_nba
def best_rp_nba(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
    X_train_transformed = rp.fit_transform(X_train_scl, y_train)
    X_test_transformed = rp.transform(X_test_scl)

    ## top 2
    kurt = kurtosis(X_train_transformed)
    i = kurt.argsort()[::-1]
    X_train_transformed_sorted = X_train_transformed[:, i]
    X_train_transformed = X_train_transformed_sorted[:, 0:2]

    kurt = kurtosis(X_test_transformed)
    i = kurt.argsort()[::-1]
    X_test_transformed_sorted = X_test_transformed[:, i]
    X_test_transformed = X_test_transformed_sorted[:, 0:2]

    # save
    filename = './' + self.save_dir + '/nba_rp_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_rp_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_rp_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_rp_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Author: rbaxter1 | Project: CS7641 | Lines: 35 | Source: part2.py
Example 7: num_scaler
def num_scaler(d_num, t_num):
    scl = RobustScaler()
    scl.fit(d_num)
    d_num = scl.transform(d_num)
    t_num = scl.transform(t_num)
    return d_num, t_num
Author: pankaj077 | Project: TI_work | Lines: 7 | Source: AutoClassification.py
Example 8: rp_analysis
def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)

    ks = []
    for i in range(1000):
        ##
        ## Random Projection
        ##
        rp = GaussianRandomProjection(n_components=X_train_scl.shape[1])
        rp.fit(X_train_scl)
        X_train_rp = rp.transform(X_train_scl)
        ks.append(kurtosis(X_train_rp))

    mean_k = np.mean(ks, 0)

    ##
    ## Plots
    ##
    ph = plot_helper()

    title = 'Kurtosis (Randomized Projection) for ' + data_set_name
    name = data_set_name.lower() + '_rp_kurt'
    filename = './' + self.out_dir + '/' + name + '.png'

    ph.plot_simple_bar(np.arange(1, len(mean_k)+1, 1),
                       mean_k,
                       np.arange(1, len(mean_k)+1, 1).astype('str'),
                       'Feature Index',
                       'Kurtosis',
                       title,
                       filename)
Author: rbaxter1 | Project: CS7641 | Lines: 33 | Source: part2.py
Example 9: nn_wine_orig
def nn_wine_orig(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    self.part4.nn_analysis(X_train_scl, X_test_scl, y_train, y_test, 'Wine', 'Neural Network Original')
Author: rbaxter1 | Project: CS7641 | Lines: 9 | Source: part5.py
Example 10: standardize_columns
def standardize_columns(data):
    """
    We decided to standardize the weather features with RobustScaler
    because they contain outliers.
    """
    columns_to_standardize = ['temp', 'atemp', 'humidity', 'windspeed']
    robust_scaler = RobustScaler()
    for column in columns_to_standardize:
        # scikit-learn expects 2-D input, so pass a one-column frame and flatten the result
        data[column] = robust_scaler.fit_transform(data[[column]]).ravel()
    return data
Author: drawer87 | Project: kaggle | Lines: 10 | Source: training.py
Example 11: lda_analysis
def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## Plots
    ##
    ph = plot_helper()

    scores = []
    train_scores = []
    rng = range(1, X_train_scl.shape[1]+1)
    for i in rng:
        lda = LinearDiscriminantAnalysis(n_components=i)
        # pre-0.18 scikit-learn API: KFold(n, n_folds, shuffle=...) is directly iterable
        cv = KFold(X_train_scl.shape[0], 3, shuffle=True)

        # cross validation
        cv_scores = []
        for (train, test) in cv:
            lda.fit(X_train_scl[train], y_train[train])
            score = lda.score(X_train_scl[test], y_train[test])
            cv_scores.append(score)
        mean_score = np.mean(cv_scores)
        scores.append(mean_score)

        # train score
        lda = LinearDiscriminantAnalysis(n_components=i)
        lda.fit(X_train_scl, y_train)
        train_score = lda.score(X_train_scl, y_train)
        train_scores.append(train_score)

        print(i, mean_score)

    ##
    ## Score Plot
    ##
    title = 'Score Summary Plot (LDA) for ' + data_set_name
    name = data_set_name.lower() + '_lda_score'
    filename = './' + self.out_dir + '/' + name + '.png'

    ph.plot_series(rng,
                   [scores, train_scores],
                   [None, None],
                   ['cross validation score', 'training score'],
                   cm.viridis(np.linspace(0, 1, 2)),
                   ['o', '*'],
                   title,
                   'n_components',
                   'Score',
                   filename)
Author: rbaxter1 | Project: CS7641 | Lines: 52 | Source: part2.py
Example 12: demensionReduction
def demensionReduction(numFeatures, cateFeatures):
    """
    Scale the numeric features, reduce them with PCA, and append the
    categorical features.

    :param numFeatures: numeric feature matrix
    :param cateFeatures: categorical feature matrix
    :return: combined feature matrix
    """
    scaler = RobustScaler()
    scaledFeatures = scaler.fit_transform(numFeatures)
    pca = PCA(n_components=5)
    reducedFeatures = pca.fit_transform(scaledFeatures)
    allFeatures = np.concatenate((reducedFeatures, cateFeatures), axis=1)
    return allFeatures
Author: WeihuaLei | Project: LearnSpark | Lines: 13 | Source: credit.py
Example 13: test_robustscaler_vs_sklearn
def test_robustscaler_vs_sklearn():
    # Compare msmbuilder.preprocessing.RobustScaler
    # with sklearn.preprocessing.RobustScaler
    robustscalerr = RobustScalerR()
    robustscalerr.fit(np.concatenate(trajs))

    robustscaler = RobustScaler()
    robustscaler.fit(trajs)

    y_ref1 = robustscalerr.transform(trajs[0])
    y1 = robustscaler.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
Author: Eigenstate | Project: msmbuilder | Lines: 14 | Source: test_preprocessing.py
Example 14: best_lda_cluster_wine
def best_lda_cluster_wine(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    ##
    ## K-Means
    ##
    km = KMeans(n_clusters=4, algorithm='full')
    X_train_transformed = km.fit_transform(X_train_scl)
    X_test_transformed = km.transform(X_test_scl)

    # save
    filename = './' + self.save_dir + '/wine_kmeans_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_kmeans_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_kmeans_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_kmeans_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)

    ##
    ## GMM
    ##
    gmm = GaussianMixture(n_components=4, covariance_type='full')
    gmm.fit(X_train_scl)
    # use the mixture's posterior cluster probabilities as the transformed features
    X_train_transformed = gmm.predict_proba(X_train_scl)
    X_test_transformed = gmm.predict_proba(X_test_scl)

    # save
    filename = './' + self.save_dir + '/wine_gmm_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_gmm_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_gmm_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_gmm_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Author: rbaxter1 | Project: CS7641 | Lines: 48 | Source: part3.py
Example 15: transform_dataframe
def transform_dataframe(dataframe):
    """
    Read a dataframe and scale every column with RobustScaler, which centers
    each column on its median and scales it by its interquartile range.

    Parameters:
        dataframe : Input pandas dataframe
    Input types: pd.Dataframe
    Output types: pd.Dataframe
    """
    cols = [col for col in dataframe.columns]
    robust_scaler = RobustScaler()
    scaled = robust_scaler.fit_transform(dataframe[cols])
    dataframe[cols] = scaled
    return dataframe
Author: gitter-badger | Project: USP-inhibition | Lines: 17 | Source: utils.py
Example 16: scale
def scale(self, columns, categorical_cols, apply_list, target_column):
    from sklearn.preprocessing import RobustScaler
    scaler = RobustScaler()
    if apply_list:
        numerical_cols = columns
    else:
        numerical_cols = []
        for col in self.dataset.columns.values:
            if col not in categorical_cols:
                numerical_cols.append(col)
            else:
                pass
        # We don't want to scale the target variable, as it is already binary.
        # The target column uses the same value as target_value from the Split Data
        # section in the settings popup.
        numerical_cols.remove(target_column)
    # Scale: fit and transform all the numerical columns
    scaled_data = scaler.fit_transform(self.dataset[numerical_cols])
    self.dataset[numerical_cols] = scaled_data
    return self.dataset
Author: Dismeth | Project: gui | Lines: 21 | Source: dataset.py
Example 17: best_lda_nba
def best_lda_nba(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_nba_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    lda = LinearDiscriminantAnalysis(n_components=2)
    X_train_transformed = lda.fit_transform(X_train_scl, y_train)
    X_test_transformed = lda.transform(X_test_scl)

    # save
    filename = './' + self.save_dir + '/nba_lda_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_lda_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_lda_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/nba_lda_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Author: rbaxter1 | Project: CS7641 | Lines: 24 | Source: part2.py
Example 18: detect_bad_channels
def detect_bad_channels(inst, pick_types=None, threshold=.2):
    from sklearn.preprocessing import RobustScaler
    from sklearn.covariance import EmpiricalCovariance
    from jr.stats import median_abs_deviation

    if pick_types is None:
        pick_types = dict(meg='mag')
    inst = inst.pick_types(copy=True, **pick_types)

    cov = EmpiricalCovariance()
    cov.fit(inst._data.T)
    cov = cov.covariance_

    # center
    scaler = RobustScaler()
    cov = scaler.fit_transform(cov).T
    cov /= median_abs_deviation(cov)
    cov -= np.median(cov)

    # compute robust summary metrics
    mu = np.median(cov, axis=0)
    sigma = median_abs_deviation(cov, axis=0)
    mu /= median_abs_deviation(mu)
    sigma /= median_abs_deviation(sigma)
    distance = np.sqrt(mu ** 2 + sigma ** 2)

    bad = np.where(distance < threshold)[0]
    bad = [inst.ch_names[ch] for ch in bad]
    return bad
Author: LauraGwilliams | Project: jr-tools | Lines: 24 | Source: base.py
Example 19: best_pca_wine
def best_pca_wine(self):
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    pca = PCA(n_components=3)
    X_train_transformed = pca.fit_transform(X_train_scl, y_train)
    X_test_transformed = pca.transform(X_test_scl)

    # save
    filename = './' + self.save_dir + '/wine_pca_x_train.txt'
    pd.DataFrame(X_train_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_pca_x_test.txt'
    pd.DataFrame(X_test_transformed).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_pca_y_train.txt'
    pd.DataFrame(y_train).to_csv(filename, header=False, index=False)

    filename = './' + self.save_dir + '/wine_pca_y_test.txt'
    pd.DataFrame(y_test).to_csv(filename, header=False, index=False)
Author: rbaxter1 | Project: CS7641 | Lines: 24 | Source: part2.py
Example 20: __init__
def __init__(self, *args, scale=False, center=False, **kwargs):
    """
    A machine learned model. Beyond :class:`revscoring.Model`, these
    "Learned" models implement
    :func:`~revscoring.scoring.models.Learned.fit` and
    :func:`~revscoring.scoring.models.Learned.cross_validate`.
    """
    super().__init__(*args, **kwargs)
    self.trained = None

    if scale or center:
        self.scaler = RobustScaler(with_centering=center,
                                   with_scaling=scale)
    else:
        self.scaler = None

    self.params.update({
        'scale': scale,
        'center': center
    })
Author: wiki-ai | Project: revscoring | Lines: 19 | Source: model.py
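The constructor in Example 20 forwards its scale/center flags to RobustScaler's with_scaling/with_centering parameters; a small sketch on made-up numbers (unrelated to revscoring) of what each flag does on its own:

import numpy as np
from sklearn.preprocessing import RobustScaler

X = np.array([[1.0], [5.0], [9.0], [200.0]])

# Center only: subtract the median, keep the original spread
print(RobustScaler(with_centering=True, with_scaling=False).fit_transform(X).ravel())

# Scale only: divide by the IQR, keep the original location
print(RobustScaler(with_centering=False, with_scaling=True).fit_transform(X).ravel())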
Note: The sklearn.preprocessing.RobustScaler class examples in this article were compiled by 纯净天空 from source code and documentation hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by their respective developers; copyright remains with the original authors, and any redistribution or use should follow the corresponding project's license. Do not republish without permission.