• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python preprocessing.RobustScaler类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中sklearn.preprocessing.RobustScaler的典型用法代码示例。如果您正苦于以下问题:Python RobustScaler类的具体用法?Python RobustScaler怎么用?Python RobustScaler使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了RobustScaler类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: scale_feature_matrix

def scale_feature_matrix(feature_M, linear=False, outliers=False):
    """Scale the numeric columns of a feature matrix, leaving binary columns as-is.

    Parameters
    ----------
    feature_M : pandas.DataFrame
        Feature matrix; any column with exactly two distinct values is
        treated as binary and excluded from scaling.
    linear : bool, optional
        Unused; kept for backward compatibility with existing callers.
    outliers : bool, optional
        If True, use RobustScaler (median/IQR, robust to outliers);
        otherwise use StandardScaler (zero mean, unit variance).

    Returns
    -------
    tuple of (numpy.ndarray, scaler)
        Scaled numeric features with the untouched binary columns appended
        on the right, plus the fitted scaler object.
    """
    from sklearn.preprocessing import StandardScaler, RobustScaler
    import numpy as np

    binary_fields = [col for col in feature_M.columns if len(set(feature_M[col])) == 2]

    if outliers:
        # Scaling 0 median & unit IQR -- robust to outliers.
        scaler_obj = RobustScaler()
        print('centering around median')
    else:
        # Scaling 0 mean & unit variance.
        scaler_obj = StandardScaler()
        print('centering around mean')

    print('found these binaries')
    print('-' * 10)
    print('\n'.join(binary_fields))

    X_scaled = scaler_obj.fit_transform(feature_M.drop(binary_fields, axis=1))
    # .values replaces the deprecated (and now removed) DataFrame.as_matrix().
    X_scaled_w_cats = np.c_[X_scaled, feature_M[binary_fields].values]

    return X_scaled_w_cats, scaler_obj
开发者ID:asharma567,项目名称:cool_tools,代码行数:25,代码来源:utils_preprocessing.py


示例2: _robust_scaler

    def _robust_scaler(self, input_df):
        """Scale features with Scikit-learn's RobustScaler (robust to outliers).

        Parameters
        ----------
        input_df: pandas.DataFrame {n_samples, n_features+['class', 'group', 'guess']}
            Input DataFrame to scale

        Returns
        -------
        scaled_df: pandas.DataFrame {n_samples, n_features + ['guess', 'group', 'class']}
            Returns a DataFrame containing the scaled features

        """
        non_feature_cols = ['class', 'group', 'guess']
        training_features = input_df.loc[input_df['group'] == 'training'].drop(non_feature_cols, axis=1)

        # Nothing to scale when there are no feature columns at all.
        if not len(training_features.columns.values):
            return input_df.copy()

        # The scaler is fit on the training rows only, then applied to
        # the entire frame.
        scaler = RobustScaler()
        scaler.fit(training_features.values.astype(np.float64))

        feature_frame = input_df.drop(non_feature_cols, axis=1)
        scaled_features = scaler.transform(feature_frame.values.astype(np.float64))

        for col_num, column in enumerate(feature_frame.columns.values):
            input_df.loc[:, column] = scaled_features[:, col_num]

        return input_df.copy()
开发者ID:vsolano,项目名称:tpot,代码行数:28,代码来源:tpot.py


示例3: processing

def processing(df):
    """Build the model matrix: dummy-encode 'City Group' and 'Type', derive
    an 'Age' column (days since opening) from 'Open Date', drop identifier /
    raw-text columns, and robust-scale the result."""
    cg_dummies = pd.get_dummies(df["City Group"]).rename(columns=lambda name: "CG_" + name)
    # Keep only the first dummy column (the second is redundant for 2 levels).
    df = pd.concat([df, cg_dummies.iloc[:, 0]], axis=1)

    type_dummies = pd.get_dummies(df["Type"]).rename(columns=lambda name: "Type_" + name)
    df = pd.concat([df, type_dummies.iloc[:, 0:3]], axis=1)

    # Restaurant age, in days, relative to "now".
    def days_open(date_string):
        delta = datetime.datetime.now() - datetime.datetime.strptime(date_string, "%m/%d/%Y")
        return delta.days

    df["Age"] = df["Open Date"].map(days_open)
    df = df.drop(["Id", "Open Date", "City", "City Group", "Type", "revenue"], axis=1)

    # Robust scaling (median/IQR) instead of mean/variance standardization.
    scaler = RobustScaler().fit(df)
    return scaler.transform(df)
开发者ID:dtamayo,项目名称:MachineLearning,代码行数:31,代码来源:svm.py


示例4: ica_analysis

    def ica_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
        """Robust-scale the training data, run FastICA with one component per
        input feature, and bar-plot the kurtosis of each component."""
        scaler = RobustScaler()
        train_scaled = scaler.fit_transform(X_train)
        test_scaled = scaler.transform(X_test)  # computed for parity with the other analyses

        ##
        ## ICA
        ##
        ica = FastICA(n_components=train_scaled.shape[1])
        components = ica.fit_transform(train_scaled)

        ##
        ## Plots
        ##
        plotter = plot_helper()

        kurt = kurtosis(components)
        print(kurt)

        title = 'Kurtosis (FastICA) for ' + data_set_name
        name = data_set_name.lower() + '_ica_kurt'
        filename = './' + self.out_dir + '/' + name + '.png'

        indices = np.arange(1, len(kurt) + 1, 1)
        plotter.plot_simple_bar(indices,
                                kurt,
                                indices.astype('str'),
                                'Feature Index',
                                'Kurtosis',
                                title,
                                filename)


示例5: best_ica_wine

 def best_ica_wine(self):
     """Fit FastICA on robust-scaled wine data, keep the two components with
     the highest kurtosis, and save the transformed splits to text files."""
     helper = data_helper()
     X_train, X_test, y_train, y_test = helper.get_wine_data()

     scaler = RobustScaler()
     train_scaled = scaler.fit_transform(X_train)
     test_scaled = scaler.transform(X_test)

     ica = FastICA(n_components=train_scaled.shape[1])
     train_ica = ica.fit_transform(train_scaled, y_train)
     test_ica = ica.transform(test_scaled)

     def top_two_by_kurtosis(matrix):
         # Order components by descending kurtosis, keep the first two.
         order = kurtosis(matrix).argsort()[::-1]
         return matrix[:, order][:, 0:2]

     train_ica = top_two_by_kurtosis(train_ica)
     test_ica = top_two_by_kurtosis(test_ica)

     # Persist each split under the conventional file name.
     outputs = [('wine_ica_x_train.txt', train_ica),
                ('wine_ica_x_test.txt', test_ica),
                ('wine_ica_y_train.txt', y_train),
                ('wine_ica_y_test.txt', y_test)]
     for basename, data in outputs:
         filename = './' + self.save_dir + '/' + basename
         pd.DataFrame(data).to_csv(filename, header=False, index=False)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:35,代码来源:part2.py


示例6: best_rp_nba

 def best_rp_nba(self):
     """Project robust-scaled NBA data with a Gaussian random projection,
     keep the two components with the highest kurtosis, and save the
     transformed splits to text files."""
     helper = data_helper()
     X_train, X_test, y_train, y_test = helper.get_nba_data()

     scaler = RobustScaler()
     train_scaled = scaler.fit_transform(X_train)
     test_scaled = scaler.transform(X_test)

     projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
     train_rp = projector.fit_transform(train_scaled, y_train)
     test_rp = projector.transform(test_scaled)

     def top_two_by_kurtosis(matrix):
         # Order components by descending kurtosis, keep the first two.
         order = kurtosis(matrix).argsort()[::-1]
         return matrix[:, order][:, 0:2]

     train_rp = top_two_by_kurtosis(train_rp)
     test_rp = top_two_by_kurtosis(test_rp)

     # Persist each split under the conventional file name.
     outputs = [('nba_rp_x_train.txt', train_rp),
                ('nba_rp_x_test.txt', test_rp),
                ('nba_rp_y_train.txt', y_train),
                ('nba_rp_y_test.txt', y_test)]
     for basename, data in outputs:
         filename = './' + self.save_dir + '/' + basename
         pd.DataFrame(data).to_csv(filename, header=False, index=False)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:35,代码来源:part2.py


示例7: num_scaler

def num_scaler(d_num, t_num):
    """Robust-scale both arrays, fitting the scaler on ``d_num`` only so the
    test/target array ``t_num`` is transformed with the training statistics."""
    scaler = RobustScaler()
    d_num = scaler.fit_transform(d_num)
    t_num = scaler.transform(t_num)
    return d_num, t_num
开发者ID:pankaj077,项目名称:TI_work,代码行数:7,代码来源:AutoClassification.py


示例8: rp_analysis

 def rp_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
     """Average per-component kurtosis over 1000 Gaussian random projections
     of the robust-scaled training data, and bar-plot the means."""
     scaler = RobustScaler()
     train_scaled = scaler.fit_transform(X_train)

     kurtosis_samples = []
     for _ in range(1000):
         ##
         ## Random Projection -- a fresh random matrix each round, so the
         ## kurtosis varies and is averaged below.
         ##
         projector = GaussianRandomProjection(n_components=train_scaled.shape[1])
         projector.fit(train_scaled)
         kurtosis_samples.append(kurtosis(projector.transform(train_scaled)))

     mean_k = np.mean(kurtosis_samples, 0)

     ##
     ## Plots
     ##
     plotter = plot_helper()

     title = 'Kurtosis (Randomized Projection) for ' + data_set_name
     name = data_set_name.lower() + '_rp_kurt'
     filename = './' + self.out_dir + '/' + name + '.png'

     indices = np.arange(1, len(mean_k) + 1, 1)
     plotter.plot_simple_bar(indices,
                             mean_k,
                             indices.astype('str'),
                             'Feature Index',
                             'Kurtosis',
                             title,
                             filename)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:33,代码来源:part2.py


示例9: nn_wine_orig

 def nn_wine_orig(self):
     """Run the part-4 neural-network analysis on robust-scaled wine data."""
     helper = data_helper()
     X_train, X_test, y_train, y_test = helper.get_wine_data()

     scaler = RobustScaler()
     train_scaled = scaler.fit_transform(X_train)
     test_scaled = scaler.transform(X_test)

     self.part4.nn_analysis(train_scaled, test_scaled, y_train, y_test, 'Wine', 'Neural Network Original')
开发者ID:rbaxter1,项目名称:CS7641,代码行数:9,代码来源:part5.py


示例10: standardize_columns

def standardize_columns(data):
    """Robust-scale the weather-related columns of ``data`` in place.

    RobustScaler (median/IQR) is used instead of min-max or standard scaling
    because these columns contain outliers.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'temp', 'atemp', 'humidity' and 'windspeed' columns.

    Returns
    -------
    pandas.DataFrame
        The same DataFrame with the listed columns scaled.
    """
    columns_to_standardize = ['temp', 'atemp', 'humidity', 'windspeed']
    # Renamed from the misleading `min_max_scaler` -- this is a RobustScaler.
    scaler = RobustScaler()

    for column in columns_to_standardize:
        # Each column is scaled independently.  sklearn requires 2-D input,
        # so pass the one-column frame data[[column]] rather than the 1-D
        # Series data[column] (which raises in modern sklearn versions).
        data[column] = scaler.fit_transform(data[[column]])
    return data
开发者ID:drawer87,项目名称:kaggle,代码行数:10,代码来源:training.py


示例11: lda_analysis

 def lda_analysis(self, X_train, X_test, y_train, y_test, data_set_name):
     """Sweep LDA ``n_components`` from 1 to the feature count, recording the
     3-fold cross-validation score and the training score for each setting,
     then save a score-vs-n_components summary plot.

     NOTE(review): ``KFold(n_samples, 3, shuffle=True)`` iterated directly
     matches the legacy ``sklearn.cross_validation`` API, not the modern
     ``sklearn.model_selection`` one -- confirm the pinned sklearn version
     before reusing this code.
     """
     # Robust scaling fitted on the training split only.
     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)
     
     ##
     ## Plots
     ##
     ph = plot_helper()
     
     scores = []
     train_scores = []
     rng = range(1, X_train_scl.shape[1]+1)
     for i in rng:
         lda = LinearDiscriminantAnalysis(n_components=i)
         cv = KFold(X_train_scl.shape[0], 3, shuffle=True)
         
         # cross validation: mean score over the 3 folds
         cv_scores = []
         for (train, test) in cv:
             lda.fit(X_train_scl[train], y_train[train])
             score = lda.score(X_train_scl[test], y_train[test])
             cv_scores.append(score)
         
         mean_score = np.mean(cv_scores)
         scores.append(mean_score)
         
         # train score: a fresh estimator fit on the full training split
         lda = LinearDiscriminantAnalysis(n_components=i)
         lda.fit(X_train_scl, y_train)
         train_score = lda.score(X_train_scl, y_train)
         train_scores.append(train_score)
         
         print(i, mean_score)
         
     ##
     ## Score Plot: CV score vs. training score over n_components
     ##
     title = 'Score Summary Plot (LDA) for ' + data_set_name
     name = data_set_name.lower() + '_lda_score'
     filename = './' + self.out_dir + '/' + name + '.png'
                 
     ph.plot_series(rng,
                    [scores, train_scores],
                    [None, None],
                    ['cross validation score', 'training score'],
                    cm.viridis(np.linspace(0, 1, 2)),
                    ['o', '*'],
                    title,
                    'n_components',
                    'Score',
                    filename)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:52,代码来源:part2.py


示例12: demensionReduction

def demensionReduction(numFeatures, cateFeatures):
    """Robust-scale the numeric features, reduce them to 5 principal
    components, and append the categorical features unchanged.

    :param numFeatures: numeric feature matrix, shape (n_samples, n_numeric)
    :param cateFeatures: categorical feature matrix, shape (n_samples, n_cat)
    :return: array of shape (n_samples, 5 + n_cat)
    """
    scaled = RobustScaler().fit_transform(numFeatures)
    reduced = PCA(n_components=5).fit_transform(scaled)
    return np.concatenate((reduced, cateFeatures), axis=1)
开发者ID:WeihuaLei,项目名称:LearnSpark,代码行数:13,代码来源:credit.py


示例13: test_robustscaler_vs_sklearn

def test_robustscaler_vs_sklearn():
    """msmbuilder.preprocessing.RobustScaler must match the reference
    sklearn.preprocessing.RobustScaler on the same trajectories."""
    reference = RobustScalerR()
    reference.fit(np.concatenate(trajs))

    scaler = RobustScaler()
    scaler.fit(trajs)

    expected = reference.transform(trajs[0])
    actual = scaler.transform(trajs)[0]

    np.testing.assert_array_almost_equal(expected, actual)


示例14: best_lda_cluster_wine

 def best_lda_cluster_wine(self):
     """Cluster the LDA-reduced wine data with K-Means and a Gaussian
     mixture model, saving each clustering's transformed splits (and the
     unchanged labels) to text files."""
     # (removed a duplicated `dh = data_helper()` line)
     dh = data_helper()
     X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()

     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)

     def save(prefix, x_train, x_test):
         # Persist one clustering's train/test output plus the labels.
         for suffix, data in (('x_train', x_train), ('x_test', x_test),
                              ('y_train', y_train), ('y_test', y_test)):
             filename = './' + self.save_dir + '/wine_' + prefix + '_lda_' + suffix + '.txt'
             pd.DataFrame(data).to_csv(filename, header=False, index=False)

     ##
     ## K-Means
     ##
     km = KMeans(n_clusters=4, algorithm='full')
     save('kmeans', km.fit_transform(X_train_scl), km.transform(X_test_scl))

     ##
     ## GMM
     ##
     # BUG FIX: the original constructed `gmm` but then called
     # km.fit_transform/km.transform again, so the "gmm" files actually held
     # K-Means output.  GaussianMixture has no transform(); use component
     # membership probabilities as the transformed representation.
     gmm = GaussianMixture(n_components=4, covariance_type='full')
     gmm.fit(X_train_scl)
     save('gmm', gmm.predict_proba(X_train_scl), gmm.predict_proba(X_test_scl))


示例15: transform_dataframe

def transform_dataframe(dataframe):
    """Robust-scale every column of a dataframe in place.

    Each column is centered on its median and scaled by its interquartile
    range (RobustScaler), which is resilient to outliers.  (The original
    docstring claimed mean-0/unit-variance, which describes StandardScaler.)

    Parameters:
        dataframe : Input pandas dataframe
    Input types: pd.Dataframe
    Output types: pd.Dataframe

    """
    cols = [col for col in dataframe.columns]
    robust_scaler = RobustScaler()
    scaled = robust_scaler.fit_transform(dataframe[cols])
    # BUG FIX: the original did `dataframe.columns = df`, which overwrote
    # the column *labels* with the scaled array instead of storing the
    # scaled values back into the frame.
    dataframe[cols] = scaled
    return dataframe
开发者ID:gitter-badger,项目名称:USP-inhibition,代码行数:17,代码来源:utils.py


示例16: scale

    def scale(self, columns, categorical_cols, apply_list, target_column):
        """Robust-scale the numerical columns of ``self.dataset`` in place.

        Parameters
        ----------
        columns : list
            Explicit list of columns to scale, used when ``apply_list`` is
            truthy.
        categorical_cols : list
            Columns to exclude when the numerical columns are auto-detected.
        apply_list : bool
            If truthy, use ``columns``; otherwise every dataset column not in
            ``categorical_cols`` is treated as numerical.
        target_column : str
            Target column name; always excluded from scaling.

        Returns
        -------
        pandas.DataFrame
            ``self.dataset`` with its numerical columns scaled.
        """
        from sklearn.preprocessing import RobustScaler
        scaler = RobustScaler()

        if apply_list:
            # BUG FIX: copy the list so the remove() below does not mutate
            # the caller's `columns` argument.
            numerical_cols = list(columns)
        else:
            numerical_cols = [col for col in self.dataset.columns.values
                              if col not in categorical_cols]
        # We don't want to scale the target variable, as it is already binary.
        # The target column uses the same value as target_value from Split Data
        # section in the settings popup.
        numerical_cols.remove(target_column)
        # Scale, fit and transform all the numerical columns
        scaled_data = scaler.fit_transform(self.dataset[numerical_cols])
        self.dataset[numerical_cols] = scaled_data
        return self.dataset
开发者ID:Dismeth,项目名称:gui,代码行数:21,代码来源:dataset.py


示例17: best_lda_nba

 def best_lda_nba(self):
     """Reduce robust-scaled NBA data to 2 LDA components and save the
     transformed splits to text files."""
     helper = data_helper()
     X_train, X_test, y_train, y_test = helper.get_nba_data()

     scaler = RobustScaler()
     train_scaled = scaler.fit_transform(X_train)
     test_scaled = scaler.transform(X_test)

     lda = LinearDiscriminantAnalysis(n_components=2)
     train_lda = lda.fit_transform(train_scaled, y_train)
     test_lda = lda.transform(test_scaled)

     # Persist each split under the conventional file name.
     outputs = [('nba_lda_x_train.txt', train_lda),
                ('nba_lda_x_test.txt', test_lda),
                ('nba_lda_y_train.txt', y_train),
                ('nba_lda_y_test.txt', y_test)]
     for basename, data in outputs:
         filename = './' + self.save_dir + '/' + basename
         pd.DataFrame(data).to_csv(filename, header=False, index=False)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:24,代码来源:part2.py


示例18: detect_bad_channels

def detect_bad_channels(inst, pick_types=None, threshold=.2):
    """Flag channels whose robust covariance profile is anomalous.

    Parameters
    ----------
    inst : object
        MNE-style object exposing ``pick_types()``, ``_data`` and
        ``ch_names`` (e.g. Raw/Epochs) -- presumably; confirm against
        callers.
    pick_types : dict | None
        Channel-type selection forwarded to ``inst.pick_types()``;
        defaults to ``dict(meg='mag')``.
    threshold : float
        Channels whose robust (mu, sigma) distance falls BELOW this value
        are reported.

    Returns
    -------
    list of str
        Names of the detected bad channels.
    """
    from sklearn.preprocessing import RobustScaler
    from sklearn.covariance import EmpiricalCovariance
    from jr.stats import median_abs_deviation
    if pick_types is None:
        pick_types = dict(meg='mag')
    inst = inst.pick_types(copy=True, **pick_types)
    # Empirical channel-by-channel covariance of the selected data.
    cov = EmpiricalCovariance()
    cov.fit(inst._data.T)
    cov = cov.covariance_
    # center: robust-scale the covariance, then normalize by the MAD and
    # remove the overall median
    scaler = RobustScaler()
    cov = scaler.fit_transform(cov).T
    cov /= median_abs_deviation(cov)
    cov -= np.median(cov)
    # compute robust summary metrics per channel (location and spread),
    # each normalized by its own MAD
    mu = np.median(cov, axis=0)
    sigma = median_abs_deviation(cov, axis=0)
    mu /= median_abs_deviation(mu)
    sigma /= median_abs_deviation(sigma)
    distance = np.sqrt(mu ** 2 + sigma ** 2)
    # NOTE(review): channels with a *small* robust distance are flagged --
    # confirm the `<` comparison is intended rather than `>`.
    bad = np.where(distance < threshold)[0]
    bad = [inst.ch_names[ch] for ch in bad]
    return bad
开发者ID:LauraGwilliams,项目名称:jr-tools,代码行数:24,代码来源:base.py


示例19: best_pca_wine

 def best_pca_wine(self):
     """Reduce robust-scaled wine data to 3 principal components and save
     the transformed splits to text files."""
     helper = data_helper()
     X_train, X_test, y_train, y_test = helper.get_wine_data()

     scaler = RobustScaler()
     train_scaled = scaler.fit_transform(X_train)
     test_scaled = scaler.transform(X_test)

     pca = PCA(n_components=3)
     train_pca = pca.fit_transform(train_scaled, y_train)
     test_pca = pca.transform(test_scaled)

     # Persist each split under the conventional file name.
     outputs = [('wine_pca_x_train.txt', train_pca),
                ('wine_pca_x_test.txt', test_pca),
                ('wine_pca_y_train.txt', y_train),
                ('wine_pca_y_test.txt', y_test)]
     for basename, data in outputs:
         filename = './' + self.save_dir + '/' + basename
         pd.DataFrame(data).to_csv(filename, header=False, index=False)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:24,代码来源:part2.py


示例20: __init__

    def __init__(self, *args, scale=False, center=False, **kwargs):
        """
        A machine learned model.  Beyond :class:`revscoring.Model`, these
        "Learned" models implement
        :func:`~revscoring.scoring.models.Learned.fit` and
        :func:`~revscoring.scoring.models.Learned.cross_validate`.
        """
        super().__init__(*args, **kwargs)
        self.trained = None
        # Only build a scaler when centering and/or scaling was requested.
        self.scaler = (RobustScaler(with_centering=center,
                                    with_scaling=scale)
                       if (scale or center) else None)

        self.params.update({
            'scale': scale,
            'center': center
        })
开发者ID:wiki-ai,项目名称:revscoring,代码行数:19,代码来源:model.py



注:本文中的sklearn.preprocessing.RobustScaler类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python preprocessing.Scaler类代码示例发布时间:2022-05-27
下一篇:
Python preprocessing.PolynomialFeatures类代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap