• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python predstd.wls_prediction_std函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中statsmodels.sandbox.regression.predstd.wls_prediction_std函数的典型用法代码示例。如果您正苦于以下问题:Python wls_prediction_std函数的具体用法?Python wls_prediction_std怎么用?Python wls_prediction_std使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了wls_prediction_std函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: try_prod24h_before

def try_prod24h_before(columns=None, add_const=False, y=y):
    """Fit a linear regression of ``y`` on selected columns and plot the result.

    The top subplot shows ``y``, the fitted values and the lower/upper
    interval bounds returned by ``wls_prediction_std``; the bottom subplot
    shows the residuals.  MAE, MAPE, RMSE and the model summary are printed.

    Parameters
    ----------
    columns : list of str, optional
        Columns of the module-level ``all_data`` used as regressors.
        Defaults to ``['Tout', 'vWind', 'vWindavg24', 'prod24h_before']``.
    add_const : bool
        Forwarded to ``mlin_regression``.
    y : array_like
        Dependent variable; defaults to the module-level ``y`` as bound at
        function definition time.

    Returns
    -------
    res
        The object returned by ``mlin_regression``.
    """
    # A list is required here: pandas treats a tuple as a single column key.
    if columns is None:
        columns = ['Tout', 'vWind', 'vWindavg24', 'prod24h_before']

    plt.close('all')
    X = all_data[columns]
    res = mlin_regression(y, X, add_const=add_const)
    # NOTE(review): the plotted period is hard-coded; y is assumed to cover
    # exactly these hourly timesteps.
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))

    plt.subplot(2,1,1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    prstd, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')
    plt.ylabel('MW')
    plt.legend(loc=2)
    plt.subplot(2,1,2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.ylabel('MW')
    plt.legend()

    # Single-argument, parenthesised print behaves the same on Python 2 and 3.
    print("MAE = " + str(mae(res.resid)))
    print("MAPE = " + str(mape(res.resid, y)))
    print("RMSE = " + str(rmse(res.resid)))

    print(res.summary())

    return res
开发者ID:magndahl,项目名称:dmi_ensemble_handler,代码行数:27,代码来源:model_selection.py


示例2: plot_best_model

def plot_best_model():
    """Fit and plot the weather regression plus an average-daily-profile correction.

    Regresses the module-level ``y`` on the ``Tout``, ``Toutavg24``, ``vWind``
    and ``vWindavg24`` columns of ``all_data``.  The top subplot shows ``y``,
    the fitted values, the interval bounds from ``wls_prediction_std`` and the
    fitted values corrected by the mean residual of each position in a
    24-step cycle; the bottom subplot shows the residuals and that mean
    residual profile.  Error measures and the model summary are printed.

    Returns
    -------
    res
        The object returned by ``mlin_regression``.
    """
    plt.close('all')
    columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    X = all_data[columns]
    res = mlin_regression(y, X)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))

    plt.subplot(2,1,1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    prstd, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')
    # Mean residual for each of the 24 positions in the daily cycle.
    mean_day_resid = [res.resid[i::24].mean() for i in range(24)]
    # NOTE(review): 29 repetitions presumably match the 29 days spanned by the
    # hard-coded timestamps above -- confirm if the period changes.
    mean_resid_series = np.tile(mean_day_resid, 29)
    plt.plot_date(timesteps, res.fittedvalues + mean_resid_series, 'g', label='Weather model + avg daily profile')
    plt.ylabel('MW')
    plt.legend(loc=2)
    plt.subplot(2,1,2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')

    plt.plot_date(timesteps, mean_resid_series)
    plt.ylabel('MW')
    plt.legend()

    # Local names deliberately differ from the module-level mae()/mape()
    # helpers so those are not shadowed inside this function.
    corrected_error = res.fittedvalues + mean_resid_series - y
    mape_corrected = np.mean(np.abs(corrected_error/y))
    mape_weather = np.mean(np.abs((res.resid)/y))
    mae_corrected = np.mean(np.abs(corrected_error))

    # Formatting into one string keeps the output identical on Python 2 and 3.
    print("%s %s %s" % (mape_corrected, mape_weather, mae_corrected))

    # The summary was previously built and silently discarded; print it.
    print(res.summary())
    return res
开发者ID:magndahl,项目名称:dmi_ensemble_handler,代码行数:34,代码来源:model_selection.py


示例3: user_model

def user_model(data):
    """
    Prompt for a regression formula, fit it with statsmodels OLS and plot it.

    The available columns are listed, the user types an R-style formula,
    the model summary is printed, and the dependent variable and the fitted
    values are plotted against the datetime column(s) of ``data``.

    Parameters
    ----------
    data : pandas DataFrame
        Data set holding the variables named in the formula.  Columns with
        dtype ``datetime64[ns]`` are used as the x axis.

    Returns
    -------
    userModel
        The fitted statsmodels results object.
    """

    # List available covariates in the data set
    print('The data set contains the following covariates: \n')
    print(list(data.columns), '\n')

    # Prompt user to input model formula, in R type syntax
    userFormula = input('Enter your regression model formula, using syntax as shown: \n \n dependent_variable ~ covariate1 + covariate 2 + ... \n \n')

    # Run the user-defined model as a statsmodels linear regression
    userModel = smf.ols(formula=userFormula, data=data).fit()
    print('\n', userModel.summary(), '\n')

    # Retrieve y variable and time variable for plotting
    yvar = userModel.model.endog_names
    y = data[yvar]
    timeVar = list(data.columns[data.dtypes == 'datetime64[ns]'])
    x = data[timeVar]

    # Plot dependent variable data and model fitted values vs time
    plt.figure(figsize=(12,6))

    plt.plot(x, userModel.fittedvalues, 'r.', alpha=0.2, label='Fitted Values')
    plt.plot(x, y, 'b.', alpha=0.2, label='%s data' % yvar)
    plt.legend(loc='upper left')
    plt.title('%s actual data and model fitted values' % yvar, fontsize='x-large')

    # Hand the fit back to the caller, as the documentation always promised.
    return userModel
开发者ID:cmfeng,项目名称:class_project,代码行数:34,代码来源:regression.py


示例4: plot_locality_regression

def plot_locality_regression(snps,cob,gene_limit=10):
    """Plot global against local degree for candidate genes with an OLS fit.

    Candidate genes for ``snps`` are looked up through ``cob.refgen``, their
    locality table is sorted by the ``local`` column, ``global`` is regressed
    on ``local`` (no constant term is added), and the data, the fitted line
    and a band of +/- 2.5 prediction standard deviations are drawn.  The
    figure is saved as ``<cob.name>_locality.png``.
    """
    log('Fetching Empirical Degree')
    candidates = cob.refgen.candidate_genes(snps,gene_limit=gene_limit,chain=True)
    degree = cob.locality(candidates).sort('local')
    log('Fetching BS Degree')
    log('Fitting models')
    local_deg = degree.local
    global_deg = degree['global']
    res = sm.OLS(global_deg,local_deg).fit()
    # Only the prediction standard deviation is used for the band below.
    std, _lower, _upper = wls_prediction_std(res)

    fig,ax = pylab.subplots(figsize=(8,6))
    fig.hold(True)
    ax.set_xlim(0,max(local_deg))
    ax.set_ylim(0,max(global_deg))

    log('Plotting Empirical')
    ax.plot(local_deg,global_deg,'o',label='Empirical')

    log('Plotting Residuals')
    fitted = res.fittedvalues
    band = 2.5*std
    ax.plot(local_deg,fitted,'--')
    ax.plot(local_deg,fitted+band,'r--')
    ax.plot(local_deg,fitted-band,'r--')
    ax.set_xlabel('Number Local Interactions')
    ax.set_ylabel('Number Global Interactions')

    log('Saving Figure')
    fig.savefig('{}_locality.png'.format(cob.name))
开发者ID:hawkaa,项目名称:Camoco,代码行数:28,代码来源:Tools.py


示例5: summary_obs

def summary_obs(res, alpha=0.05):
    """Build a per-observation summary table for a fitted regression.

    Parameters
    ----------
    res : results instance
        Fitted regression results; must provide ``fittedvalues``, ``resid``,
        ``mse_resid``, ``df_resid``, ``nobs`` and ``model.endog``.
    alpha : float
        Significance level used for both the confidence interval of the
        predicted mean and the interval for a predicted observation.

    Returns
    -------
    st : SimpleTable
        Formatted table of the per-observation statistics.
    data : ndarray
        The table values, rounded to 4 decimals.
    ss2 : list of str
        Column headers.

    Notes
    -----
    The column headers contain the literal text "95%" whatever ``alpha`` is.
    They are left unchanged because callers may look columns up by name.
    """
    from scipy import stats
    from statsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    # standard error for predicted mean
    # Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag*res.mse_resid)

    tppf = stats.t.isf(alpha/2., res.df_resid)
    predict_mean_ci = np.column_stack([
                        res.fittedvalues - tppf * predict_mean_se,
                        res.fittedvalues + tppf * predict_mean_se])

    # standard error for predicted observation; alpha must be forwarded,
    # otherwise this interval silently stays at the 5% default while the
    # mean interval above follows the caller's alpha.
    predict_se, predict_ci_low, predict_ci_upp = wls_prediction_std(
        res, alpha=alpha)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    # standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    table_sm = np.column_stack([
                                  np.arange(res.nobs) + 1,
                                  res.model.endog,
                                  res.fittedvalues,
                                  predict_mean_se,
                                  predict_mean_ci[:,0],
                                  predict_mean_ci[:,1],
                                  predict_ci[:,0],
                                  predict_ci[:,1],
                                  res.resid,
                                  resid_se,
                                  infl.resid_studentized_internal,
                                  infl.cooks_distance()[0]
                                  ])

    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue', 'Std Error\nMean Predict', 'Mean ci\n95% low', 'Mean ci\n95% upp', 'Predict ci\n95% low', 'Predict ci\n95% upp', 'Residual', 'Std Error\nResidual', 'Student\nResidual', "Cook's\nD"]
    colnames = ss2
    data = np.round(table_sm,4)

    from statsmodels.iolib.table import SimpleTable, default_html_fmt
    from statsmodels.iolib.tableformatting import fmt_base
    from copy import deepcopy
    # Copy the shared format dicts so the module-level defaults stay untouched.
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    st = SimpleTable(data, headers=colnames, txt_fmt=fmt,
                       html_fmt=fmt_html)

    return st, data, ss2
开发者ID:CRP,项目名称:statsmodels,代码行数:59,代码来源:outliers_influence.py


示例6: plot_fit

def plot_fit(results, exog_idx, y_true=None, ax=None, **kwargs):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values.

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    exog_idx : int or str
        Name or index of regressor in exog matrix.
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    kwargs
        The keyword arguments are passed to the plot command for the fitted
        values points.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.
    """
    fig, ax = utils.create_mpl_ax(ax)

    # Resolve the regressor to both its name and its column index.
    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    #maybe add option for wendog, wexog
    y = results.model.endog
    x1 = results.model.exog[:, exog_idx]
    # Sort everything by the regressor so lines are drawn left to right.
    x1_argsort = np.argsort(x1)
    y = y[x1_argsort]
    x1 = x1[x1_argsort]

    ax.plot(x1, y, 'bo', label=results.model.endog_names)
    if y_true is not None:
        ax.plot(x1, y_true[x1_argsort], 'b-', label='True values')
    title = 'Fitted values versus %s' % exog_name

    prstd, iv_l, iv_u = wls_prediction_std(results)
    ax.plot(x1, results.fittedvalues[x1_argsort], 'D', color='r',
            label='fitted', **kwargs)
    # Vertical bar per observation spanning the lower/upper interval bounds.
    ax.vlines(x1, iv_l[x1_argsort], iv_u[x1_argsort], linewidth=1, color='k',
            alpha=.7)
    ax.set_title(title)
    ax.set_xlabel(exog_name)
    ax.set_ylabel(results.model.endog_names)
    ax.legend(loc='best')

    return fig
开发者ID:joonro,项目名称:statsmodels,代码行数:57,代码来源:regressionplots.py


示例7: plot_fit

def plot_fit(res, exog_idx, exog_name='', y_true=None, ax=None, fontsize='small'):
    """Scatter observed values and draw the fitted line against one regressor.

    Parameters
    ----------
    res : result instance
        Results object exposing ``model.endog``, ``model.exog`` and
        ``fittedvalues``.
    exog_idx : int
        Column index of the regressor in the exog matrix.
    exog_name : str, optional
        Label used in the title; when empty, ``'variable <exog_idx>'`` is used.
    y_true : array_like, optional
        When given, drawn as an additional line labelled 'true'.
    ax : Matplotlib AxesSubplot instance, optional
        Axes to draw into; a new figure is created when omitted.
    fontsize : str or int, optional
        Font size of the title.

    Returns
    -------
    fig : Matplotlib figure instance
        The created figure, or the figure `ax` belongs to.
    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == '':
        exog_name = 'variable %d' % exog_idx

    # Order all series by the regressor so the line and band are monotone in x.
    regressor = res.model.exog[:, exog_idx]
    order = np.argsort(regressor)
    x_sorted = regressor[order]
    y_sorted = res.model.endog[order]

    ax.plot(x_sorted, y_sorted, 'bo', label='observed')
    if y_true is not None:
        ax.plot(x_sorted, y_true[order], 'b-', label='true')
    # The title is the same whether or not y_true was supplied.
    title = 'fitted versus regressor %s' % exog_name

    _prstd, lower, upper = wls_prediction_std(res)
    ax.plot(x_sorted, res.fittedvalues[order], 'k-', label='fitted')
    # Shaded band between the lower and upper interval bounds.
    ax.fill_between(x_sorted, lower[order], upper[order], alpha=0.1, color='k')
    ax.set_title(title, fontsize=fontsize)

    return fig
开发者ID:bendmorris,项目名称:statsmodels,代码行数:56,代码来源:regressionplots.py


示例8: lm

def lm(x, y):
    """Fit ``y`` on ``x`` plus a constant with statsmodels OLS.

    Returns a tuple ``(predicted_y, interval_l, interval_u)`` holding the
    fitted values and the lower/upper bounds from ``wls_prediction_std``.
    """
    x = _plot_friendly(x)
    y = _plot_friendly(y)
    # Dates cannot be regressed on directly; use their ordinal numbers.
    if _isdate(x[0]):
        x = np.array([day.toordinal() for day in x])

    frame = pd.DataFrame({"x": x, "y": y})
    frame["const"] = 1.0
    fit = sm.OLS(frame.y, frame[["x", "const"]]).fit()
    frame["predicted_y"] = fit.fittedvalues

    pred_std, lower, upper = wls_prediction_std(fit)
    frame["predstd"] = pred_std
    frame["interval_l"] = lower
    frame["interval_u"] = upper
    return (frame.predicted_y, frame.interval_l, frame.interval_u)
开发者ID:neuropil,项目名称:ggplot,代码行数:11,代码来源:smoothers.py


示例9: _predict

    def _predict(self, fit, df):
        """
        Return a copy of df extended with predictions and interval bounds.

        Notes
        -----
        The returned frame gains the columns:
        - 'predicted': output of ``fit.predict``
        - 'interval_l', 'interval_u': lower and upper bounds from
          ``wls_prediction_std``.

        Two attributes of self influence the result:
        confint : float
            Confidence level; ``1 - confint`` is passed as ``alpha``.
        allow_negative_predictions : bool
            If False, negative predictions are replaced by zero.

        Parameters
        ----------
        fit : Statsmodels fit
        df : pandas DataFrame
            Data to predict for; it is copied, not modified.

        Returns
        -------
        df_res : pandas DataFrame
            Copy of df with additional columns 'predicted', 'interval_u' and 'interval_l'
        """
        result = df.copy()
        exog_names = fit.model.exog_names

        # The model's constant term needs a matching column of ones.
        if 'Intercept' in exog_names:
            result['Intercept'] = 1.0

        result['predicted'] = fit.predict(result)
        if not self.allow_negative_predictions:
            negative = result['predicted'] < 0
            result.loc[negative, 'predicted'] = 0

        # Quote every column name except the intercept so the names line up
        # with the model's exog names.
        quoted = result.rename(
            columns=lambda name: name if name == 'Intercept' else self.quote(name))
        _prstd, lower, upper = wls_prediction_std(fit,
                                                  quoted[exog_names],
                                                  alpha=1 - self.confint)
        result['interval_l'] = lower
        result['interval_u'] = upper

        # The helper column is not part of the returned data.
        if 'Intercept' in result:
            result.drop(labels=['Intercept'], axis=1, inplace=True)

        return result
开发者ID:kdebrab,项目名称:opengrid,代码行数:53,代码来源:regression.py


示例10: plot_locality

    def plot_locality(self,gene_list,bootstraps=10,num_windows=100,sd_thresh=2):
        '''
            Plot local against global degree for a gene list.

            The empirical locality of `gene_list` is plotted with its OLS
            fit. When `bootstraps` > 0, that many bootstrapped candidate
            gene sets are drawn, plotted, fitted by OLS, and two envelopes
            are added: the interval from wls_prediction_std and a band of
            `sd_thresh` windowed residual standard deviations around the
            bootstrap fit.

            Parameters
            ----------
            gene_list : genes passed to self.locality
            bootstraps : int
                Number of bootstrap replicates; 0 disables bootstrapping.
            num_windows : int
                Number of consecutive windows (over rows sorted by global
                degree) used for the residual standard deviation.
            sd_thresh : number
                Half-width of the red envelope in standard deviations.

            Returns
            -------
            The matplotlib.pyplot module, with the figure current.
        '''
        # Generate a blank fig
        fig,ax = plt.subplots(figsize=(8,6))
        fig.hold(True)
        # Y axis is local degree (what we are TRYING to predict)
        degree = self.locality(gene_list).sort('global')
        ax.set_ylim(0,max(degree['local']))
        ax.set_xlim(0,max(degree['global']))
        if bootstraps > 0:
            # Honour the requested number of replicates (was fixed at 10).
            bs = pd.concat(
                [self.locality(
                    self.refgen.bootstrap_candidate_genes(gene_list)
                ) for _ in range(bootstraps)]
            ).sort('global')
            ax.set_ylim(0,max(bs['local']))
            ax.set_xlim(0,max(bs['global']))
            plt.plot(bs['global'],bs['local'],'ro',alpha=0.05,label='Bootstraps')
        # Plot the bootstraps and the empirical
        plt.plot(degree['global'],degree['local'],'bo',label='Empirical')
        emp_ols = sm.OLS(degree['local'],degree['global']).fit()
        ax.plot(degree['global'],emp_ols.fittedvalues,'k:',label='Empirical OLS')

        if bootstraps > 0:
            # Get the OLS
            bs_ols = sm.OLS(bs['local'],bs['global']).fit()
            bs['resid'] = bs_ols.resid
            bs['fitted'] = bs_ols.fittedvalues
            ax.plot(bs['global'],bs_ols.fittedvalues,'g--',label='bootstrap OLS')
            # Windowed standard deviation of the residuals: rows (already
            # sorted by global degree) are cut into consecutive windows.
            # NOTE(review): with integer division and len(bs) < num_windows
            # window_tick would be 0 -- confirm inputs are large enough.
            window_tick = len(bs)/num_windows
            bs['window'] = [int(x/window_tick) for x in range(len(bs))]
            # get std for each window
            win_std = bs.groupby('window').apply(lambda df: df['resid'].std()).to_dict()
            bs['std_envelope'] = [win_std[x] for x in bs.window.values]
            # Plot confidence intervals
            prstd, iv_l, iv_u = wls_prediction_std(bs_ols)
            ax.plot(bs['global'], iv_u, 'g--',label='conf int.')
            ax.plot(bs['global'], iv_l, 'g--')
            # Plot the windowed standard-deviation envelope
            ax.plot(
                bs['global'],bs['fitted']+(sd_thresh*bs['std_envelope']),'r--'
                ,label='{} s.d. envelope'.format(sd_thresh)
            )
            ax.plot(bs['global'],bs['fitted']-(sd_thresh*bs['std_envelope']),'r--')
        ax.set_xlabel('Number Global Interactions')
        ax.set_ylabel('Number Local Interactions')
        ax.legend(loc='best')
        return plt
开发者ID:gitter-badger,项目名称:Camoco,代码行数:52,代码来源:COB.py


示例11: run_ordinary_least_squares

 def run_ordinary_least_squares(ols_dates, ols_data, statsmodels_settings):
     """
     Fit an OLS regression of the prices on the dates and a power of the dates.

     The regressors are ``ols_dates`` and ``ols_dates ** exponent`` plus a
     constant, where ``exponent`` comes from ``statsmodels_settings``.  The
     model summary is printed.  When ``statsmodels_settings.confidence`` is
     truthy the result is the tuple ``(fit, lower, upper)`` with the bounds
     from ``wls_prediction_std``; otherwise only the fit is returned.
     """
     powered_dates = ols_dates ** statsmodels_settings.exponent
     regressors = np.column_stack((ols_dates, powered_dates))
     design_matrix = sm.add_constant(regressors)
     statsmodel_regression = sm.OLS(ols_data, design_matrix).fit()
     print(statsmodel_regression.summary())

     # Without the confidence option there is nothing more to compute.
     if not statsmodels_settings.confidence:
         return statsmodel_regression

     _prstd, lower, upper = wls_prediction_std(statsmodel_regression)
     return statsmodel_regression, lower, upper
开发者ID:bheemeshk,项目名称:StreamSpace,代码行数:14,代码来源:RegressionAnalysis.py


示例12: predict

 def predict(self, ID, ALPHA=0.5):
     """
     Predict a value and its interval bounds for the item identified by ID.

     The first field returned by ``get_data`` is vectorised and reduced with
     ``self.vectorizer`` and ``self.lsa``; fields 1..3 are squared, divided
     by ``self.sum`` and square-rooted; both parts are joined and filtered
     by ``del_vector``.  Returns the squared prediction and the squared
     lower and upper bounds, with a negative lower bound clipped to zero
     before squaring.
     """
     record = get_data(ID)

     text_part = self.vectorizer.transform([record[0]])
     text_part = self.lsa.transform(text_part)

     numeric_part = np.array([record[1:4]])**2.0 / self.sum
     numeric_part = numeric_part**0.5

     features = np.hstack([text_part, numeric_part])
     features = del_vector(features, self.dellist)

     estimated = self.results.predict(features)
     _prstd, lower, upper = wls_prediction_std(self.results, features, alpha = ALPHA)
     # Clip before squaring so a negative bound does not turn positive.
     if lower[0] < 0:
         lower[0] = 0
     return estimated[0]**2.0, lower[0]**2.0, upper[0]**2.0
开发者ID:takeru-nitta,项目名称:auction,代码行数:14,代码来源:estimator.py


示例13: main

def main():
    """Regress 'Tip Perc' on 'White' plus a constant and plot the fit.

    Loads a pickled DataFrame from the file ``OLS_data`` in the working
    directory, fits the OLS model and shows a scatter plot with the fitted
    line, the interval bounds from ``wls_prediction_std`` and the R^2 value.
    """
    # Pickles are binary data: read in binary mode and close the handle.
    # NOTE: unpickling runs arbitrary code; only load a trusted OLS_data file.
    with open("OLS_data", "rb") as f:
        df = pickle.load(f)
    # NOTE(review): DataFrame.sort is the legacy pandas API (sort_values in
    # newer releases) -- kept to match the pandas version this script targets.
    df = df.sort(columns="White")
    y = df["Tip Perc"]
    X = df[["White", "const"]]
    result = sm.OLS(y, X).fit()
    yhat = result.predict(X)
    prstd, iv_l, iv_u = wls_prediction_std(result)
    plt.scatter(X["White"], y, color="b", alpha=0.9)
    plt.plot(X["White"], yhat, color="r", alpha=0.7)
    plt.plot(X["White"], iv_u, "--", color="r", alpha=0.7, linewidth=0.7)
    plt.plot(X["White"], iv_l, "--", color="r", alpha=0.7, linewidth=0.7)
    plt.text(1.05, 25, "$R^2$=$%.3f$" % result.rsquared, ha="center", va="center")
    plt.xlabel("White Rate")
    plt.ylabel("Average Tip Percentage")
    plt.title("Regress Tip Percentage on White Rate")
    plt.show()
开发者ID:LEONOB2014,项目名称:bigdata2015-finalproject,代码行数:17,代码来源:White.py


示例14: main

def main():
    """Regress 'Tip Perc' on median household income, its 'Income2' term and a constant.

    Loads a pickled DataFrame from the file ``OLS_data`` in the working
    directory, fits the OLS model and shows a scatter plot with the fitted
    curve, the interval bounds from ``wls_prediction_std`` and the R^2 value.
    """
    # Pickles are binary data: read in binary mode and close the handle.
    # NOTE: unpickling runs arbitrary code; only load a trusted OLS_data file.
    with open('OLS_data', 'rb') as f:
        df = pickle.load(f)
    # NOTE(review): DataFrame.sort is the legacy pandas API (sort_values in
    # newer releases) -- kept to match the pandas version this script targets.
    df = df.sort(columns='Median household income')
    y = df['Tip Perc']
    X = df[['Median household income','Income2','const']]
    result = sm.OLS(y, X).fit()
    yhat = result.predict(X)
    prstd, iv_l, iv_u = wls_prediction_std(result)
    plt.scatter(X['Median household income'],y,color = 'b', alpha = 0.9)
    plt.plot(X['Median household income'],yhat, color = 'r', alpha = 0.7)
    plt.plot(X['Median household income'], iv_u, '--', color ='r',alpha = 0.7, linewidth = 0.7)
    plt.plot(X['Median household income'], iv_l, '--', color ='r', alpha = 0.7, linewidth = 0.7)
    plt.text(125000, 24.5,'$R^2$=$%.3f$' % result.rsquared, ha='center', va='center')
    plt.xlabel('Median Household Income ($)')
    plt.ylabel('Average Tip Percentage')
    plt.title('Regress Tip Percentage on Median Household Income')
    plt.show()
开发者ID:LEONOB2014,项目名称:bigdata2015-finalproject,代码行数:17,代码来源:Income.py


示例15: lm

def lm(x, y, alpha=ALPHA):
    """Fit ``y`` on ``x`` plus a constant with statsmodels OLS.

    Parameters
    ----------
    x, y : sequences
        Passed through ``plot_friendly``; dates in ``x`` are converted to
        ordinals before fitting.
    alpha : float
        Significance level forwarded to ``summary_table``.

    Returns
    -------
    tuple
        ``(fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)`` taken
        from the summary table columns.
    """
    x, y = map(plot_friendly, [x,y])
    if _isdate(x[0]):
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)
    # A real list (not a lazy map object) keeps the column labels usable on
    # both Python 2 and Python 3.
    df = pd.DataFrame(summary_values,
                      columns=[snakify(name) for name in summary_names])
    # NOTE(review): the column keys contain the literal "95%"; presumably the
    # summary table headers do not change with alpha -- confirm.
    fittedvalues        = df['predicted_value']
    predict_mean_ci_low = df['mean_ci_95%_low']
    predict_mean_ci_upp = df['mean_ci_95%_upp']
    return (fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)
开发者ID:elkclone,项目名称:ggplot,代码行数:17,代码来源:smoothers.py


示例16: returnOutliers

def returnOutliers(results, x, y, alpha=0.05):
    """Collect the observations that fall outside the prediction interval.

    Parameters
    ----------
    results : result instance
        Fitted regression results passed to ``wls_prediction_std``.
    x : 2-D indexable
        Explanatory variables for which the interval is computed; column 1
        of each row is reported as the x coordinate of an outlier.
    y : iterable
        Observed values, in the same order as the rows of ``x``.
    alpha : float
        Significance level of the interval.

    Returns
    -------
    o_x, o_y : list, list
        x coordinates and observed values of the points above the upper or
        below the lower interval bound.
    """
    o_x = []
    o_y = []

    # The first returned element is the prediction standard deviation, not
    # the predicted values; it is not needed here.
    _prstd, iv_l, iv_u = wls_prediction_std(results, exog=x, weights=None, alpha=alpha)

    for i, val in enumerate(y):
        if val > iv_u[i] or val < iv_l[i]:
            o_x.append(x[i][1])
            o_y.append(val)

    return o_x, o_y
开发者ID:Harshit661000143,项目名称:cs527enerj-math,代码行数:18,代码来源:plot_inerr_vs_outerr_CI_dir.py


示例17: test_ci

    def test_ci(self):
        """Check the prediction results against ``wls_prediction_std``.

        Both the ``get_prediction`` function and the results method must
        reproduce the standard error and interval of ``wls_prediction_std``
        and yield the expected summary frame columns.  Predictions for array
        and list inputs must agree as well (issue 4437).
        """
        res_wls = self.res_wls
        prstd, iv_l, iv_u = wls_prediction_std(res_wls)
        col_names = ['mean', 'mean_se', 'mean_ci_lower', 'mean_ci_upper',
                      'obs_ci_lower', 'obs_ci_upper']

        # Function form first, then the method form; each prediction is
        # created only when its turn comes, as in a straight-line test.
        factories = (lambda: get_prediction(res_wls), res_wls.get_prediction)
        for make_prediction in factories:
            pred = make_prediction()
            ci = pred.conf_int(obs=True)

            assert_allclose(pred.se_obs, prstd, rtol=1e-13)
            assert_allclose(ci, np.column_stack((iv_l, iv_u)), rtol=1e-13)

            frame = pred.summary_frame()
            assert_equal(frame.columns.tolist(), col_names)

        # check that list works, issue 4437: a single row (the column means)
        # and a block of the last two rows.
        exog = res_wls.model.exog
        cases = [(exog.mean(0), [0]), (exog[-2:], [0, 1])]
        for x, expected_index in cases:
            from_array = res_wls.get_prediction(x)
            ci_array = from_array.conf_int(obs=True)
            from_list = res_wls.get_prediction(x.tolist())
            ci_list = from_list.conf_int(obs=True)

            assert_allclose(from_list.se_obs, from_array.se_obs, rtol=1e-13)
            assert_allclose(ci_list, ci_array, rtol=1e-13)
            res_df = from_list.summary_frame()
            assert_equal(res_df.index.values, expected_index)
开发者ID:ChadFulton,项目名称:statsmodels,代码行数:44,代码来源:test_predict.py


示例18: test_pred_interval

def test_pred_interval(show_plot=False):
    """Compare LinModel's prediction interval with statsmodels.

    Generates example data, fits ``LinModel`` and a statsmodels OLS on the
    same design, optionally plots both sets of intervals, and asserts that
    the upper prediction bounds agree to within 0.1% in aggregate.

    Parameters
    ----------
    show_plot : bool
        When True, draw the two comparison subplots.
    """
    from ml_ext import examples
    (coefs,df)=examples.gen_simplemodel_data(n=50,k=3)
    # Sorted by X1 so the interval lines plot cleanly.
    df.sort('X1',inplace=True)
    lr=LinModel()
    X=df[df.columns[df.columns!='y']]
    y=df.y


    lr.fit(X=X,y=y)
    lr.summary()
    df_ci=lr.get_confidence_interval_for_mean(X)
    df_pi=lr.get_prediction_interval(X)

    #Now use statsmodels to compare
    from statsmodels.sandbox.regression.predstd import wls_prediction_std
    import statsmodels.api as sm
    re = sm.OLS(y, X).fit()
    prstd, iv_l, iv_u = wls_prediction_std(re)

    if show_plot:
        (fig,ax)=plt.subplots(nrows=2,ncols=1,figsize=[14,12])

        cols=sns.color_palette('husl',n_colors=4)
        ax[0].scatter(X.X1,y,label='y',color=cols[3],alpha=0.4)

        ax[0].plot(X.X1,df_pi['upper_pred'],label='pred',color=cols[1],alpha=0.5)
        ax[0].plot(X.X1,df_pi['lower_pred'],color=cols[1],alpha=0.5)
        ax[0].plot(X.X1,df_ci['upper_mean'],color=cols[2],alpha=0.5)
        ax[0].plot(X.X1,df_ci['lower_mean'],label='mean_ci',color=cols[2],alpha=0.5)
        ax[0].scatter(X.X1,df_pi['y_hat'],label='y_hat',color=cols[0],alpha=0.5)
        ax[0].legend(loc='best')

        ax[1].scatter(X.X1,y,label='y',color=cols[3],alpha=0.4)
        ax[1].scatter(X.X1,df_ci['y_hat'],label='y_hat',color=cols[0],alpha=0.5)
        ax[1].plot(X.X1,iv_u,label='wls',color=cols[1],alpha=0.5)
        ax[1].plot(X.X1,iv_l,color=cols[1],alpha=0.5)
        ax[1].legend(loc='best')

    # Aggregate percentage difference between the two upper bounds; it must
    # stay within 0.1% in either direction.
    overall_diff=100*numpy.sum(iv_u-df_pi['upper_pred'])/numpy.sum(iv_u)
    logging.debug("Overall % difference in prediction ranges for upper bound: {}".format(overall_diff))
    # abs() so that a large negative deviation fails the test as well.
    assert abs(overall_diff)<0.1
开发者ID:ejokeeffe,项目名称:ML,代码行数:43,代码来源:lin_model.py


示例19: lm

def lm(x, y, alpha=ALPHA):
    """Fit ``y`` on ``x`` plus a constant with statsmodels OLS.

    Parameters
    ----------
    x : pandas Series
        Explanatory values; dates are converted to ordinals for the fit and
        converted back to timestamps in the returned ``x``.
    y : array_like
        Dependent values.
    alpha : float
        Significance level forwarded to ``summary_table``.

    Returns
    -------
    tuple
        ``(x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)``;
        the last three are plain arrays taken from the summary table.
    """
    x_is_date = _isdate(x.iloc[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)
    # A real list (not a lazy map object) keeps the column labels usable on
    # both Python 2 and Python 3.
    df = pd.DataFrame(summary_values,
                      columns=[_snakify(name) for name in summary_names])
    # .values drops the frame index, which otherwise interferes downstream.
    fittedvalues        = df['predicted_value'].values
    predict_mean_ci_low = df['mean_ci_95%_low'].values
    predict_mean_ci_upp = df['mean_ci_95%_upp'].values

    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)
开发者ID:ChickenProp,项目名称:ggplot,代码行数:20,代码来源:smoothers.py


示例20: test_nonlinear

def test_nonlinear():
    """Fit OLS to simulated data that is nonlinear in x and plot the result.

    The design matrix holds ``x``, ``sin(x)``, ``(x - 5)**2`` and a constant
    column; ``y`` is the true linear combination plus scaled normal noise.
    The first rows, the model summary, parameters, R^2, standard errors and
    predictions are printed, then the data, the true curve, the fitted values
    and the interval bounds from ``wls_prediction_std`` are plotted.
    """
    # Fixed seed so the simulated noise is reproducible.
    np.random.seed(111)

    n_sample = 50
    max_val = 30
    sig = 0.5

    x = np.linspace(0, max_val, n_sample)
    X = np.c_[x, np.sin(x), (x - 5)**2, np.ones(n_sample)]
    beta = np.array([0.5, 0.5, -0.02, 5.0])
    e = np.random.normal(size=n_sample)

    y_true = np.dot(X, beta)
    y = y_true + sig * e

    # range and single-argument print() behave the same on Python 2 and 3.
    for i in range(5):
        print('%3d: %s %s' % (i, X[i, :], y[i]))

    print('')
    print('')
    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())
    print('')
    print('')
    print(results.params)
    print(results.rsquared)
    print(results.bse)
    print(results.predict())

    plt.figure()
    plt.plot(x, y, 'o', x, y_true, 'b-')
    prstd, iv_l, iv_u = wls_prediction_std(results)
    plt.plot(x, results.fittedvalues, 'r--.')
    plt.plot(x, iv_u, 'r--')
    plt.plot(x, iv_l, 'r--')
    plt.title('blue: true,   red: OLS')
    plt.show()
开发者ID:AntHar,项目名称:stats,代码行数:40,代码来源:forward_stepwise_regression.py



注:本文中的statsmodels.sandbox.regression.predstd.wls_prediction_std函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python multicomp.multipletests函数代码示例发布时间:2022-05-27
下一篇:
Python scale.mad函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap