This article collects typical usage examples of the Python function statsmodels.sandbox.regression.predstd.wls_prediction_std. If you have been wondering what exactly wls_prediction_std does and how to use it in practice, the curated examples below should help.
Twenty code examples of wls_prediction_std are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
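Before the project-specific examples, here is a minimal, self-contained sketch of the typical call pattern. The synthetic data, variable names, and the 0.05 significance level are illustrative assumptions and are not taken from any of the projects cited below; the function is assumed to behave as documented in statsmodels, returning the prediction standard error together with the lower and upper bounds of the observation-level prediction interval.

import numpy as np
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# Synthetic data: a noisy linear relationship (illustrative only)
rng = np.random.default_rng(0)
x = np.linspace(0, 10, 100)
X = sm.add_constant(x)                     # design matrix with an intercept column
y = 2.0 + 0.5 * x + rng.normal(scale=1.0, size=x.shape)

res = sm.OLS(y, X).fit()

# Prediction standard error and 95% prediction-interval bounds (alpha=0.05)
# for each observation used in the fit.
prstd, iv_l, iv_u = wls_prediction_std(res, alpha=0.05)
print(prstd[:3])
print(iv_l[:3], iv_u[:3])

Most of the examples that follow use exactly this pattern: fit an OLS or WLS model, call wls_prediction_std on the results object, and plot iv_l and iv_u as dashed interval bounds around the fitted values.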
Example 1: try_prod24h_before

def try_prod24h_before(columns=['Tout', 'vWind', 'vWindavg24', 'prod24h_before'], add_const=False, y=y):
    plt.close('all')
    X = all_data[columns]
    res = mlin_regression(y, X, add_const=add_const)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))

    plt.subplot(2,1,1)
    plt.plot_date(timesteps, y, 'b', label='Actual production')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    prstd, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')
    plt.ylabel('MW')
    plt.legend(loc=2)

    plt.subplot(2,1,2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.ylabel('MW')
    plt.legend()

    print "MAE = " + str(mae(res.resid))
    print "MAPE = " + str(mape(res.resid, y))
    print "RMSE = " + str(rmse(res.resid))
    print res.summary()
    return res

Author: magndahl, Project: dmi_ensemble_handler, Lines: 27, Source: model_selection.py
Example 2: plot_best_model

def plot_best_model():
    plt.close('all')
    columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']#, 'hours', 'hours2','hours3', 'hours4','hours5', 'hours6']#, 'hours7', 'hours8']#,'hours5', 'hours6']
    X = all_data[columns]
    res = mlin_regression(y, X)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))

    plt.subplot(2,1,1)
    plt.plot_date(timesteps, y, 'b', label='Actual production')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    prstd, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')
    mean_day_resid = [res.resid[i::24].mean() for i in range(24)]
    mean_resid_series = np.tile(mean_day_resid, 29)
    plt.plot_date(timesteps, res.fittedvalues + mean_resid_series, 'g', label='Weather model + avg daily profile')
    plt.ylabel('MW')
    plt.legend(loc=2)

    plt.subplot(2,1,2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.plot_date(timesteps, mean_resid_series)
    plt.ylabel('MW')
    plt.legend()

    mape = np.mean(np.abs((res.fittedvalues + mean_resid_series-y)/y))
    mape2 = np.mean(np.abs((res.resid)/y))
    mae = np.mean(np.abs((res.fittedvalues + mean_resid_series-y)))
    print mape, mape2, mae
    res.summary()
    return res

Author: magndahl, Project: dmi_ensemble_handler, Lines: 34, Source: model_selection.py
Example 3: user_model

def user_model(data):
    """
    This function allows the user to enter their own linear regression
    model formula, which is then run in the statsmodels package and
    returns model results.
    """
    # List available covariates in the data set
    print('The data set contains the following covariates: \n')
    print(list(data.columns), '\n')
    # Prompt user to input model formula, in R type syntax
    userFormula = choose_data = input('Enter your regression model formula, using syntax as shown: \n \n dependent_variable ~ covariate1 + covariate 2 + ... \n \n')
    # Run the user-defined model as a statsmodels linear regression
    userModel = smf.ols(formula=userFormula, data=data).fit()
    print('\n', userModel.summary(), '\n')
    # Retrieve y variable and time variable for plotting
    yvar = userModel.model.endog_names
    y = data[yvar]
    timeVar = list(data.columns[data.dtypes == 'datetime64[ns]'])
    x = data[timeVar]
    # covars = list(userModel.params.keys())
    # Plot dependent variable data and model fitted values vs time
    prstd, iv_l, iv_u = wls_prediction_std(userModel)
    fig = plt.figure(figsize=(12,6))
    plt.plot(x, userModel.fittedvalues, 'r.', alpha=0.2, label='Fitted Values')
    plt.plot(x, y, 'b.', alpha=0.2, label='%s data' % yvar)
    plt.legend(loc='upper left')
    plt.title('%s actual data and model fitted values' % yvar, fontsize='x-large')

Author: cmfeng, Project: class_project, Lines: 34, Source: regression.py
Example 4: plot_locality_regression

def plot_locality_regression(snps,cob,gene_limit=10):
    # Get degree and bootstrap degree
    log('Fetching Empirical Degree')
    degree = cob.locality(cob.refgen.candidate_genes(snps,gene_limit=gene_limit,chain=True)).sort('local')
    log('Fetching BS Degree')
    #bsdegree = pd.concat([cob.locality(cob.refgen.bootstrap_candidate_genes(snps,gene_limit=gene_limit,chain=True)) for x in range(50)]).sort('local')
    # get OLS for the bootstrapped degree
    log('Fitting models')
    model = sm.OLS(degree['global'],degree.local)
    res = model.fit()
    std, iv_l, iv_u = wls_prediction_std(res)
    # plot the bootstrapped data
    fig,ax = pylab.subplots(figsize=(8,6))
    fig.hold(True)
    ax.set_xlim(0,max(degree.local))
    ax.set_ylim(0,max(degree['global']))
    # plot the bootstraps std
    # plot the true data
    log('Plotting Empirical')
    ax.plot(degree.local,degree['global'],'o',label='Empirical')
    log('Plotting Residuals')
    ax.plot(degree.local,res.fittedvalues,'--')
    ax.plot(degree.local,res.fittedvalues+2.5*std,'r--')
    ax.plot(degree.local,res.fittedvalues-2.5*std,'r--')
    ax.set_xlabel('Number Local Interactions')
    ax.set_ylabel('Number Global Interactions')
    log('Saving Figure')
    fig.savefig('{}_locality.png'.format(cob.name))

Author: hawkaa, Project: Camoco, Lines: 28, Source: Tools.py
Example 5: summary_obs

def summary_obs(res, alpha=0.05):
    from scipy import stats
    from statsmodels.sandbox.regression.predstd import wls_prediction_std

    infl = Influence(res)

    #standard error for predicted mean
    #Note: using hat_matrix only works for fitted values
    predict_mean_se = np.sqrt(infl.hat_matrix_diag*res.mse_resid)

    tppf = stats.t.isf(alpha/2., res.df_resid)
    predict_mean_ci = np.column_stack([
                        res.fittedvalues - tppf * predict_mean_se,
                        res.fittedvalues + tppf * predict_mean_se])

    #standard error for predicted observation
    predict_se, predict_ci_low, predict_ci_upp = wls_prediction_std(res)
    predict_ci = np.column_stack((predict_ci_low, predict_ci_upp))

    #standard deviation of residual
    resid_se = np.sqrt(res.mse_resid * (1 - infl.hat_matrix_diag))

    table_sm = np.column_stack([
          np.arange(res.nobs) + 1,
          res.model.endog,
          res.fittedvalues,
          predict_mean_se,
          predict_mean_ci[:,0],
          predict_mean_ci[:,1],
          predict_ci[:,0],
          predict_ci[:,1],
          res.resid,
          resid_se,
          infl.resid_studentized_internal,
          infl.cooks_distance()[0]
          ])

    #colnames, data = zip(*table_raw) #unzip
    data = table_sm
    ss2 = ['Obs', 'Dep Var\nPopulation', 'Predicted\nValue', 'Std Error\nMean Predict', 'Mean ci\n95% low', 'Mean ci\n95% upp', 'Predict ci\n95% low', 'Predict ci\n95% upp', 'Residual', 'Std Error\nResidual', 'Student\nResidual', "Cook's\nD"]
    colnames = ss2

    #self.table_data = data
    #data = np.column_stack(data)
    data = np.round(data,4)
    #self.table = data

    from statsmodels.iolib.table import SimpleTable, default_html_fmt
    from statsmodels.iolib.tableformatting import fmt_base
    from copy import deepcopy
    fmt = deepcopy(fmt_base)
    fmt_html = deepcopy(default_html_fmt)
    fmt['data_fmts'] = ["%4d"] + ["%6.3f"] * (data.shape[1] - 1)
    #fmt_html['data_fmts'] = fmt['data_fmts']
    st = SimpleTable(data, headers=colnames, txt_fmt=fmt,
                     html_fmt=fmt_html)

    return st, data, ss2

Author: CRP, Project: statsmodels, Lines: 59, Source: outliers_influence.py
Example 6: plot_fit

def plot_fit(results, exog_idx, y_true=None, ax=None, **kwargs):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values.

    Parameters
    ----------
    results : result instance
        result instance with resid, model.endog and model.exog as attributes
    x_var : int or str
        Name or index of regressor in exog matrix.
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    kwargs
        The keyword arguments are passed to the plot command for the fitted
        values points.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure. Otherwise the figure to which
        `ax` is connected.
    """
    fig, ax = utils.create_mpl_ax(ax)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    #maybe add option for wendog, wexog
    y = results.model.endog
    x1 = results.model.exog[:, exog_idx]
    x1_argsort = np.argsort(x1)
    y = y[x1_argsort]
    x1 = x1[x1_argsort]

    ax.plot(x1, y, 'bo', label=results.model.endog_names)
    if not y_true is None:
        ax.plot(x1, y_true[x1_argsort], 'b-', label='True values')
    title = 'Fitted values versus %s' % exog_name

    prstd, iv_l, iv_u = wls_prediction_std(results)
    ax.plot(x1, results.fittedvalues[x1_argsort], 'D', color='r',
            label='fitted', **kwargs)
    ax.vlines(x1, iv_l[x1_argsort], iv_u[x1_argsort], linewidth=1, color='k',
              alpha=.7)
    #ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1,
    #                color='k')

    ax.set_title(title)
    ax.set_xlabel(exog_name)
    ax.set_ylabel(results.model.endog_names)
    ax.legend(loc='best')

    return fig

Author: joonro, Project: statsmodels, Lines: 57, Source: regressionplots.py
Example 7: plot_fit

def plot_fit(res, exog_idx, exog_name='', y_true=None, ax=None, fontsize='small'):
    """Plot fit against one regressor.

    This creates one graph with the scatterplot of observed values compared to
    fitted values.

    Parameters
    ----------
    res : result instance
        result instance with resid, model.endog and model.exog as attributes
    exog_idx : int
        index of regressor in exog matrix
    y_true : array_like
        (optional) If this is not None, then the array is added to the plot
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure. Otherwise the figure to which
        `ax` is connected.

    Notes
    -----
    This is currently very simple, no options or varnames yet.
    """
    fig, ax = utils.create_mpl_ax(ax)

    if exog_name == '':
        exog_name = 'variable %d' % exog_idx

    #maybe add option for wendog, wexog
    y = res.model.endog
    x1 = res.model.exog[:, exog_idx]
    x1_argsort = np.argsort(x1)
    y = y[x1_argsort]
    x1 = x1[x1_argsort]

    ax.plot(x1, y, 'bo', label='observed')
    if not y_true is None:
        ax.plot(x1, y_true[x1_argsort], 'b-', label='true')
        title = 'fitted versus regressor %s' % exog_name
    else:
        title = 'fitted versus regressor %s' % exog_name

    prstd, iv_l, iv_u = wls_prediction_std(res)
    ax.plot(x1, res.fittedvalues[x1_argsort], 'k-', label='fitted') #'k-o')
    #ax.plot(x1, iv_u, 'r--')
    #ax.plot(x1, iv_l, 'r--')
    ax.fill_between(x1, iv_l[x1_argsort], iv_u[x1_argsort], alpha=0.1, color='k')
    ax.set_title(title, fontsize=fontsize)

    return fig

Author: bendmorris, Project: statsmodels, Lines: 56, Source: regressionplots.py
Example 8: lm

def lm(x, y):
    "fits an OLS from statsmodels. returns tuple."
    x, y = map(_plot_friendly, [x, y])
    if _isdate(x[0]):
        x = np.array([i.toordinal() for i in x])
    df = pd.DataFrame({"x": x, "y": y})
    df["const"] = 1.0
    fit = sm.OLS(df.y, df[["x", "const"]]).fit()
    df["predicted_y"] = fit.fittedvalues
    df["predstd"], df["interval_l"], df["interval_u"] = wls_prediction_std(fit)
    return (df.predicted_y, df.interval_l, df.interval_u)

Author: neuropil, Project: ggplot, Lines: 11, Source: smoothers.py
Example 9: _predict

def _predict(self, fit, df):
    """
    Return a df with predictions and confidence interval

    Notes
    -----
    The df will contain the following columns:
    - 'predicted': the model output
    - 'interval_u', 'interval_l': upper and lower confidence bounds.

    The result will depend on the following attributes of self:

    confint : float (default=0.95)
        Confidence level for two-sided hypothesis
    allow_negative_predictions : bool (default=True)
        If False, correct negative predictions to zero (typically for energy consumption predictions)

    Parameters
    ----------
    fit : Statsmodels fit
    df : pandas DataFrame or None (default)
        If None, use self.df

    Returns
    -------
    df_res : pandas DataFrame
        Copy of df with additional columns 'predicted', 'interval_u' and 'interval_l'
    """
    # Add model results to data as column 'predictions'
    df_res = df.copy()
    if 'Intercept' in fit.model.exog_names:
        df_res['Intercept'] = 1.0
    df_res['predicted'] = fit.predict(df_res)
    if not self.allow_negative_predictions:
        df_res.loc[df_res['predicted'] < 0, 'predicted'] = 0

    def rename(x):
        if x == 'Intercept':
            return x
        else:
            return self.quote(x)

    prstd, interval_l, interval_u = wls_prediction_std(fit,
                                                       df_res.rename(columns=rename)[fit.model.exog_names],
                                                       alpha=1 - self.confint)
    df_res['interval_l'] = interval_l
    df_res['interval_u'] = interval_u

    if 'Intercept' in df_res:
        df_res.drop(labels=['Intercept'], axis=1, inplace=True)
    return df_res

Author: kdebrab, Project: opengrid, Lines: 53, Source: regression.py
Example 10: plot_locality

def plot_locality(self,gene_list,bootstraps=10,num_windows=100,sd_thresh=2):
    '''
    Make a fancy locality plot.
    '''
    # Generate a blank fig
    fig,ax = plt.subplots(figsize=(8,6))
    fig.hold(True)
    # Y axis is local degree (what we are TRYING to predict)
    degree = self.locality(gene_list).sort('global')
    ax.set_ylim(0,max(degree['local']))
    ax.set_xlim(0,max(degree['global']))
    if bootstraps > 0:
        bs = pd.concat(
            [self.locality(
                self.refgen.bootstrap_candidate_genes(gene_list)
            ) for x in range(10)]
        ).sort('global')
        ax.set_ylim(0,max(bs['local']))
        ax.set_xlim(0,max(bs['global']))
        plt.plot(bs['global'],bs['local'],'ro',alpha=0.05,label='Bootstraps')
    # Plot the bootstraps and the empirical
    plt.plot(degree['global'],degree['local'],'bo',label='Empirical')
    emp_ols = sm.OLS(degree['local'],degree['global']).fit()
    ax.plot(degree['global'],emp_ols.fittedvalues,'k:',label='Empirical OLS')
    if bootstraps > 0:
        # Get the OLS
        bs_ols = sm.OLS(bs['local'],bs['global']).fit()
        bs['resid'] = bs_ols.resid
        bs['fitted'] = bs_ols.fittedvalues
        ax.plot(bs['global'],bs_ols.fittedvalues,'g--',label='bootstrap OLS')
        # Do lowess on the residuals
        # We only care about windows within the empirical part
        window_tick = len(bs)/num_windows
        bs['window'] = [int(x/window_tick) for x in range(len(bs))]
        # get std for each window
        win_std = bs.groupby('window').apply(lambda df: df['resid'].std()).to_dict()
        bs['std_envelope'] = [win_std[x] for x in bs.window.values]
        # Plot confidence intervals
        prstd, iv_l, iv_u = wls_prediction_std(bs_ols)
        ax.plot(bs['global'], iv_u, 'g--',label='conf int.')
        ax.plot(bs['global'], iv_l, 'g--')
        # plot the
        ax.plot(
            bs['global'],bs['fitted']+(sd_thresh*bs['std_envelope']),'r--'
            ,label='{} s.d. envelope'.format(sd_thresh)
        )
        ax.plot(bs['global'],bs['fitted']-(sd_thresh*bs['std_envelope']),'r--')
    ax.set_xlabel('Number Global Interactions')
    ax.set_ylabel('Number Local Interactions')
    legend = ax.legend(loc='best')
    return plt

Author: gitter-badger, Project: Camoco, Lines: 52, Source: COB.py
Example 11: run_ordinary_least_squares

def run_ordinary_least_squares(ols_dates, ols_data, statsmodels_settings):
    """
    This method receives the dates and prices of a Quandl data-set as well as settings for the StatsModels package,
    it then calculates the regression lines and / or the confidence lines and returns the objects
    """
    intercept = np.column_stack((ols_dates, ols_dates ** statsmodels_settings.exponent))
    constant = sm.add_constant(intercept)
    statsmodel_regression = sm.OLS(ols_data, constant).fit()
    print(statsmodel_regression.summary())

    if statsmodels_settings.confidence:
        prstd, lower, upper = wls_prediction_std(statsmodel_regression)
        return statsmodel_regression, lower, upper
    else:
        return statsmodel_regression

Author: bheemeshk, Project: StreamSpace, Lines: 14, Source: RegressionAnalysis.py
Example 12: predict

def predict(self, ID, ALPHA=0.5):
    list1 = get_data(ID)
    vector = self.vectorizer.transform([list1[0]])
    vector = self.lsa.transform(vector)
    array = np.array([list1[1:4]])**2.0 / self.sum
    array = array**0.5
    vector = np.hstack([vector, array])
    vector = del_vector(vector, self.dellist)
    estimated = self.results.predict(vector)
    prstdn, infa, supa = wls_prediction_std(self.results, vector, alpha = ALPHA)
    if infa[0] < 0:
        infa[0] = 0
    return estimated[0]**2.0, infa[0]**2.0, supa[0]**2.0

Author: takeru-nitta, Project: auction, Lines: 14, Source: estimator.py
Example 13: main

def main():
    df = pickle.loads(open("OLS_data", "r").read())
    df = df.sort(columns="White")
    y = df["Tip Perc"]
    X = df[["White", "const"]]
    result = sm.OLS(y, X).fit()
    yhat = result.predict(X)
    prstd, iv_l, iv_u = wls_prediction_std(result)

    plt.scatter(X["White"], y, color="b", alpha=0.9)
    plt.plot(X["White"], yhat, color="r", alpha=0.7)
    plt.plot(X["White"], iv_u, "--", color="r", alpha=0.7, linewidth=0.7)
    plt.plot(X["White"], iv_l, "--", color="r", alpha=0.7, linewidth=0.7)
    plt.text(1.05, 25, "$R^2$=$%.3f$" % result.rsquared, ha="center", va="center")
    plt.xlabel("White Rate")
    plt.ylabel("Average Tip Percentage")
    plt.title("Regress Tip Percentage on White Rate")
    plt.show()

Author: LEONOB2014, Project: bigdata2015-finalproject, Lines: 17, Source: White.py
Example 14: main

def main():
    df = pickle.loads(open('OLS_data','r').read())
    df = df.sort(columns='Median household income')
    y = df['Tip Perc']
    X = df[['Median household income','Income2','const']]
    result = sm.OLS(y, X).fit()
    yhat = result.predict(X)
    prstd, iv_l, iv_u = wls_prediction_std(result)

    plt.scatter(X['Median household income'], y, color='b', alpha=0.9)
    plt.plot(X['Median household income'], yhat, color='r', alpha=0.7)
    plt.plot(X['Median household income'], iv_u, '--', color='r', alpha=0.7, linewidth=0.7)
    plt.plot(X['Median household income'], iv_l, '--', color='r', alpha=0.7, linewidth=0.7)
    plt.text(125000, 24.5, '$R^2$=$%.3f$' % result.rsquared, ha='center', va='center')
    plt.xlabel('Median Household Income ($)')
    plt.ylabel('Average Tip Percentage')
    plt.title('Regress Tip Percentage on Median Household Income')
    plt.show()

Author: LEONOB2014, Project: bigdata2015-finalproject, Lines: 17, Source: Income.py
Example 15: lm

def lm(x, y, alpha=ALPHA):
    "fits an OLS from statsmodels. returns tuple."
    x, y = map(plot_friendly, [x, y])
    if _isdate(x[0]):
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    prstd, iv_l, iv_u = wls_prediction_std(fit)
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)
    df = pd.DataFrame(summary_values, columns=map(snakify, summary_names))
    fittedvalues = df['predicted_value']
    predict_mean_se = df['std_error_mean_predict']
    predict_mean_ci_low = df['mean_ci_95%_low']
    predict_mean_ci_upp = df['mean_ci_95%_upp']
    predict_ci_low = df['predict_ci_95%_low']
    predict_ci_upp = df['predict_ci_95%_upp']
    return (fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)

Author: elkclone, Project: ggplot, Lines: 17, Source: smoothers.py
Example 16: returnOutliers

def returnOutliers(results, x, y, alpha=0.05):
    o_x = []
    o_y = []
    #print results.cov_params().shape[0]
    exog = results.model.exog
    #print exog.shape
    #print x.shape[0]
    pred_y, iv_l, iv_u = wls_prediction_std(results, exog=x, weights=None, alpha=alpha)
    i = 0
    for val in y:
        if (val > iv_u[i] or val < iv_l[i]):
            o_x.append(x[i][1])
            o_y.append(val)
        i += 1
    return o_x, o_y

Author: Harshit661000143, Project: cs527enerj-math, Lines: 18, Source: plot_inerr_vs_outerr_CI_dir.py
Example 17: test_ci

def test_ci(self):
    res_wls = self.res_wls
    prstd, iv_l, iv_u = wls_prediction_std(res_wls)
    pred_res = get_prediction(res_wls)
    ci = pred_res.conf_int(obs=True)

    assert_allclose(pred_res.se_obs, prstd, rtol=1e-13)
    assert_allclose(ci, np.column_stack((iv_l, iv_u)), rtol=1e-13)

    sf = pred_res.summary_frame()
    col_names = ['mean', 'mean_se', 'mean_ci_lower', 'mean_ci_upper',
                 'obs_ci_lower', 'obs_ci_upper']
    assert_equal(sf.columns.tolist(), col_names)

    pred_res2 = res_wls.get_prediction()
    ci2 = pred_res2.conf_int(obs=True)

    assert_allclose(pred_res2.se_obs, prstd, rtol=1e-13)
    assert_allclose(ci2, np.column_stack((iv_l, iv_u)), rtol=1e-13)

    sf2 = pred_res2.summary_frame()
    assert_equal(sf2.columns.tolist(), col_names)

    # check that list works, issue 4437
    x = res_wls.model.exog.mean(0)
    pred_res3 = res_wls.get_prediction(x)
    ci3 = pred_res3.conf_int(obs=True)
    pred_res3b = res_wls.get_prediction(x.tolist())
    ci3b = pred_res3b.conf_int(obs=True)
    assert_allclose(pred_res3b.se_obs, pred_res3.se_obs, rtol=1e-13)
    assert_allclose(ci3b, ci3, rtol=1e-13)
    res_df = pred_res3b.summary_frame()
    assert_equal(res_df.index.values, [0])

    x = res_wls.model.exog[-2:]
    pred_res3 = res_wls.get_prediction(x)
    ci3 = pred_res3.conf_int(obs=True)
    pred_res3b = res_wls.get_prediction(x.tolist())
    ci3b = pred_res3b.conf_int(obs=True)
    assert_allclose(pred_res3b.se_obs, pred_res3.se_obs, rtol=1e-13)
    assert_allclose(ci3b, ci3, rtol=1e-13)
    res_df = pred_res3b.summary_frame()
    assert_equal(res_df.index.values, [0, 1])

Author: ChadFulton, Project: statsmodels, Lines: 44, Source: test_predict.py
Example 18: test_pred_interval

def test_pred_interval(show_plot=False):
    from ml_ext import examples

    (coefs,df) = examples.gen_simplemodel_data(n=50,k=3)
    df.sort('X1',inplace=True)
    lr = LinModel()
    X = df[df.columns[df.columns!='y']]
    y = df.y

    lr.fit(X=X,y=y)
    lr.summary()
    df_ci = lr.get_confidence_interval_for_mean(X)
    df_pi = lr.get_prediction_interval(X)

    # Now use statsmodels to compare
    from statsmodels.sandbox.regression.predstd import wls_prediction_std
    import statsmodels.api as sm
    re = sm.OLS(y, X).fit()
    prstd, iv_l, iv_u = wls_prediction_std(re)

    if show_plot:
        (fig,ax) = plt.subplots(nrows=2,ncols=1,figsize=[14,12])
        cols = sns.color_palette('husl',n_colors=4)
        ax[0].scatter(X.X1,y,label='y',color=cols[3],alpha=0.4)
        ax[0].plot(X.X1,df_pi['upper_pred'],label='pred',color=cols[1],alpha=0.5)
        ax[0].plot(X.X1,df_pi['lower_pred'],color=cols[1],alpha=0.5)
        ax[0].plot(X.X1,df_ci['upper_mean'],color=cols[2],alpha=0.5)
        ax[0].plot(X.X1,df_ci['lower_mean'],label='mean_ci',color=cols[2],alpha=0.5)
        ax[0].scatter(X.X1,df_pi['y_hat'],label='y_hat',color=cols[0],alpha=0.5)
        ax[0].legend(loc='best')

        ax[1].scatter(X.X1,y,label='y',color=cols[3],alpha=0.4)
        ax[1].scatter(X.X1,df_ci['y_hat'],label='y_hat',color=cols[0],alpha=0.5)
        ax[1].plot(X.X1,iv_u,label='wls',color=cols[1],alpha=0.5)
        ax[1].plot(X.X1,iv_l,color=cols[1],alpha=0.5)
        ax[1].legend(loc='best')

    # get difference between uppers from each and check they are within 1%
    overall_diff = 100*numpy.sum(iv_u-df_pi['upper_pred'])/numpy.sum(iv_u)
    logging.debug("Overall % difference in prediction ranges for upper bound: {}".format(overall_diff))
    assert overall_diff < 0.1

Author: ejokeeffe, Project: ML, Lines: 43, Source: lin_model.py
Example 19: lm

def lm(x, y, alpha=ALPHA):
    "fits an OLS from statsmodels. returns tuple."
    x_is_date = _isdate(x.iloc[0])
    if x_is_date:
        x = np.array([i.toordinal() for i in x])
    X = sm.add_constant(x)
    fit = sm.OLS(y, X).fit()
    prstd, iv_l, iv_u = wls_prediction_std(fit)
    _, summary_values, summary_names = summary_table(fit, alpha=alpha)
    df = pd.DataFrame(summary_values, columns=map(_snakify, summary_names))
    # TODO: indexing w/ data frame is messing everything up
    fittedvalues = df['predicted_value'].values
    predict_mean_ci_low = df['mean_ci_95%_low'].values
    predict_mean_ci_upp = df['mean_ci_95%_upp'].values
    predict_ci_low = df['predict_ci_95%_low'].values
    predict_ci_upp = df['predict_ci_95%_upp'].values
    if x_is_date:
        x = [Timestamp.fromordinal(int(i)) for i in x]
    return (x, fittedvalues, predict_mean_ci_low, predict_mean_ci_upp)

Author: ChickenProp, Project: ggplot, Lines: 20, Source: smoothers.py
Example 20: test_nonlinear

def test_nonlinear():
    np.random.seed(111)
    n_sample = 50
    max_val = 30
    sig = 0.5

    x = np.linspace(0, max_val, n_sample)
    X = np.c_[x, np.sin(x), (x - 5)**2, np.ones(n_sample)]
    beta = np.array([0.5, 0.5, -0.02, 5.0])
    e = np.random.normal(size=n_sample)
    #X = sm.add_constant(X, prepend=False)
    y_true = np.dot(X, beta)
    y = y_true + sig * e

    for i in xrange(5):
        print '%3d: %s %s' % (i, X[i, :], y[i])
    print
    print

    model = sm.OLS(y, X)
    results = model.fit()
    print results.summary()
    print
    print
    print results.params
    print results.rsquared
    print results.bse
    print results.predict()

    plt.figure()
    plt.plot(x, y, 'o', x, y_true, 'b-')
    prstd, iv_l, iv_u = wls_prediction_std(results)
    plt.plot(x, results.fittedvalues, 'r--.')
    plt.plot(x, iv_u, 'r--')
    plt.plot(x, iv_l, 'r--')
    plt.title('blue: true, red: OLS')
    plt.show()

Author: AntHar, Project: stats, Lines: 40, Source: forward_stepwise_regression.py
Note: The examples of the statsmodels.sandbox.regression.predstd.wls_prediction_std function in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open source projects contributed by various developers, and copyright remains with the original authors. Before redistributing or using any of this code, please consult the License of the corresponding project; do not reproduce this article without permission.