This article collects and summarizes typical usage examples of the Python function scikits.statsmodels.tools.tools.add_constant. If you have been struggling with questions such as: What exactly does add_constant do? How is it used? Where can I find examples? — then the hand-picked code samples below may help.
In what follows, 20 code examples of the add_constant function are shown, sorted by popularity by default.
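Before the examples, here is a minimal sketch of what add_constant does (assuming the old scikits.statsmodels import path used throughout this article; in modern statsmodels the same function lives at statsmodels.api.add_constant):

import numpy as np
from scikits.statsmodels.tools.tools import add_constant

x = np.arange(1, 5.)               # shape (4,)
X = add_constant(x, prepend=True)  # prepend a column of ones -> shape (4, 2)
# X == [[1., 1.], [1., 2.], [1., 3.], [1., 4.]]
# with prepend=False (the default in this old API) the ones column is appended instead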
Example 1: notyet_atst
def notyet_atst():
    d = macrodata.load().data

    realinv = d['realinv']
    realgdp = d['realgdp']
    realint = d['realint']
    endog = realinv
    exog = add_constant(np.c_[realgdp, realint], prepend=True)
    res_ols1 = OLS(endog, exog).fit()

    #growth rates
    gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))
    lint = d['realint'][:-1]
    tbilrate = d['tbilrate'][:-1]

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, lint], prepend=True)
    exogg2 = add_constant(np.c_[gs_l_realgdp, tbilrate], prepend=True)

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()

    #the following were done accidentally with res_ols1 in R,
    #with original Greene data
    params = np.array([-272.3986041341653, 0.1779455206941112,
                       0.2149432424658157])
    cov_hac_4 = np.array([1321.569466333051, -0.2318836566017612,
        37.01280466875694, -0.2318836566017614, 4.602339488102263e-05,
        -0.0104687835998635, 37.012804668757, -0.0104687835998635,
        21.16037144168061]).reshape(3, 3, order='F')
    cov_hac_10 = np.array([2027.356101193361, -0.3507514463299015,
        54.81079621448568, -0.350751446329901, 6.953380432635583e-05,
        -0.01268990195095196, 54.81079621448564, -0.01268990195095195,
        22.92512402151113]).reshape(3, 3, order='F')

    #goldfeld-quandt
    het_gq_greater = dict(statistic=13.20512768685082, df1=99, df2=98,
                          pvalue=1.246141976112324e-30, distr='f')
    het_gq_less = dict(statistic=13.20512768685082, df1=99, df2=98, pvalue=1.)
    het_gq_2sided = dict(statistic=13.20512768685082, df1=99, df2=98,
                         pvalue=1.246141976112324e-30, distr='f')

    #goldfeld-quandt, fraction = 0.5
    het_gq_greater_2 = dict(statistic=87.1328934692124, df1=48, df2=47,
                            pvalue=2.154956842194898e-33, distr='f')

    gq = smsdia.het_goldfeldquandt(endog, exog, split=0.5)
    compare_t_est(gq, het_gq_greater, decimal=(13, 14))
    assert_equal(gq[-1], 'increasing')

    harvey_collier = dict(stat=2.28042114041313, df=199,
                          pvalue=0.02364236161988260, distr='t')
    #hc = harvtest(fm, order.by=ggdp, data = list())
    harvey_collier_2 = dict(stat=0.7516918462158783, df=199,
                            pvalue=0.4531244858006127, distr='t')
Author: takluyver | Project: statsmodels | Lines: 58 | Source file: test_diagnostic.py
Example 2: coint
def coint(y1, y2, regression="c"):
    """
    This is a simple cointegration test. Uses a unit-root test on the
    residuals to test for a cointegrated relationship.

    See Hamilton (1994) 19.2

    Parameters
    ----------
    y1 : array_like, 1d
        first element in cointegrating vector
    y2 : array_like
        remaining elements in cointegrating vector
    regression : str {'c'}
        Included in regression
        * 'c' : Constant

    Returns
    -------
    coint_t : float
        t-statistic of unit-root test on residuals
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994)
    crit_value : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels.

    Notes
    -----
    The null hypothesis is that there is no cointegration; the alternative
    hypothesis is that there is a cointegrating relationship. If the pvalue
    is small, below a critical size, then we can reject the hypothesis that
    there is no cointegrating relationship.

    P-values are obtained through regression surface approximation from
    MacKinnon 1994.

    References
    ----------
    MacKinnon, J.G. 1994. "Approximate asymptotic distribution functions for
    unit-root and cointegration tests." `Journal of Business and Economic
    Statistics` 12, 167-76.
    """
    regression = regression.lower()
    if regression not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("regression option %s not understood" % regression)
    y1 = np.asarray(y1)
    y2 = np.asarray(y2)
    if regression == 'c':
        y2 = add_constant(y2)
    st1_resid = OLS(y1, y2).fit().resid  #stage one residuals
    lgresid_cons = add_constant(st1_resid[0:-1])
    uroot_reg = OLS(st1_resid[1:], lgresid_cons).fit()
    coint_t = (uroot_reg.params[0] - 1) / uroot_reg.bse[0]
    pvalue = mackinnonp(coint_t, regression="c", N=2, lags=None)
    crit_value = mackinnoncrit(N=1, regression="c", nobs=len(y1))
    return coint_t, pvalue, crit_value
Author: chrisjordansquire | Project: statsmodels | Lines: 57 | Source file: stattools.py
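A quick usage sketch for the coint function from Example 2 above, on simulated data (the series and seed are illustrative, not from the source):

import numpy as np
np.random.seed(12345)
x = np.cumsum(np.random.randn(500))   # random walk
y = 0.5 * x + np.random.randn(500)    # cointegrated with x by construction
coint_t, pvalue, crit_value = coint(y, x)
# a pvalue below the usual cutoffs rejects the null of no cointegration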
Example 3: test_hac_simple
def test_hac_simple():
    from scikits.statsmodels.datasets import macrodata
    d2 = macrodata.load().data
    g_gdp = 400 * np.diff(np.log(d2['realgdp']))
    g_inv = 400 * np.diff(np.log(d2['realinv']))
    exogg = add_constant(np.c_[g_gdp, d2['realint'][:-1]], prepend=True)
    res_olsg = OLS(g_inv, exogg).fit()

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=TRUE)
    #Lag truncation parameter chosen: 4
    #            (Intercept)                   ggdp                  lint
    cov1_r = [[ 1.40643899878678802, -0.3180328707083329709, -0.060621111216488610],
              [-0.31803287070833292,  0.1097308348999818661,  0.000395311760301478],
              [-0.06062111121648865,  0.0003953117603014895,  0.087511528912470993]]

    #> NeweyWest(fm, lag = 4, prewhite = FALSE, sandwich = TRUE, verbose=TRUE, adjust=FALSE)
    #Lag truncation parameter chosen: 4
    #            (Intercept)                  ggdp                  lint
    cov2_r = [[ 1.3855512908840137, -0.313309610252268500, -0.059720797683570477],
              [-0.3133096102522685,  0.108101169035130618,  0.000389440793564339],
              [-0.0597207976835705,  0.000389440793564336,  0.086211852740503622]]

    cov1, se1 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=True)
    cov2, se2 = sw.cov_hac_simple(res_olsg, nlags=4, use_correction=False)
    assert_almost_equal(cov1, cov1_r, decimal=14)
    assert_almost_equal(cov2, cov2_r, decimal=14)
Author: takluyver | Project: statsmodels | Lines: 29 | Source file: test_sandwich.py
Example 4: setupClass
def setupClass(cls):
    data = longley.load()
    data.exog = add_constant(data.exog)
    ols_res = OLS(data.endog, data.exog).fit()
    gls_res = GLS(data.endog, data.exog).fit()
    cls.res1 = gls_res
    cls.res2 = ols_res
Author: pombredanne | Project: statsmodels | Lines: 7 | Source file: test_regression.py
Example 5: pacf_ols
def pacf_ols(x, nlags=40):
    '''Calculate partial autocorrelations

    Parameters
    ----------
    x : 1d array
        observations of time series for which pacf is calculated
    nlags : int
        Number of lags for which pacf is returned.

    Returns
    -------
    pacf : 1d array
        partial autocorrelations, nlags + 1 elements; lag 0 is included and
        is always 1.

    Notes
    -----
    This solves a separate OLS estimation for each desired lag.
    '''
    #TODO: add warnings for Yule-Walker
    #NOTE: demeaning and not using a constant gave incorrect answers?
    #JP: demeaning should have a better estimate of the constant
    #maybe we can compare small sample properties with a MonteCarlo
    xlags, x0 = lagmat(x, nlags, original='sep')
    #xlags = sm.add_constant(lagmat(x, nlags), prepend=True)
    xlags = add_constant(xlags, prepend=True)
    pacf = [1.]
    for k in range(1, nlags + 1):
        res = OLS(x0[k:], xlags[k:, :k + 1]).fit()
        #np.take(xlags[k:], range(1,k+1)+[-1],
        pacf.append(res.params[-1])
    return np.array(pacf)
Author: scottpiraino | Project: statsmodels | Lines: 33 | Source file: stattools.py
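A usage sketch for pacf_ols from Example 5, on a simulated AR(1) series (the series and seed are illustrative, not from the source):

import numpy as np
np.random.seed(12345)
u = np.random.randn(500)
ar1 = u.copy()
for t in range(1, 500):
    ar1[t] = 0.8 * ar1[t - 1] + u[t]   # AR(1) with coefficient 0.8
p = pacf_ols(ar1, nlags=5)
# p[0] is 1. by construction; p[1] should be close to 0.8,
# and the partial autocorrelations at higher lags close to 0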
Example 6: test_cov_cluster_2groups
def test_cov_cluster_2groups():
    #comparing cluster robust standard errors to Petersen
    #requires Petersen's test_data
    #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
    import os
    cur_dir = os.path.abspath(os.path.dirname(__file__))
    fpath = os.path.join(cur_dir, "test_data.txt")
    pet = np.genfromtxt(fpath)
    endog = pet[:, -1]
    group = pet[:, 0].astype(int)
    time = pet[:, 1].astype(int)
    exog = add_constant(pet[:, 2], prepend=True)
    res = OLS(endog, exog).fit()

    cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

    #Reference numbers from Petersen
    #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm
    bse_petw = [0.0284, 0.0284]
    bse_pet0 = [0.0670, 0.0506]
    bse_pet1 = [0.0234, 0.0334]   #year
    bse_pet01 = [0.0651, 0.0536]  #firm and year
    bse_0 = sw.se_cov(covg)
    bse_1 = sw.se_cov(covt)
    bse_01 = sw.se_cov(cov01)
    #print res.HC0_se, bse_petw - res.HC0_se
    #print bse_0, bse_0 - bse_pet0
    #print bse_1, bse_1 - bse_pet1
    #print bse_01, bse_01 - bse_pet01
    assert_almost_equal(bse_petw, res.HC0_se, decimal=4)
    assert_almost_equal(bse_0, bse_pet0, decimal=4)
    assert_almost_equal(bse_1, bse_pet1, decimal=4)
    assert_almost_equal(bse_01, bse_pet01, decimal=4)
Author: takluyver | Project: statsmodels | Lines: 34 | Source file: test_sandwich.py
Example 7: test_prefect_pred
def test_prefect_pred():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris = np.genfromtxt(os.path.join(cur_dir, 'results', 'iris.csv'),
                         delimiter=",", skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
    X = add_constant(X, prepend=True)
    glm = GLM(y, X, family=sm.families.Binomial())
    assert_raises(PerfectSeparationError, glm.fit)
Author: collinstocks | Project: statsmodels | Lines: 11 | Source file: test_glm.py
Example 8: setupClass
def setupClass(cls):
    from results.results_regression import LongleyGls

    data = longley.load()
    exog = add_constant(np.column_stack((data.exog[:, 1], data.exog[:, 4])))
    tmp_results = OLS(data.endog, exog).fit()
    rho = np.corrcoef(tmp_results.resid[1:], tmp_results.resid[:-1])[0][1]  # by assumption
    order = toeplitz(np.arange(16))
    sigma = rho ** order
    GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
    cls.res1 = GLS_results
    cls.res2 = LongleyGls()
Author: katherineranney | Project: statsmodels | Lines: 12 | Source file: test_regression.py
Example 9: setupClass
def setupClass(cls):
    from results.results_glm import Cpunish
    from scikits.statsmodels.datasets.cpunish import load
    data = load()
    data.exog[:, 3] = np.log(data.exog[:, 3])
    data.exog = add_constant(data.exog)
    exposure = [100] * len(data.endog)
    cls.res1 = GLM(data.endog, data.exog, family=sm.families.Poisson(),
                   exposure=exposure).fit()
    cls.res1.params[-1] += np.log(100)  # add exposure back in to params
                                        # to make the results the same
    cls.res2 = Cpunish()
Author: pombredanne | Project: statsmodels | Lines: 12 | Source file: test_glm.py
Example 10: qqline
def qqline(ax, line, x=None, y=None, dist=None, fmt='r-'):
    """
    Plot a reference line for a qqplot.

    Parameters
    ----------
    ax : matplotlib axes instance
        The axes on which to plot the line
    line : str {'45', 'r', 's', 'q'}
        Options for the reference line to which the data is compared.
        '45' - 45-degree line
        's'  - standardized line, the expected order statistics are scaled
               by the standard deviation of the given sample and have the
               mean added to them
        'r'  - a regression line is fit
        'q'  - a line is fit through the quartiles
        None - by default no reference line is added to the plot
    x : array
        X data for plot. Not needed if line is '45'.
    y : array
        Y data for plot. Not needed if line is '45'.
    dist : scipy.stats.distribution
        A scipy.stats distribution, needed if line is 'q'.

    Notes
    -----
    There is no return value. The line is plotted on the given `ax`.
    """
    if line == '45':
        end_pts = zip(ax.get_xlim(), ax.get_ylim())
        end_pts[0] = max(end_pts[0])
        end_pts[1] = min(end_pts[1])
        ax.plot(end_pts, end_pts, fmt)
        return  # does this have any side effects?
    if x is None and y is None:
        raise ValueError("If line is not 45, x and y cannot be None.")
    elif line == 'r':
        # could use ax.lines[0].get_xdata(), get_ydata(),
        # but don't know axes are 'clean'
        y = OLS(y, add_constant(x)).fit().fittedvalues
        ax.plot(x, y, fmt)
    elif line == 's':
        m, b = y.std(), y.mean()
        ref_line = x * m + b
        ax.plot(x, ref_line, fmt)
    elif line == 'q':
        q25 = stats.scoreatpercentile(y, 25)
        q75 = stats.scoreatpercentile(y, 75)
        theoretical_quartiles = dist.ppf([.25, .75])
        m = (q75 - q25) / np.diff(theoretical_quartiles)
        b = q25 - m * theoretical_quartiles[0]
        ax.plot(x, m * x + b, fmt)
Author: takluyver | Project: statsmodels | Lines: 51 | Source file: gofplots.py
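A usage sketch for qqline from Example 10 (assumes matplotlib and scipy are available; the sample data are illustrative, not from the source):

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

sample = np.sort(np.random.randn(100))       # ordered sample values
probs = (np.arange(1, 101) - 0.5) / 100.
theoretical = stats.norm.ppf(probs)          # theoretical normal quantiles
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(theoretical, sample, 'bo')           # the qq scatter
qqline(ax, 'q', x=theoretical, y=sample, dist=stats.norm)  # line through the quartiles
plt.show()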
Example 11: __init__
def __init__(self):
    d = macrodata.load().data

    #growth rates
    gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))
    lint = d['realint'][:-1]
    tbilrate = d['tbilrate'][:-1]

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, lint], prepend=True)
    exogg2 = add_constant(np.c_[gs_l_realgdp, tbilrate], prepend=True)
    exogg3 = add_constant(np.c_[gs_l_realgdp], prepend=True)

    res_ols = OLS(endogg, exogg).fit()
    res_ols2 = OLS(endogg, exogg2).fit()
    res_ols3 = OLS(endogg, exogg3).fit()

    self.res = res_ols
    self.res2 = res_ols2
    self.res3 = res_ols3
    self.endog = self.res.model.endog
    self.exog = self.res.model.exog
Author: takluyver | Project: statsmodels | Lines: 23 | Source file: test_diagnostic.py
Example 12: __init__
def __init__(self):
    '''
    Tests Poisson family with canonical log link.

    Test results were obtained by R.
    '''
    from results.results_glm import Cpunish
    from scikits.statsmodels.datasets.cpunish import load
    self.data = load()
    self.data.exog[:, 3] = np.log(self.data.exog[:, 3])
    self.data.exog = add_constant(self.data.exog)
    self.res1 = GLM(self.data.endog, self.data.exog,
                    family=sm.families.Poisson()).fit()
    self.res2 = Cpunish()
Author: chrisjordansquire | Project: statsmodels | Lines: 14 | Source file: test_glm.py
Example 13: test_add_constant_1d
def test_add_constant_1d(self):
    x = np.arange(1, 5)
    x = tools.add_constant(x, prepend=True)
    y = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.]]).T
    assert_equal(x, y)
Author: pombredanne | Project: statsmodels | Lines: 5 | Source file: test_tools.py
Example 14: test_add_constant_has_constant1d
def test_add_constant_has_constant1d(self):
    x = np.ones(5)
    x = tools.add_constant(x)
    assert_equal(x, np.ones(5))
Author: pombredanne | Project: statsmodels | Lines: 4 | Source file: test_tools.py
Example 15: add_trend
def add_trend(X, trend="c", prepend=False):
    """
    Adds a trend and/or constant to an array.

    Parameters
    ----------
    X : array-like
        Original array of data.
    trend : str {"c", "t", "ct", "ctt"}
        "c" add constant only
        "t" add trend only
        "ct" add constant and linear trend
        "ctt" add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of X.

    Notes
    -----
    Returns columns as ["ctt", "ct", "c"] whenever applicable. There is
    currently no checking for an existing constant or trend.

    See also
    --------
    scikits.statsmodels.add_constant
    """
    #TODO: could be generalized for trend of arbitrary order
    trend = trend.lower()
    if trend == "c":  # handles structured arrays
        return add_constant(X, prepend=prepend)
    elif trend == "ct" or trend == "t":
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2
    else:
        raise ValueError("trend %s not understood" % trend)
    X = np.asanyarray(X)
    nobs = len(X)
    trendarr = np.vander(np.arange(1, nobs + 1, dtype=float), trendorder + 1)
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]
    if not X.dtype.names:
        if not prepend:
            X = np.column_stack((X, trendarr))
        else:
            X = np.column_stack((trendarr, X))
    else:
        return_rec = X.__class__ is np.recarray
        if trendorder == 1:
            if trend == "ct":
                dt = [('const', float), ('trend', float)]
            else:
                dt = [('trend', float)]
        elif trendorder == 2:
            dt = [('const', float), ('trend', float),
                  ('trend_squared', float)]
        trendarr = trendarr.view(dt)
        if prepend:
            X = nprf.append_fields(trendarr, X.dtype.names,
                                   [X[i] for i in X.dtype.names],
                                   usemask=False, asrecarray=return_rec)
        else:
            X = nprf.append_fields(X, trendarr.dtype.names,
                                   [trendarr[i] for i in trendarr.dtype.names],
                                   usemask=False, asrecarray=return_rec)
    return X
Author: chrisjordansquire | Project: statsmodels | Lines: 64 | Source file: tsatools.py
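A usage sketch for add_trend from Example 15 (illustrative array; with prepend=False the new columns are appended):

import numpy as np
X = np.random.randn(10, 2)
Xt = add_trend(X, trend="ct")  # append a constant column and a linear trend column
# Xt.shape == (10, 4): the two original columns, then ones, then 1..10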
Example 16: add_constant
nsample = 300  #different pattern in the last graph with 100 or 200 or 500
sig = 0.5
np.random.seed(9876789)  #9876543)
X = np.random.randn(nsample, 3)
X = np.column_stack((np.ones((nsample, 1)), X))
beta = [1, 0.5, -0.5, 1.]
y_true2 = np.dot(X, beta)

x1 = np.linspace(0, 1, nsample)
gamma = np.array([1, 3.])
#with slope 3 instead of 2, I get negative weights - not correct
# - was misspecified, but negative weights are still possible with identity link
#gamma /= gamma.sum()  #normalize assuming x1.max is 1
z_true = add_constant(x1, prepend=True)

winv = np.dot(z_true, gamma)
het_params = sig**2 * np.array([1, 3.])  # for squared
sig2_het = sig**2 * winv

weights_dgp = 1 / winv
weights_dgp /= weights_dgp.max()  #should already be normalized - normalization NOT checked
#y2[:nsample*6/10] = y_true2[:nsample*6/10] + sig*1. * np.random.normal(size=nsample*6/10)
z0 = np.zeros(nsample)
z0[(nsample * 5)//10:] = 1  #dummy for the 2 halves of the sample
z0 = add_constant(z0, prepend=True)

z1 = add_constant(x1, prepend=True)

noise = np.sqrt(sig2_het) * np.random.normal(size=nsample)
Author: takluyver | Project: statsmodels | Lines: 31 | Source file: ex_feasible_gls_het_0.py
Example 17: test_add_constant_has_constant2d
def test_add_constant_has_constant2d(self):
    x = np.asarray([[1, 1, 1, 1], [1, 2, 3, 4.]])
    y = tools.add_constant(x)
    assert_equal(x, y)
Author: pombredanne | Project: statsmodels | Lines: 4 | Source file: test_tools.py
Example 18: grangercausalitytests
def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    '''four tests for granger causality of 2 timeseries

    All four tests give similar results. `params_ftest` and `ssr_ftest` are
    equivalent based on the F test, which is identical to lmtest:grangertest
    in R.

    Parameters
    ----------
    x : array, 2d, (nobs, 2)
        data to test whether the time series in the second column Granger
        causes the time series in the first column
    maxlag : integer
        the Granger causality test results are calculated for all lags up to
        maxlag
    addconst : bool
        include a constant in the regressions
    verbose : bool
        print results if true

    Returns
    -------
    results : dictionary
        all test results, dictionary keys are the number of lags. For each
        lag the values are a tuple, with the first element a dictionary with
        test statistic, pvalues, degrees of freedom, and the second element
        the OLS estimation results for the restricted model, the unrestricted
        model and the restriction (contrast) matrix for the parameter f_test.

    Notes
    -----
    TODO: convert to class and attach results properly

    'params_ftest', 'ssr_ftest' are based on the F test
    'ssr_chi2test', 'lrtest' are based on the chi-square test
    '''
    from scipy import stats  # lazy import

    resli = {}

    for mlg in range(1, maxlag + 1):
        result = {}
        if verbose:
            print '\nGranger Causality'
            print 'number of lags (no zero)', mlg
        mxlg = mlg  #+ 1 # Note number of lags starting at zero in lagmat

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

        #add constant
        if addconst:
            dtaown = add_constant(dta[:, 1:mxlg + 1])
            dtajoint = add_constant(dta[:, 1:])
        else:
            raise ValueError('Not Implemented')
            dtaown = dta[:, 1:mxlg]
            dtajoint = dta[:, 1:]

        #run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        #print results
        #for ssr based tests see: http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        #the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        fgc1 = (res2down.ssr - res2djoint.ssr) / res2djoint.ssr / mxlg * res2djoint.df_resid
        if verbose:
            print 'ssr based F test:         F=%-8.4f, p=%-8.4f, df_denom=%d, df_num=%d' % \
                  (fgc1, stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                   res2djoint.df_resid, mxlg)
        result['ssr_ftest'] = (fgc1, stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                               res2djoint.df_resid, mxlg)

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = res2down.nobs * (res2down.ssr - res2djoint.ssr) / res2djoint.ssr
        if verbose:
            print 'ssr based chi2 test:   chi2=%-8.4f, p=%-8.4f, df=%d' % \
                  (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)
        result['ssr_chi2test'] = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        #likelihood ratio test pvalue:
        lr = -2 * (res2down.llf - res2djoint.llf)
        if verbose:
            print 'likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d' % \
                  (lr, stats.chi2.sf(lr, mxlg), mxlg)
        result['lrtest'] = (lr, stats.chi2.sf(lr, mxlg), mxlg)

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack((np.zeros((mxlg - 1, mxlg - 1)),
                                   np.eye(mxlg - 1, mxlg - 1),
                                   np.zeros((mxlg - 1, 1))))
        rconstr = np.column_stack((np.zeros((mxlg, mxlg)),
                                   np.eye(mxlg, mxlg),
                                   np.zeros((mxlg, 1))))
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print 'parameter F test:         F=%-8.4f, p=%-8.4f, df_denom=%d, df_num=%d' % \
                  (ftres.fvalue, ftres.pvalue, ftres.df_denom, ftres.df_num)
        result['params_ftest'] = (np.squeeze(ftres.fvalue)[()],
                                  np.squeeze(ftres.pvalue)[()],
                                  ftres.df_denom, ftres.df_num)
#......... remainder of code omitted .........
Author: scottpiraino | Project: statsmodels | Lines: 101 | Source file: stattools.py
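A usage sketch for grangercausalitytests from Example 18, on simulated data in which the second column drives the first (illustrative only):

import numpy as np
np.random.seed(12345)
e = np.random.randn(200, 2)
data = e.copy()
for t in range(1, 200):
    data[t, 0] = 0.5 * data[t - 1, 0] + 0.4 * data[t - 1, 1] + e[t, 0]
res = grangercausalitytests(data, maxlag=2, verbose=False)
# res[1][0]['ssr_ftest'] holds (F, pvalue, df_denom, df_num) for lag 1;
# a small pvalue says column 1 Granger-causes column 0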
Example 19: str
    if returns == 'text':
        return str(part1) + '\n' + str(part2) + '\n' + str(part3L)
    elif returns == 'tables':
        return [part1, part2, part3L]
    elif returns == 'csv':
        return part1.as_csv() + '\n' + part2.as_csv() + '\n' + \
               part3L.as_csv()
    elif returns == 'latex':
        print('not available yet')
    elif returns == 'html':
        print('not available yet')


if __name__ == "__main__":
    import scikits.statsmodels.api as sm
    data = sm.datasets.longley.load()
    data.exog = add_constant(data.exog)
    ols_results = OLS(data.endog, data.exog).results
    gls_results = GLS(data.endog, data.exog).results
    print(ols_results.summary())

    tables = ols_results.summary(returns='tables')
    csv = ols_results.summary(returns='csv')

"""
Summary of Regression Results
=======================================
| Dependent Variable:            ['y']|
| Model:                           OLS|
| Method:                Least Squares|
| Date:               Tue, 29 Jun 2010|
| Time:                       22:32:21|
| # obs:                          16.0|
| Df residuals:                    9.0|
Author: chrisjordansquire | Project: statsmodels | Lines: 31 | Source file: linear_model.py
Example 20: test_all
def test_all(self):

    d = macrodata.load().data
    #import datasetswsm.greene as g
    #d = g.load('5-1')

    #growth rates
    gs_l_realinv = 400 * np.diff(np.log(d['realinv']))
    gs_l_realgdp = 400 * np.diff(np.log(d['realgdp']))

    #simple diff, not growthrate, I want heteroscedasticity later for testing
    endogd = np.diff(d['realinv'])
    exogd = add_constant(np.c_[np.diff(d['realgdp']), d['realint'][:-1]],
                         prepend=True)

    endogg = gs_l_realinv
    exogg = add_constant(np.c_[gs_l_realgdp, d['realint'][:-1]], prepend=True)

    res_ols = OLS(endogg, exogg).fit()
    #print res_ols.params

    mod_g1 = GLSAR(endogg, exogg, rho=-0.108136)
    res_g1 = mod_g1.fit()
    #print res_g1.params

    mod_g2 = GLSAR(endogg, exogg, rho=-0.108136)  #-0.1335859) from R
    res_g2 = mod_g2.iterative_fit(maxiter=5)
    #print res_g2.params

    rho = -0.108136

    #          coefficient  std. error  t-ratio  p-value    95% CONFIDENCE INTERVAL
    partable = np.array([
        [-9.50990,  0.990456, -9.602, 3.65e-018, -11.4631,  -7.55670],    # ***
        [ 4.37040,  0.208146, 21.00,  2.93e-052,   3.95993,  4.78086],    # ***
        [-0.579253, 0.268009, -2.161, 0.0319,     -1.10777, -0.0507346]]) # **

    #Statistics based on the rho-differenced data:
    result_gretl_g1 = dict(
        endog_mean = ("Mean dependent var",   3.113973),
        endog_std = ("S.D. dependent var",   18.67447),
        ssr = ("Sum squared resid",       22530.90),
        mse_resid_sqrt = ("S.E. of regression",  10.66735),
        rsquared = ("R-squared",              0.676973),
        rsquared_adj = ("Adjusted R-squared", 0.673710),
        fvalue = ("F(2, 198)",              221.0475),
        f_pvalue = ("P-value(F)",           3.56e-51),
        resid_acf1 = ("rho",               -0.003481),
        dw = ("Durbin-Watson",              1.993858))

    #fstatistic, p-value, df1, df2
    reset_2_3 = [5.219019, 0.00619, 2, 197, "f"]
    reset_2 = [7.268492, 0.00762, 1, 198, "f"]
    reset_3 = [5.248951, 0.023, 1, 198, "f"]
    #LM-statistic, p-value, df
    arch_4 = [7.30776, 0.120491, 4, "chi2"]

    #multicollinearity
    vif = [1.002, 1.002]
    cond_1norm = 6862.0664
    determinant = 1.0296049e+009
    reciprocal_condition_number = 0.013819244

    #Chi-square(2): test-statistic, pvalue, df
    normality = [20.2792, 3.94837e-005, 2]

    #tests
    res = res_g1  #with rho from Gretl

    #basic
    assert_almost_equal(res.params, partable[:, 0], 4)
    assert_almost_equal(res.bse, partable[:, 1], 6)
    assert_almost_equal(res.tvalues, partable[:, 2], 2)

    assert_almost_equal(res.ssr, result_gretl_g1['ssr'][1], decimal=2)
    #assert_almost_equal(res.llf, result_gretl_g1['llf'][1], decimal=7) #not in gretl
    #assert_almost_equal(res.rsquared, result_gretl_g1['rsquared'][1], decimal=7) #FAIL
    #assert_almost_equal(res.rsquared_adj, result_gretl_g1['rsquared_adj'][1], decimal=7) #FAIL
    assert_almost_equal(np.sqrt(res.mse_resid), result_gretl_g1['mse_resid_sqrt'][1], decimal=5)
    assert_almost_equal(res.fvalue, result_gretl_g1['fvalue'][1], decimal=4)
    assert_approx_equal(res.f_pvalue, result_gretl_g1['f_pvalue'][1], significant=2)
    #assert_almost_equal(res.durbin_watson, result_gretl_g1['dw'][1], decimal=7) #TODO

    #arch
    #sm_arch = smsdia.acorr_lm(res.wresid**2, maxlag=4, autolag=None)
    sm_arch = smsdia.het_arch(res.wresid, maxlag=4)
    assert_almost_equal(sm_arch[0], arch_4[0], decimal=4)
    assert_almost_equal(sm_arch[1], arch_4[1], decimal=6)

    #tests
    res = res_g2  #with estimated rho

    #estimated lag coefficient
    assert_almost_equal(res.model.rho, rho, decimal=3)

    #basic
#......... remainder of code omitted .........
Author: takluyver | Project: statsmodels | Lines: 101 | Source file: test_glsar_gretl.py
Note: The scikits.statsmodels.tools.tools.add_constant examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation hosting platforms. The snippets were selected from open-source projects contributed by various programmers; copyright in each snippet remains with its original author, and reuse should follow the corresponding project's license. Do not repost without permission.