def checkdb(df,col):
"It also tells whether the Data is serially correlated or not "
r = k.durbin_watson(df[col], axis=0)
if(r==0):
print "Not Serially correlated "
return False,r
else:
print "Serially correlated "
return True,r
#.........这里部分代码省略.........
res = scipy.optimize.leastsq(error, pguess, args=(Xdata, Ydata, Errdata, dict_data), full_output=1)
(popt, pcov, infodict, errmsg, ier) = res
perr=scipy.sqrt(scipy.diag(pcov))
M=len(Ydata)
N=len(popt)
#Residuals
Y=f(Xdata,popt,dict_data)
residuals=(Y-Ydata)/Errdata
meanY=scipy.mean(Ydata)
squares=(Y-meanY)/Errdata
squaresT=(Ydata-meanY)/Errdata
SSM=sum(squares**2) #Corrected Sum of Squares
SSE=sum(residuals**2) #Sum of Squares of Errors
SST=sum(squaresT**2) #Total corrected sum of squares
DFM=N-1 #Degrees of freedom for model
DFE=M-N #Degrees of freedom for error
DFT=M-1 #Degrees of freedom total
MSM=SSM/DFM #Mean squares for model (explained variance)
MSE=SSE/DFE #Mean squares for Error (should be small wrt MSM) Unexplained Variance
MST=SST/DFT #Mean squares for total
R2=SSM/SST #proportion of explained variance
R2_adj=1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
#t-test to see if parameters are different from zero
t_stat=popt/perr #t-statistic for popt different from zero'
t_stat=t_stat.real
p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit.
z=scipy.stats.t(M-N).ppf(0.95)
p95=perr*z
#Chisquared Analysis on Residuals
chisquared=sum(residuals**2)
degfreedom=M-N
chisquared_red=chisquared/degfreedom
p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
stderr_reg=scipy.sqrt(chisquared_red)
chisquare=(p_chi2,chisquared, chisquared_red, degfreedom,R2,R2_adj)
#Analysis of Residuals
w,p_shapiro=scipy.stats.shapiro(residuals)
mean_res=scipy.mean(residuals)
stddev_res=scipy.sqrt(scipy.var(residuals))
t_res=mean_res/stddev_res #t-statistic to test that mean_res is zero.
p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
#if p_res <0.05, null hypothesis rejected and mean is non-zero.
#Should be high for good fit.
#F-test on residuals
F=MSM/MSE #explained variance/unexplained . Should be large
p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
#if p_F <0.05n, null-hypothesis is rejected
#i.e. R^2>0 and at least one of the fitting parameters >0.
dw=stools.durbin_watson(residuals)
resanal=(p_shapiro,w,mean_res,F,p_F,dw)
if ax:
formataxis(ax)
ax.plot(Ydata,Y,'ro')
ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
ax.xaxis.label.set_text('Data')
ax.yaxis.label.set_text('Fitted')
sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov,dict_data)
Yplus=Y+sigmay
Yminus=Y-sigmay
ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
titletext='Parity plot for fit.\n'
titletext+=r'$r^2$=%5.2f, $r^2_{adj}$=%5.2f, '
titletext +='$\sigma_{exp}$=%5.2f,$\chi^2_{\nu}$=%5.2f, $p_{\chi^2}$=%5.2f, '
titletext +='$\sigma_{err}^{reg}$=%5.2f'
ax.title.set_text(titletext%(R2,R2_adj, avg_stddev_data, chisquared_red, p_chi2, stderr_reg))
ax.figure.canvas.draw()
if ax2:#Test for homoscedasticity
formataxis(ax2)
ax2.plot(Y,residuals,'ro')
ax2.xaxis.label.set_text('Fitted Data')
ax2.yaxis.label.set_text('Residuals')
titletext='Analysis of Residuals\n'
titletext+=r'mean=%5.2f, $p_{res}$=%5.2f, $p_{shapiro}$=%5.2f, $Durbin-Watson$=%2.1f'
titletext+='\nF=%5.2f, $p_F$=%3.2e'
ax2.title.set_text(titletext%(mean_res, p_res, p_shapiro, dw , F, p_F))
ax2.figure.canvas.draw()
return popt,pcov,perr,p95,p_p,chisquare,resanal
def fitdata(f,Xdata,Ydata,Errdata,pguess,ax=False,ax2=False):
'''
fitdata(f,Xdata,Ydata,Errdata,pguess):
'''
def error(p,Xdata,Ydata,Errdata):
Y=f(Xdata,p)
residuals=(Y-Ydata)/Errdata
return residuals
res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata),full_output=1)
(popt,pcov,infodict,errmsg,ier)=res #optimize p
perr=scipy.sqrt(scipy.diag(pcov)) #vector of sd of p
M=len(Ydata)
N=len(popt)
#Residuals
Y=f(Xdata,popt)
residuals=(Y-Ydata)/Errdata
meanY=scipy.mean(Ydata)
squares=(Y-meanY)/Errdata
squaresT=(Ydata-meanY)/Errdata
SSM=sum(squares**2) #corrected sum of squares
SSE=sum(residuals**2) #sum of squares of errors
SST=sum(squaresT**2) #total corrected sum of squares
DFM=N-1 #for model
DFE=M-N #for error
DFT=M-1 #total
MSM=SSM/DFM #mean squares for model(explained variance)
MSE=SSE/DFE #mean squares for errors(should be small wrt MSM) unexplained variance
MST=SST/DFT #mean squares for total
R2=SSM/SST #proportion of explained variance
R2_adj=1-(1-R2)*(M-1)/(M-N-1) #adjusted R2
#ttest to see if parameters are different from zero
t_stat=popt/perr #tstatistic for popt different from zero
t_stat=t_stat.real
p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit
z=scipy.stats.t(M-N).ppf(0.95)
p95=perr*z
#Chisquared ananlysis on residuals
chisquared=sum(residuals**2)
degfreedom=M-N
chisquared_red=chisquared/degfreedom
p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
stderr_reg=scipy.sqrt(chisquared_red)
chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
#Analysis of residuals
w,p_shapiro=scipy.stats.shapiro(residuals) # to check if residuals are normally distributed
mean_res=scipy.mean(residuals)
stddev_res=scipy.sqrt(scipy.var(residuals))
t_res=mean_res/stddev_res
p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
#if p_res<0.05,null hypothesis is rejected.
#R^2>0 and at least one of the fitting parameters>0
#F-test on residuals
F=MSM/MSE
p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
dw=stools.durbin_watson(residuals) #to check if they are correlated
resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
if ax:
formataxis(ax)
ax.plot(Ydata,Y,'ro')
ax.errorbar(Ydata,Y,yerr=Errdata,fmt='.')
Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
ax.xaxis.label.set_text('Data')
ax.yaxis.label.set_text('Fitted')
sigmay,avg_stddev_data=get_stderr_fit(f, Xdata, popt, pcov)
Yplus=Y+sigmay
Yminus=Y-sigmay
ax.plot(Y,Yplus,'c',alpha=.6,linestyle='--',linewidth=.5)
ax.plot(Y,Yminus,'c',alpha=.6,linestyle='--',linewidth=.5)
ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=.5)
titletext='Parity plot for fit.\n'
titletext+=r'$r^2$=%5.2f,$r^2_{adj}$=%5.2f, '
titletext+='$\sigma_{exp}$=%5.2f,$\chi^2_{\nu}$=%5.2f,$p_{\chi^2}$=%5.2f, '
titletext+='$\sigma_{err}^{reg}$=%5.2f'
ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
ax.figure.canvas.draw()
if ax2:#test for homoscedasticity
formataxis(ax2)
ax2.plot(Y,residuals,'ro')
ax2.xaxis.label.set_text('Fitted data')
ax2.yaxis.label.set_text('Residuals')
titletext='Analysis of residuals\n'
titletext+=r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson$=%2.1f'
titletext+='\n F=%5.2f,$p_F$=%3.2e'
ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
ax2.figure.canvas.draw()
#.........这里部分代码省略.........
def fitdata(f,XData,YData,ErrData,pguess,ax=False,ax2=False):
def error(p,XData,YData,ErrData):
Y=f(XData,p)
residuals=(Y-YData)/ErrData
return residuals
res=scipy.optimize.leastsq(error,pguess,args=(XData,YData,ErrData),full_output=1)
(popt,pcov,infodict,errmsg,ier)=res
perr=scipy.sqrt(scipy.diag(pcov))
M=len(YData)
N=len(popt)
#residuals
Y=f(XData,popt)
residuals=(Y-YData)/ErrData
meanY=scipy.mean(YData)
squares=(Y-meanY)/ErrData
squaresT=(YData-meanY)/ErrData
SSM=sum(squares**2) #Corrected Sum of Squares
SSE=sum(residuals**2)#Sum of Squares of Errors
SST=sum(squaresT**2)#Totaal corrected sum of squares
DFM=N-1 #degrees of freedom for model
DFE=M-N #degrees of freedom for error
DFT=M-1 #degrees of freedom total
MSM=SSM/DFM #Mean squares for model (explained variance)
MSE=SSE/DFE #Mean squares for error
MST=SST/DFT #Mean squares for total
R2=SSM/SST #proportion of explained varience
R2_adj=1-(1-R2)*(M-1)/(M-N-1)#Adjusted R2
#t-test to see if parameters are different from zero
t_stat=popt/perr #t-statistic for popt different from zero
t_stat=t_stat.real
p_p=1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit.
z=scipy.stats.t(M-N).ppf(0.95)
p95=perr*z
#chisquared analysis on residuals
chisquared=sum(residuals**2)
degfreedom=M-N
chisquared_red=chisquared/degfreedom
p_chi2=1.0-scipy.stats.chi2.cdf(chisquared,degfreedom)
stderr_reg=scipy.sqrt(chisquared_red)
chisquare=(p_chi2,chisquared_red,degfreedom,R2,R2_adj)
#Analysis of Residuals
w,p_shapiro=scipy.stats.shapiro(residuals)
mean_res=scipy.mean(residuals) #mean of all residuals
stddev_res=scipy.sqrt(scipy.var(residuals)) #standard deviation of all residuals
t_res=mean_res/stddev_res #t-statistic to test that was mean_res is zero.
p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
#if p_res<0.05,null-hypothesis is rejected and mean is non-zero
#should be high for good fit.
# F-test on residuals
F=MSM/MSE #explained/un-explained. Should be large
p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
#if p_F<0.05,null hypothesis is rejected
#i.e. R^2>0 and at least one of the fitting parameters >0.
dw=stools.durbin_watson(residuals)
resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
if ax:
formataxis(ax)
ax.plot(YData,Y,'ro')
ax.errorbar(YData,Y,yerr=ErrData,fmt='.')
Ymin,Ymax=min((min(Y),min(YData))), max((max(Y),max(YData)))
ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
ax.xaxis.label.set_text('Data')
ax.yaxis.label.set_text('Fitted')
sigmaY,avg_stddev_data=get_stderr_fit(f,XData,popt,pcov)
Yplus=Y+sigmaY
Yminus=Y-sigmaY
ax.plot(Y,Yplus,'C',alpha=0.6,linestyle='--',linewidth=0.5)
ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
titletext='Parity plot for fit.\n'
titletext += r'$r^2$ = %5.2f, $r^2_(adj)$= %5.2f'
titletext +='$\sigma_{exp}$ = %5.2f, $\chi^2_{\nu}$=%5.2f, $p_{\chi^2}$=%5.2f,'
titletext +='$\sigma_{ree}^{reg}$ = %5.2f'
ax.title.set_text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
ax.figure.canvas.draw()
if ax2: #test for homoscedasticity
formataxis(ax2)
ax2.plot(Y,residuals,'ro')
ax2.xaxis.label.set_text('Fitted Data')
ax2.yaxis.label.set_text('Residuals')
titletext = 'Analysis of residuals\n'
titletext +=r'mean= %5.2f, $p_{res}$= %5.2f, $p_{shapiro}$=%5.2f , $Durbin-watson$=%2.1f'
titletext +='\n F=%5.2f,$p_F$ = %3.2e'
ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
#.........这里部分代码省略.........
def fitdata(f, Xdata,Ydata,Errdata, pguess,dict_data, ax=False, ax2=False):
def error(p,Xdata,Ydata,Errdata,dict_data):
Y=f(Xdata,p,dic_data)
residuals=(Y-Ydata)/Errdata
return residuals
res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata,dict_data),full_output=1)
(popt,pcov,infodict,errmsg,ier)=res
perr=scipy.sqrt(scipy.diag(pcov))
M=len(Ydata)
N=len(popt)
#Residuals
Y=f(Xdata,popt,dict_data)
residuals=(Y-Ydata)/Errdata
meanY=scipy.mean(Ydata)
squares=(Y-meanY)/Errdata
squaresT=(Ydata-meanY)/Errdata
SSM=sum(squares**2) #Corrected Sum of Squares
SSE=sum(residuals**2) #Sum of Squares of Errors
SST=sum(squaresT**2)#Total Corrected sum of Squares
DFM=N-1 #Degree of Freedom for model
DFE=M-N #Degree of Freedom for error
DFT=M-1 #Degree of freedom total
MSM=SSM/DFM #Mean Squares for model(explained Variance)
MSE=SSE/DFE #Mean Squares for Error(should be small wrt MSM) unexplained Variance
MST=SST/DFT #Mean squares for total
R2=SSM/SST #proportion of unexplained variance
R2_adj= 1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
#t-test to see if parameters are different from zero
t_stat=popt/perr #t-stat for popt different from zero
t_stat=t_stat.real
p_p= 1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit
z=scipy.stats.t(M-N).ppf(0.95)
p95=perr*z
#Chi-Squared Analysis on Residuals
chisquared=sum(residuals**2)
degfreedom=M-N
chisquared_red=chisquared/degfreedom
p_chi2=1.0-scipy.stats.chi2.cdf(chisquared, degfreedom)
stderr_reg=scipy.sqrt(chisquared_red)
chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
#Analysis of Residuals
w, p_shapiro=scipy.stats.shapiro(residuals)
mean_res=scipy.mean(residuals)
stddev_res=scipy.sqrt(scipy.var(residuals))
t_res=mean_res/stddev_res #t-statistics
p_res=1.0-scipy.stats.cdf(t_res,M-1)
F=MSM/MSE
p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
dw=stools.durbin_watson(residuals)
resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
if ax:
formataxis(ax)
ax.plot(Ydata,Y,'ro')
ax.errorbar(Ydata,Y,yerr=Errdata, fmt='.')
Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
ax.xaxis.label.set_text('Data')
ax.yaxis.label.set_text('Fitted')
sigmay,avg_stddev_data=get_stderr_fit(f,xdata,popt,pcov,dict_data)
Yplus=Y+sigmay
Yminus=Y-sigmay
ax.plot(Y,Yplus,'c',alpha=0.6,linestyle='--',linewidth=0.5)
ax.plot(Y,Yminus,'c',alpha=0.6,linestyle='==',linewidth=0.5)
ax.fill_between(Y,Yminus,Yplus,facecolor='cyan',alpha=0.5)
titletext='Parity plot for fit.\n'
titletext+=r'$r^2$=%5.2f,$r^2_{adj}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson=%2.1f'
titletext+='\n F=%5.2f,$p_F$=%3.2e'
titletext+='$\sigma_{err}^{reg}$=%5.2f'
ax.title.set.text(titletext%(R2,R2_adj,avg_stddev_data,chisquared_red,p_chi2,stderr_reg))
ax.figure.canvas.draw()
if ax2:
formataxis(ax2)
ax2.plot(Y,residuals,'ro')
ax2.xaxis.label.set_text('Fitted Data')
ax2.yaxis.label.set_text('Residuals')
titletext='Analysis of Residuals\n'
titletext+=r'mean=%5.2f,$p_{res}$=%5.2f,$p_{shapiro}$=%5.2f,$Durbin-Watson$=%2.1f'
titletext+='\n F=%5.2f,$p_F$=%3.2e'
ax2.title.set_text(titletext%(mean_res,p_res,p_shapiro,dw,F,p_F))
return popt,pcov,perr, p95, p_p,chisquare, resanal
def fitdata(f, Xdata,Ydata,Errdata, pguess, ax=False, ax2=False):
'''
popt = vector of length N of the optimized parameters
pcov = Covariance matrix of the fit
perr = vector of length N of the std-dev of the optimized parameters
p95 = half width of the 95% confidence interval for each parameter
p_p = vector of length N of the p-value for the parameters being zero
(if p<0.05, null hypothesis rejected and parameter is non-zero)
chisquared = chisquared value for the fit
chisquared_red = chisquared/degfreedom
chisquare = (p, chisquared, chisquared_red, degfreedom)
p = Probability of finding a chisquared value at least as extreme as the one shown
chisquared_red = chisquared/degfreedom. value should be approx. 1 for a good fit.
R2 = correlation coefficient or proportion of explained variance
R2_adj = adjusted R2 taking into account number of predictors
resanal = (p, w, mean, stddev)
Analysis of residuals
p = Probability of finding a w at least as extreme as the one observed (should be high for good fit)
w = Shapiro-Wilk test criterion
mean = mean of residuals
p_res = probability that the mean value obtained is different from zero merely by chance
F = F-statistic for the fit msm/msE.
Null hypothesis is that there is NO Difference between the two variances.
p_F = probability that this value of F can arise by chance alone.
p_F < 0.05 to reject null hypothesis and prove that the fit is good.
dw = Durbin_Watson statistic (value between 0 and 4).
2 = no-autocorrelation. 0 = .ve autocorrelation, 4 = -ve autocorrelation.
'''
def error(p,Xdata,Ydata,Errdata):
Y=f(Xdata,p)
residuals=(Y-Ydata)/Errdata
return residuals
res=scipy.optimize.leastsq(error,pguess,args=(Xdata,Ydata,Errdata),full_output=1)
(popt,pcov,infodict,errmsg,ier)=res
perr=scipy.sqrt(scipy.diag(pcov))
M=len(Ydata)
N=len(popt)
#Residuals
Y=f(Xdata,popt)
residuals=(Y-Ydata)/Errdata
meanY=scipy.mean(Ydata)
squares=(Y-meanY)/Errdata
squaresT=(Ydata-meanY)/Errdata
SSM=sum(squares**2) #Corrected Sum of Squares
SSE=sum(residuals**2) #Sum of Squares of Errors
SST=sum(squaresT**2)#Total Corrected sum of Squares
DFM=N-1 #Degree of Freedom for model
DFE=M-N #Degree of Freedom for error
DFT=M-1 #Degree of freedom total
MSM=SSM/DFM #Mean Squares for model(explained Variance)
MSE=SSE/DFE #Mean Squares for Error(should be small wrt MSM) unexplained Variance
MST=SST/DFT #Mean squares for total
R2=SSM/SST #proportion of unexplained variance
R2_adj= 1-(1-R2)*(M-1)/(M-N-1) #Adjusted R2
#t-test to see if parameters are different from zero
t_stat=popt/perr #t-stat for popt different from zero
t_stat=t_stat.real
p_p= 1.0-scipy.stats.t.cdf(t_stat,DFE) #should be low for good fit
z=scipy.stats.t(M-N).ppf(0.95)
p95=perr*z
#Chi-Squared Analysis on Residuals
chisquared=sum(residuals**2)
degfreedom=M-N
chisquared_red=chisquared/degfreedom
p_chi2=1.0-scipy.stats.chi2.cdf(chisquared, degfreedom)
stderr_reg=scipy.sqrt(chisquared_red)
chisquare=(p_chi2,chisquared,chisquared_red,degfreedom,R2,R2_adj)
#Analysis of Residuals
w, p_shapiro=scipy.stats.shapiro(residuals)
mean_res=scipy.mean(residuals)
stddev_res=scipy.sqrt(scipy.var(residuals))
t_res=mean_res/stddev_res #t-statistics
p_res=1.0-scipy.stats.t.cdf(t_res,M-1)
F=MSM/MSE
p_F=1.0-scipy.stats.f.cdf(F,DFM,DFE)
dw=stools.durbin_watson(residuals)
resanal=(p_shapiro,w,mean_res,p_res,F,p_F,dw)
if ax:
formataxis(ax)
ax.plot(Ydata,Y,'ro')
ax.errorbar(Ydata,Y,yerr=Errdata, fmt='.')
Ymin,Ymax=min((min(Y),min(Ydata))),max((max(Y),max(Ydata)))
ax.plot([Ymin,Ymax],[Ymin,Ymax],'b')
ax.xaxis.label.set_text('Data')
ax.yaxis.label.set_text('Fitted')
sigmay,avg_stddev_data=get_stderr_fit(f,Xdata,popt,pcov)
Yplus=Y+sigmay
Yminus=Y-sigmay
#.........这里部分代码省略.........
请发表评论