In [1]: import numpy as np
In [2]: from scipy import stats
In [3]: import statsmodels.api as sm
In [4]: import matplotlib.pyplot as plt
In [5]: from statsmodels.sandbox.regression.predstd import wls_prediction_std
In [6]: nsample = 50
In [7]: x1 = np.linspace(0, 20, nsample)
In [8]: X = np.c_[x1, (x1-5)**2, np.ones(nsample)]
In [9]: sig = 0.5
In [10]: beta = [0.5, -0.0, 5.]
In [11]: y_true2 = np.dot(X, beta)
In [12]: y2 = y_true2 + sig*1. * np.random.normal(size=nsample)
In [13]: res2 = sm.OLS(y2, X).fit()
In [14]: print res2.params
[ 0.52963958 -0.00277593 4.86908339]
In [15]: print res2.bse
[ 0.02593143 0.00229453 0.16796438]
print res2.predict
In [16]: plt.figure()
Out[16]: <matplotlib.figure.Figure at 0x8a28390>
In [17]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[17]:
[<matplotlib.lines.Line2D at 0x8b92550>,
<matplotlib.lines.Line2D at 0x8b929d0>]
In [18]: plt.plot(x1, res2.fittedvalues, 'r--')
Out[18]: [<matplotlib.lines.Line2D at 0x8a24190>]
Model assumptions: * identical coefficients * misspecification: true model is quadratic, estimate only linear * independent noise/error term * two groups for error variance, low and high variance groups
In [19]: np.random.seed(0)
In [20]: beta = [0.5, -0.01, 5.]
In [21]: y_true2 = np.dot(X, beta)
In [22]: w = np.ones(nsample)
In [23]: w[nsample*6/10:] = 3
In [24]: y2 = y_true2 + sig*w* np.random.normal(size=nsample)
In [25]: X2 = X[:,[0,2]]
unbiased parameter estimates, biased parameter covariance and standard errors
In [26]: print 'OLS'
OLS
In [27]: res2 = sm.OLS(y2, X[:,[0,2]]).fit()
In [28]: print 'OLS beta estimates'
OLS beta estimates
In [29]: print res2.params
[ 0.3394777 5.95340931]
In [30]: print 'OLS stddev of beta'
OLS stddev of beta
In [31]: print res2.bse
[ 0.02737963 0.31776167]
OLS standard errors are inconsistent under heteroscedasticity; as a correction, use sandwich estimators of the parameter covariance matrix
In [32]: print 'heteroscedasticity corrected standard error of beta estimates'
heteroscedasticity corrected standard error of beta estimates
In [33]: print res2.HC0_se
[ 0.02929661 0.20819232]
In [34]: print res2.HC1_se
[ 0.02990073 0.2124854 ]
In [35]: print res2.HC2_se
[ 0.03015117 0.21437931]
In [36]: print res2.HC3_se
[ 0.03103399 0.22077995]
print res2.predict
plt.plot(x1, res2.fittedvalues, '--')
In [37]: print '\nWLS'
WLS
In [38]: res3 = sm.WLS(y2, X[:,[0,2]], 1./w).fit()
In [39]: print 'WLS beta estimates'
WLS beta estimates
In [40]: print res3.params
[ 0.36418002 5.80763966]
In [41]: print 'WLS stddev of beta'
WLS stddev of beta
In [42]: print res3.bse
[ 0.02457 0.2293553]
print res3.predict
plt.plot(x1, res3.fittedvalues, '--.')
Detour: write a function for prediction standard errors
In [43]: covb = res2.cov_params()
full covariance: predvar = res2.mse_resid + np.diag(np.dot(X2, np.dot(covb, X2.T))); prediction variance only:
In [44]: predvar = res2.mse_resid + (X2 * np.dot(covb,X2.T).T).sum(1)
In [45]: predstd = np.sqrt(predvar)
In [46]: tppf = stats.t.ppf(0.975, res2.df_resid)
In [47]: prstd, iv_l, iv_u = wls_prediction_std(res3)
In [48]: plt.figure()
Out[48]: <matplotlib.figure.Figure at 0x7f47310>
In [49]: plt.plot(x1, y2, 'o', x1, y_true2, 'b-')
Out[49]:
[<matplotlib.lines.Line2D at 0x7be6090>,
<matplotlib.lines.Line2D at 0x7be6190>]
In [50]: plt.plot(x1, res2.fittedvalues, 'r--')
Out[50]: [<matplotlib.lines.Line2D at 0x7f15e90>]
In [51]: plt.plot(x1, res2.fittedvalues + tppf * predstd, 'r--')
Out[51]: [<matplotlib.lines.Line2D at 0x736e650>]
In [52]: plt.plot(x1, res2.fittedvalues - tppf * predstd, 'r--')
Out[52]: [<matplotlib.lines.Line2D at 0x736e510>]
In [53]: plt.plot(x1, res3.fittedvalues, 'g--.')
Out[53]: [<matplotlib.lines.Line2D at 0x736e990>]
In [54]: plt.plot(x1, iv_u, 'g--')
Out[54]: [<matplotlib.lines.Line2D at 0x8b788d0>]
In [55]: plt.plot(x1, iv_l, 'g--')
Out[55]: [<matplotlib.lines.Line2D at 0x776a850>]
In [56]: plt.title('blue: true, red: OLS, green: WLS')
Out[56]: <matplotlib.text.Text at 0x737e1d0>
In [57]: print '\n2-stage least squares for FGLS (FWLS)'
2-stage least squares for FGLS (FWLS)
In [58]: resid1 = res2.resid[w==1.]
In [59]: var1 = resid1.var(ddof=int(res2.df_model)+1)
In [60]: resid2 = res2.resid[w!=1.]
In [61]: var2 = resid2.var(ddof=int(res2.df_model)+1)
In [62]: west = w.copy()
In [63]: west[w!=1.] = np.sqrt(var2)/np.sqrt(var1)
In [64]: res3 = sm.WLS(y2, X[:,[0,2]], 1./west).fit()
In [65]: print 'feasible WLS beta estimates'
feasible WLS beta estimates
In [66]: print res3.params
[ 0.35952501 5.83249159]
In [67]: print 'feasible WLS stddev of beta'
feasible WLS stddev of beta
In [68]: print res3.bse
[ 0.02483546 0.23959264]