* Session setup
* Disable the -more- pause so output scrolls without waiting for a keypress
set more off
* Start with no data in memory
clear
* Local Program:
* PROGRAM TO RUN A FEW REGRESSIONS
* ********************************
* regs: regress y on x1 alone, on x2 alone, and on x1 and x2 together,
* then list the three coefficient vectors side by side.
* Uses the variables y, x1 and x2 currently in memory.
* Leaves the matrices tmp1, tmp2, tmp3 and coeffs defined afterwards.
*
* Drop any earlier definition first so this do-file can be re-run in the
* same session: a plain -clear- does not remove programs, and
* -program define- stops with an error if the name is already in use.
capture program drop regs
program define regs
    * Regress y on x1 (by itself); e(b) is ordered (x1, _cons)
    qui regress y x1
    matrix define tmp1 = e(b)
    * Regress y on x2 (by itself); e(b) is ordered (x2, _cons)
    qui regress y x2
    matrix define tmp2 = e(b)
    * Regress y on x1 and x2; e(b) is ordered (x1, x2, _cons)
    qui regress y x1 x2
    matrix define tmp3 = e(b)
    * Assemble one row per regression, with columns (cons, x1, x2).
    * A missing value marks a regressor that was not in that regression.
    matrix define coeffs = ///
        (tmp1[1,2], tmp1[1,1], . \ ///
        tmp2[1,2], ., tmp2[1,1] \ ///
        tmp3[1,3], tmp3[1,1], tmp3[1,2])
    matrix colnames coeffs = cons x1 x2
    matrix rownames coeffs = "Just x1" "Just x2" "x1 & x2"
    * Print coefficients to screen for the sake of comparison
    matrix list coeffs
end
* ********************************
* INDEPENDENT X VARIABLES
* ***********************
* Distribution parameters for x1, x2, x3:
* identity correlation matrix (no correlation between the draws),
* a row vector of means and a row vector of standard deviations
matrix C = I(3)
matrix m = (3, 2, 2)
matrix sd = (0.5, 2, 1)
* Fix the seed so the draws below are reproducible
set seed 12345
* Generate 100 observations of the three normal variables
drawnorm x1 x2 x3, n(100) means(m) sds(sd) corr(C)
* Standard-normal "unobservable" error term
generate eps = rnormal()
* Dependent variable: y depends on x1 and x2 (x3 does not enter)
generate y = 5 + 2*x1 - 3*x2 + eps
* Compare the three regressions
regs
* ***********************
* CORRELATED X VARIABLES
* **********************
* Remove the correlation matrix and the variables from the previous
* example so they can be regenerated under the same names
matrix drop C
drop x1 x2 x3 eps y
* Correlation of 0.2 between every pair of x variables
matrix C = (1, 0.2, 0.2 \ 0.2, 1, 0.2 \ 0.2, 0.2, 1)
* Reset the seed to the value used in the previous example
set seed 12345
* Draw 100 observations from the multivariate normal, reusing the
* means (m) and standard deviations (sd) defined for the previous example
drawnorm x1 x2 x3, n(100) means(m) sds(sd) corr(C)
* Standard-normal "unobservable" error term
generate eps = rnormal()
* Dependent variable, built with the same equation as before
generate y = 5 + 2*x1 - 3*x2 + eps
* Compare the three regressions
regs
* **********************
* TRICKS WITH OLS
* ***************
* Partialling out: the coefficient on x1 in the regression of y on x1 and
* x2 can be recovered by regressing y on the part of x1 that x2 does not
* explain. Uses the variables y, x1 and x2 currently in memory.
* Regress x1 on x2
regress x1 x2
* Obtain the fitted values of x1 (stored as double so the comparison
* below is not blurred by float rounding)
predict double x1hat, xb
* Use those fitted values to obtain the unexplained variation in x1,
* i.e. the residual from the regression above
generate double x1u = x1 - x1hat
* Regress y on x1 and x2, so we know what we're shooting for
regress y x1 x2
* Regress y on x1u (the residual, not x1hat): its coefficient matches the
* coefficient on x1 in the regression just above
regress y x1u
* Ta-da!
* ***************
* INSTRUMENTAL VARIABLES STUFF
* ****************************
* Start from a clean slate
clear all
* Means and standard deviations for the two exogenous draws (xy, x3)
matrix m = (2, 2)
matrix sd = (2, 1)
* Fix the seed so the draws are reproducible
set seed 12345
* Draw xy and x3 (no correlation matrix is supplied)
drawnorm xy x3, n(100) means(m) sds(sd)
* Unobservable shocks: one for x1, one for y (drawn in this order)
generate epsx = rnormal()
generate epsy = rnormal()
* x1 is built from xy, x3 and its own shock
generate x1 = xy + x3 + epsx
* y is built from xy, x1 and its own shock; xy enters both x1 and y
generate y = xy + x1 + epsy
* Regress y on x1 alone (xy is left out of this regression)
regress y x1
* Two steps by hand: regress x1 on x3, keep the fitted values as a
* "clean" version of x1, then regress y on those fitted values
regress x1 x3
predict x1clean
regress y x1clean
* The same instrumental-variables estimate in a single command
ivregress 2sls y (x1 = x3)