/* Stata Web Books: Regression with Stata - Class Notes */

/* Chapter 1 - Simple & Multiple Regression */

/* Load the elemapi2 data set from the UCLA server and keep a local copy
   in a working directory named regstata. */
use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/elemapi2
/* capture: do not stop if the directory is left over from an earlier run */
capture mkdir regstata
/* must name the directory created on the previous line
   (it was misspelled "regstats", which made cd fail) */
cd regstata
/* replace: lets the script be re-run after the copy has been saved once */
save elemapi2, replace

/* First look at the data: variable list, a few raw rows, then codebook
   and summary statistics for the variables used in this chapter. */
describe
list in 1/5
list api00 acs_k3 meals full in 1/10
codebook api00 acs_k3 meals full yr_rnd
summarize api00 acs_k3 meals full
/* NOTE(review): univar is not defined in this file; presumably a
   user-written command that has to be installed first - confirm. */
univar api00 acs_k3 meals full


/* 1.3 Simple Linear Regression */

/* Correlation and scatterplot of api00 against enroll */
correlate api00 enroll
graph api00 enroll

/* Fit the simple regression and store the fitted values in fv
   (xb, the linear prediction, is what predict computes by default) */
regress api00 enroll
predict fv, xb
list api00 fv in 1/10

/* Observed and fitted values against enroll: api00 is drawn as
   unconnected markers, fv as a connected line with invisible markers.
   The second plot uses the values of snum as the markers for api00. */
graph api00 fv enroll, sort connect(.l) symbol(oi)
graph api00 fv enroll, sort connect(.l) symbol([snum]i)

/* scatter is installed here as an add-on from the UCLA ado directory */
net from https://stats.idre.ucla.edu/stat/stata/ado/analysis
net install scatter
scatter api00 enroll

/* 1.4 Multiple Regression */

/* Predictor list shared by every command in this section.
   A local macro only exists while the do-file is running, so run this
   section as a unit rather than one line at a time. */
local predictors ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll

/* Pairwise (with significance levels), listwise and partial correlations */
pwcorr api00 `predictors', sig
correlate api00 `predictors'
pcorr api00 `predictors'

/* Full model, then the same model with standardized (beta) coefficients */
regress api00 `predictors'
regress api00 `predictors', beta

/* listcoef is located with search before it is used */
search listcoef
listcoef

/* Test of ell alone, then joint test of the two class-size variables */
test ell
test acs_k3 acs_46

/* 1.5 Transforming Variables */

/* Histograms of enroll: default, then 20 bins with a normal overlay,
   then the same with explicit axis labels */
graph enroll
graph enroll, bin(20) normal
graph enroll, bin(20) normal xlabel(0(200)1600) ylabel(0(0.05)0.2)

/* Kernel density, box plot and symmetry plot of enroll */
kdensity enroll, normal xlabel ylabel
graph enroll, box ylabel
symplot enroll, xlabel ylabel

/* Normal quantile and normal probability plots of api00 */
qnorm api00
pnorm api00

/* Ladder of powers for enroll, as a table and as graphs */
ladder enroll
gladder enroll

/* Natural log of enroll (ln() and log() are the same function) */
generate lenroll = ln(enroll)
graph lenroll, bin(20) normal

/* Chapter 2 - Regression diagnostics */

/* Replace the data in memory with the crime data set */
use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/crime, clear
describe
summarize crime murder pctmetro pctwhite pcths poverty single

/* Scatterplot matrix of the outcome and the three predictors, then
   crime against each predictor with the value of state as the marker */
graph crime pctmetro poverty single, matrix
foreach x of varlist pctmetro poverty single {
    graph crime `x', symbol([state]) psize(150)
}

/* Fit the model; studentized residuals go into r */
regress crime pctmetro poverty single
predict r, rstudent
stem r

/* hilo is located with search before it is used */
search hilo
hilo r state

/* Leverage values go into lev */
predict lev, leverage
stem lev

/* indexplot is not defined in this file; it is located with search
   before use, so it is presumably a user-written add-on - confirm.
   Each call below names sid and one statistic. The statistic names
   (rstudent, leverage, cooksd, dfbeta) match the diagnostics computed
   with predict and dfbeta elsewhere in this chapter.
   NOTE(review): presumably these plot that statistic, from the preceding
   regress fit, against sid; the exact meaning of hig(), low() and points
   should be checked against the indexplot help file. */
search indexplot
indexplot sid, rstudent points yline(0)
indexplot sid, rstudent hig(3) low(3) points yline(-2.5 0 2.5) s([state])
indexplot sid, leverage hig(3) points s([state])
indexplot sid, cooksd hig(3) points yline(1) s([state])
indexplot sid, dfbeta(poverty) hig(3) points s([state])
indexplot sid, dfbeta(single) hig(3) points s([state])


/* Outcome followed by the three predictors of the crime model.
   A local macro only exists while the do-file is running, so run this
   section as a unit rather than one line at a time. */
local model crime pctmetro poverty single

/* Highest leverage values, then the observations above the .156 cutoff
   (presumably (2k+2)/n with k = 3 and n = 51 - confirm) */
hilo lev state, show(5) high
list `model' state lev if lev > .156
lvr2plot, s([state]) psize(150)
list state `model' if state == "dc" | state == "ms"

/* Cook's D, listed where it exceeds 4/51 */
predict d, cooksd
list `model' state d if d > 4/51

/* DFITS, listed where its absolute value exceeds 2*sqrt(3/51) */
predict dfit, dfits
list `model' state dfit if abs(dfit) > 2*sqrt(3/51)

/* DFBETAs for every predictor; reference lines at +/- .28, which is
   2/sqrt(51) rounded, the cutoff used in the list command further down */
dfbeta
list state DFpctmetro DFpoverty DFsingle in 1/5
graph DFpctmetro DFpoverty DFsingle sid, ylabel(-1(.5)3) yline(.28 -.28)
graph DFpctmetro DFpoverty DFsingle sid, ylabel(-1(.5)3) yline(.28 -.28) symbol([state][state][state]) psize(150)
list DFsingle state `model' if abs(DFsingle) > 2/sqrt(51)

/* Added-variable plot for single, redisplay of the current fit, and
   the same model refit without the observation for dc */
avplot single, s([state]) psize(150)
regress
regress `model' if state != "dc"

/* 2.2 Tests for Normality of Residuals */

/* Back to the elemapi2 data; clear discards the crime data in memory */
use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/elemapi2, clear

/* Fit the model and keep the raw residuals in r */
regress api00 meals ell emer
predict r, residuals

/* Kernel density with normal overlay, normal probability plot,
   normal quantile plot, and the Shapiro-Wilk test of the residuals */
kdensity r, normal
pnorm r, noborder
qnorm r, noborder
swilk r

/* 2.3 Tests for Heteroscedasticity */

/* These commands work on the most recent regression, which is
   regress api00 meals ell emer from section 2.2 */
rvfplot, yline(0) noborder
hettest

/* whitetst is located with search before it is used */
search whitetst
whitetst

/* Residual-versus-fitted plot with enroll as the only predictor ... */
regress api00 enroll
rvfplot

/* ... with the natural log of enroll (ln() and log() are the same) ... */
generate lenroll = ln(enroll)
regress api00 lenroll
rvfplot

/* ... and with enroll added to the model from section 2.2 */
regress api00 meals ell emer enroll
rvfplot

/* 2.4 Tests for Collinearity */

/* Variables common to every command in this section.
   A local macro only exists while the do-file is running, so run this
   section as a unit rather than one line at a time. */
local common grad_sch col_grad some_col

/* Variance inflation factors with avg_ed in the model, then without it */
regress api00 acs_k3 avg_ed `common'
vif
regress api00 acs_k3 `common'
vif

/* collin is located with search before it is used; it is given only
   the predictors, first with avg_ed and then without it */
search collin
collin acs_k3 avg_ed `common'
collin acs_k3 `common'

/* 2.5 Tests on Nonlinearity */

/* Augmented component-plus-residual plot for the one-predictor model */
regress api00 enroll
acprplot enroll, connect(s) band(10)

/* Same kind of plot for each predictor of the two-predictor model */
regress api00 meals some_col
foreach x of varlist meals some_col {
    acprplot `x', connect(s) band(15)
}

/* 2.6 Model Specification */

/* Run the link test and then the omitted-variable test, in that order,
   after each of two models: acs_k3 alone, then acs_k3 with full and meals */
foreach rhs in "acs_k3" "acs_k3 full meals" {
    regress api00 `rhs'
    linktest
    ovtest
}

/* 4.1 Robust Regression Methods */
/* This section has no use command of its own, so it runs on whatever data
   are in memory: elemapi2, loaded in section 2.2, when the file is run
   from top to bottom. */

/* Robust Standard Errors */

/* Ordinary fit and its leverage-versus-squared-residual plot, then the
   same model with robust and with dnum-clustered standard errors */
regress api00 acs_k3 acs_46 full enroll
lvr2plot
regress api00 acs_k3 acs_46 full enroll, robust
tabulate dnum
regress api00 acs_k3 acs_46 full enroll, cluster(dnum)

/* Robust Regression */

/* gen(wt) keeps the final robust-regression weights in wt */
rreg api00 acs_k3 acs_46 full enroll, gen(wt)
/* Remove any earlier p, r and h before predict creates them again.
   One capture per variable: a single "drop p r h" stops with an error and
   drops nothing if any of the three is missing, and when the file is run
   from the top only r (created in section 2.2) exists at this point. */
capture drop p
capture drop r
capture drop h
predict p if e(sample)
predict r if e(sample), resid
predict h if e(sample), hat
hilo wt snum api00 p r h
graph r p, yline(0)

/* create lvr2plot for rreg */
/* squared residuals, rescaled by their total so that they sum to 1 */
generate r2=r^2
summarize r2
replace r2 = r2/r(sum)
/* reference lines are drawn at the mean of each plotted variable */
summarize r2
local rm = r(mean)
summarize h
local hm = r(mean)
graph h r2, yline(`hm') xline(`rm')
/* end lvr2plot */

/* Chapter 3 - Regression with categorical predictors */

/* Reload elemapi2 and inspect the variables used in this chapter */
use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/elemapi2, clear
describe api00 some_col yr_rnd mealcat
codebook api00 some_col yr_rnd mealcat

/* 3.1 Regression with a 0/1 variable */

/* Regression of api00 on yr_rnd; scatter is the add-on installed with
   net install in section 1.3 */
regress api00 yr_rnd
scatter api00 yr_rnd

/* The same comparison as group means, as a t-test (with the t statistic
   squared for comparison) and as a one-way anova */
tabulate yr_rnd, summarize(api00)
ttest api00, by(yr_rnd)
display "square t-test = " r(t)^2
anova api00 yr_rnd

/* 3.2 Regression with a 1/2 variable */

/* yr_rnd2 is a copy of yr_rnd recoded from 0/1 to 1/2 */
generate yr_rnd2 = yr_rnd
recode yr_rnd2 0=1 1=2

/* Fit with each coding, one after the other, for comparison */
regress api00 yr_rnd2
regress api00 yr_rnd

/* 3.3   Regression with a 1/2/3 variable
   3.3.1 Manually Creating Dummy Variables */

/* mealcat entered as if it were a single numeric predictor */
codebook mealcat
regress api00 mealcat

/* One indicator per category: mealcat1, mealcat2, mealcat3 */
tabulate mealcat, generate(mealcat)
list mealcat mealcat1 mealcat2 mealcat3 in 1/10, nolabel

/* Category 1 left out as the reference, then a joint test of the other
   two indicators and the api00 summary within each category */
regress api00 mealcat2 mealcat3
test mealcat2 mealcat3
tabulate mealcat, summarize(api00)

/* 3.3.2 Using the xi command */

/* xi builds the indicators _Imealcat_2 and _Imealcat_3 itself */
xi: regress api00 i.mealcat
test _Imealcat_2 _Imealcat_3

/* Category 3 as the reference instead: first with the manual dummies,
   then by setting the omit characteristic that xi reads */
regress api00 mealcat1 mealcat2
char mealcat[omit] 3
xi: regress api00 i.mealcat

/* The same model through anova, with its regression table */
anova api00 mealcat
anova, regress

/* 3.4   Regression with two categorical predictors */

/* Each predictor alone, then both together.  The tests below refer to
   _Imealcat_1 and _Imealcat_2, i.e. category 3 is the omitted one; that
   relies on the char mealcat[omit] 3 setting from section 3.3.2 still
   being attached to the data in memory. */
regress api00 yr_rnd
xi: regress api00 i.mealcat
xi: regress api00 i.mealcat yr_rnd
test _Imealcat_1 _Imealcat_2
anova api00 yr_rnd mealcat
anova, regress

/* 3.5 Categorical predictor with interactions */

/* mealcat by yr_rnd interaction: test the two interaction terms jointly,
   then the two mealcat indicators jointly */
xi: regress api00 i.mealcat*yr_rnd
test _ImeaXyr_rn_1 _ImeaXyr_rn_2
test _Imealcat_1 _Imealcat_2
anova api00 yr_rnd mealcat yr_rnd*mealcat

/* xi2 is located with search before it is used.
   NOTE(review): the s. prefix presumably requests a different coding
   scheme than i. - confirm against the xi2 help file. */
search xi2
xi2: regress api00 s.mealcat*s.yr_rnd
test _Imealcat_1 _Imealcat_2

/* 3.6 Continuous and Categorical variables */

/* Model with the 0/1 predictor yr_rnd and the continuous some_col;
   fitted values go into yhat (xb, the linear prediction, is what
   predict computes by default) */
regress api00 yr_rnd some_col
predict yhat, xb
graph yhat some_col, ylabel(450 500 to 800) xlabel(0 10 to 70)

/* The same model through anova, with some_col declared continuous,
   followed by its regression table */
anova api00 yr_rnd some_col, cont(some_col)
anova, regress