/* ------------------------------------------------------------------------
   Command script: simple/multiple regression, regression diagnostics,
   robust regression and regression with categorical predictors.

   Datasets (elemapi2, crime) are loaded over the network from
   stats.idre.ucla.edu.  One command per line; a trailing "/*" paired with
   a leading "*/" on the next line is used as a line continuation.

   NOTE(review): the "graph <varlist>, connect() symbol() ..." commands use
   the old (pre-Stata 8) graphics syntax.  On newer releases they may need
   "graph7" or a "version 7:" prefix -- confirm against the target version.

   NOTE(review): commands preceded by "search <name>" (listcoef, hilo,
   indexplot, whitetst, collin, xi2) and univar appear to be user-written
   add-ons; "search" only locates them, it does not install them.
   ------------------------------------------------------------------------ */

/* Chapter 1 - Simple & Multiple Regression */

use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/elemapi2

/* Keep a local copy of the data in a working directory.
   capture: do not abort when the directory already exists on a re-run. */
capture mkdir regstata
/* Must name the directory created above (was "regstats", which does not exist). */
cd regstata
/* NOTE(review): save has no replace option, so a re-run stops here if
   elemapi2.dta already exists -- confirm whether that is intended. */
save elemapi2

/* First look at the data */
describe
list in 1/5
list api00 acs_k3 meals full in 1/10
codebook api00 acs_k3 meals full yr_rnd
summarize api00 acs_k3 meals full
univar api00 acs_k3 meals full

/* 1.3 Simple Linear Regression */

corr api00 enroll
graph api00 enroll
regress api00 enroll
/* fv = fitted values from the regression just above */
predict fv
list api00 fv in 1/10
/* Observed points plus the fitted line; second version labels points with snum */
graph api00 fv enroll, connect(.l) symbol(oi) sort
graph api00 fv enroll, connect(.l) symbol([snum]i) sort
net from https://stats.idre.ucla.edu/stat/stata/ado/analysis
net install scatter
scatter api00 enroll

/* 1.4 Multiple Regression */

pwcorr api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll, sig
corr api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll
pcorr api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll
regress api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll
/* Same model, reporting standardized (beta) coefficients */
regress api00 ell meals yr_rnd mobility acs_k3 acs_46 full emer enroll, beta
search listcoef
listcoef
/* Single-coefficient test, then a joint test of the two class-size variables */
test ell
test acs_k3 acs_46

/* 1.5 Transforming Variables */

graph enroll
graph enroll, normal bin(20)
graph enroll, normal bin(20) xlabel(0(200)1600) ylabel(0(.05).2)
kdensity enroll, normal ylabel xlabel
graph enroll, box ylabel
symplot enroll, xlabel ylabel
qnorm api00
pnorm api00
ladder enroll
gladder enroll
generate lenroll = log(enroll)
graph lenroll, normal bin(20)

/* Chapter 2 - Regression diagnostics */

/* clear discards the data in memory, including fv and lenroll created above */
use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/crime, clear
describe
summarize crime murder pctmetro pctwhite pcths poverty single
graph crime pctmetro poverty single, matrix
graph crime pctmetro, s([state]) psize(150)
graph crime poverty, s([state]) psize(150)
graph crime single, s([state]) psize(150)
regress crime pctmetro poverty single

/* Studentized residuals */
predict r, rstudent
stem r
search hilo
hilo r state

/* Leverage */
predict lev, leverage
stem lev
search indexplot
indexplot sid, rstudent points yline(0)
indexplot sid, rstudent hig(3) low(3) points yline(-2.5 0 2.5) s([state])
indexplot sid, leverage hig(3) points s([state])
indexplot sid, cooksd hig(3) points yline(1) s([state])
indexplot sid, dfbeta(poverty) hig(3) points s([state])
indexplot sid, dfbeta(single) hig(3) points s([state])
hilo lev state, show(5) high
list crime pctmetro poverty single state lev if lev >.156
lvr2plot, s([state]) psize(150)
list state crime pctmetro poverty single if state=="dc" | state=="ms"

/* Overall influence: Cook's D (cutoff written as 4/51) and DFITS
   (cutoff written as 2*sqrt(3/51)) */
predict d, cooksd
list crime pctmetro poverty single state d if d>4/51
predict dfit, dfits
list crime pctmetro poverty single state dfit if abs(dfit)>2*sqrt(3/51)

/* Per-coefficient influence.  The following commands expect dfbeta to have
   created DFpctmetro, DFpoverty and DFsingle.
   NOTE(review): confirm these variable names for the Stata version in use. */
dfbeta
list state DFpctmetro DFpoverty DFsingle in 1/5
graph DFpctmetro DFpoverty DFsingle sid, ylabel(-1(.5)3) yline(.28 -.28)
graph DFpctmetro DFpoverty DFsingle sid, ylabel(-1(.5)3) yline(.28 -.28) /*
*/ s([state][state][state]) psize(150)
list DFsingle state crime pctmetro poverty single if abs(DFsingle) > 2/sqrt(51)
avplot single, s([state]) psize(150)

/* Bare regress, then the model refitted without the "dc" observation.
   NOTE(review): the source had "regress regress crime ..."; kept as a
   bare regress followed by the new fit -- confirm it is not a duplicated word. */
regress
regress crime pctmetro poverty single if state!="dc"

/* 2.2 Tests for Normality of Residuals */

use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/elemapi2, clear
regress api00 meals ell emer
/* r = raw residuals (the earlier r was discarded by "use ..., clear") */
predict r, resid
kdensity r, normal
pnorm r, noborder
qnorm r, noborder
swilk r

/* 2.3 Tests for Heteroscedasticity */

rvfplot, noborder yline(0)
hettest
search whitetst
whitetst
regress api00 enroll
rvfplot
generate lenroll = log(enroll)
regress api00 lenroll
rvfplot
regress api00 meals ell emer enroll
rvfplot

/* 2.4 Tests for Collinearity */

/* Each check is run with and then without avg_ed */
regress api00 acs_k3 avg_ed grad_sch col_grad some_col
vif
regress api00 acs_k3 grad_sch col_grad some_col
vif
search collin
collin acs_k3 avg_ed grad_sch col_grad some_col
collin acs_k3 grad_sch col_grad some_col

/* 2.5 Tests on Nonlinearity */

regress api00 enroll
acprplot enroll, connect(s) band(10)
regress api00 meals some_col
acprplot meals, connect(s) band(15)
acprplot some_col, connect(s) band(15)

/* 2.6 Model Specification */

regress api00 acs_k3
linktest
ovtest
regress api00 acs_k3 full meals
linktest
ovtest

/* 4.1 Robust Regression Methods */
/* NOTE(review): this section sits between Chapter 2 and Chapter 3 and runs
   on the elemapi2 data still in memory from section 2.2; order kept as found. */

/* Robust Standard Errors */

regress api00 acs_k3 acs_46 full enroll
lvr2plot
regress api00 acs_k3 acs_46 full enroll, robust
tabulate dnum
regress api00 acs_k3 acs_46 full enroll, cluster(dnum)

/* Robust Regression */

rreg api00 acs_k3 acs_46 full enroll, gen(wt)
/* Clear any leftover p, r, h before predict recreates them.  r still exists
   from section 2.2; nothing earlier in this script creates p or h, and a
   single "drop p r h" aborts when any listed variable is missing.  Dropping
   one variable per capture keeps r from surviving that failure. */
capture drop p
capture drop r
capture drop h
predict p if e(sample)
predict r if e(sample), resid
predict h if e(sample), hat
hilo wt snum api00 p r h
graph r p, yline(0)

/* create lvr2plot for rreg */
generate r2=r^2
sum r2
/* Scale squared residuals by their total, using r(sum) from the summarize above */
replace r2 = r2/r(sum)
/* Reference lines are drawn at the means of r2 and of h */
summarize r2
local rm = r(mean)
summarize h
local hm = r(mean)
graph h r2, yline(`hm') xline(`rm')
/* end lvr2plot */

/* Chapter 3 - Regression with categorical predictors */

use https://stats.idre.ucla.edu/stat/stata/webbooks/reg/elemapi2, clear
describe api00 some_col yr_rnd mealcat
codebook api00 some_col yr_rnd mealcat

/* 3.1 Regression with a 0/1 variable */

regress api00 yr_rnd
scatter api00 yr_rnd
tabulate yr_rnd, sum(api00)
ttest api00, by(yr_rnd)
/* Displays the squared t statistic, followed by the one-way anova */
di "square t-test = " r(t)^2
anova api00 yr_rnd

/* 3.2 Regression with a 1/2 variable */

/* yr_rnd2 is yr_rnd recoded from 0/1 to 1/2 */
generate yr_rnd2=yr_rnd
recode yr_rnd2 0=1 1=2
regress api00 yr_rnd2
regress api00 yr_rnd

/* 3.3 Regression with a 1/2/3 variable
   3.3.1 Manually Creating Dummy Variables */

codebook mealcat
regress api00 mealcat
/* gen(mealcat) creates the indicators mealcat1, mealcat2, mealcat3 used below */
tabulate mealcat, gen(mealcat)
list mealcat mealcat1 mealcat2 mealcat3 in 1/10, nolabel
regress api00 mealcat2 mealcat3
test mealcat2 mealcat3
tabulate mealcat, summarize(api00)

/* 3.3.2 Using the xi command */

xi : regress api00 i.mealcat
test _Imealcat_2 _Imealcat_3
regress api00 mealcat1 mealcat2
/* Make category 3 the omitted group for subsequent xi expansions; the later
   tests on _Imealcat_1 and _Imealcat_2 depend on this setting */
char mealcat[omit] 3
xi : regress api00 i.mealcat
anova api00 mealcat
anova, regress

/* 3.4 Regression with two categorical predictors */

regress api00 yr_rnd
xi : regress api00 i.mealcat
xi : regress api00 i.mealcat yr_rnd
test _Imealcat_1 _Imealcat_2
anova api00 yr_rnd mealcat
anova, regress

/* 3.5 Categorical predictor with interactions */

xi : regress api00 i.mealcat*yr_rnd
/* Joint test of the interaction terms, then of the mealcat terms */
test _ImeaXyr_rn_1 _ImeaXyr_rn_2
test _Imealcat_1 _Imealcat_2
anova api00 yr_rnd mealcat yr_rnd*mealcat
search xi2
xi2: regress api00 s.mealcat*s.yr_rnd
test _Imealcat_1 _Imealcat_2

/* 3.6 Continuous and Categorical variables */

regress api00 yr_rnd some_col
predict yhat
graph yhat some_col, xlabel(0 10 to 70) ylabel(450 500 to 800)
anova api00 yr_rnd some_col, cont(some_col)
anova, regress