Figure 12.1, page 414 and Table 12.2, page 413. This page has been updated using Stata 10. Notice that the sample size used in calculating the BIC is the number of events (see page 402 for more detailed discussion). We generate the plots first so that we do not need to rerun the models.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/tenure_pp, clear /* compute powers of period for polynomial models */ generate p2 = period^2 generate p3 = period^3 generate p4 = period^4 generate p5 = period^5 count if event==1 local n=r(N) /* constant model */ quietly logit event estimates store m1 predict p_0 replace p_0 = logit(p_0) /* linear model */ quietly logit event period estimates store m2 predict p_1 replace p_1 = logit(p_1) /* quadratic model */ quietly logit event period p2 estimates store m3 predict p_2 replace p_2 = logit(p_2) /* cubic model */ quietly logit event period p2 p3 estimates store m4 predict p_3 replace p_3 = logit(p_3) /* fourth order model */ quietly logit event period p2 p3 p4 estimates store m5 quietly logit event period p2 p3 p4 p5 estimates store m6 /* general model */ quietly logit event period d1-d9 estimates store m7 predict p_g replace p_g = logit(p_g) /* Top figure */ line p_0 p_1 p_2 p_3 p_g period, sort /// xtitle("Years after hire") ytitle("Fitted logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "Constant") label(2 "Linear") label(3 "Quadratic") label(4 "Cubic") label(5 "General")) preserve collapse (mean) p_2 p_g, by(period) gen haz_2 = 1/(1 + exp(-p_2)) gen surv_2 = 1 replace surv_2 = (1 - haz_2)*surv_2 if period == 1 replace surv_2 = (1 - haz_2)*surv_2[_n-1] if period > 1 gen haz_g = 1/(1 + exp(-p_g)) gen surv_g = 1 replace surv_g = (1 - haz_g)*surv_g if period == 1 replace surv_g = (1 - haz_g)*surv_g[_n-1] if period > 1 /* Bottom left figure */ line haz_2 haz_g period, /// xtitle("Years after hire") ytitle("Fitted hazard") /// legend(pos(5) ring(0) col(1) /// label(1 "Quadratic") label(2 "General")) /* Bottom right figure */ line surv_2 surv_g period, /// xtitle("Years after hire") ytitle("Fitted survival probability") /// legend(pos(7) ring(0) col(1) /// label(1 "Quadratic") label(2 "General")) restore estimates stats m1 m2 m3 m4 m5 m6 m7 mat a = r(S) drop _all svmat a rename a5 aic gen deviance = -2*a3 gen pdiff = deviance - deviance[_n-1] if _n~=7 gen gdiff =deviance[7] - deviance if _n~=7 gen bic = deviance + log(`n')*a4 list a4 deviance pdiff gdiff aic bic +-------------------------------------------------------------+ | a4 deviance pdiff gdiff aic bic | |-------------------------------------------------------------| 1. | 1 1037.565 . -206.3614 1039.565 1042.677 | 2. | 2 867.4619 -170.1033 -36.25806 871.4619 877.6859 | 3. | 3 836.3041 -31.15778 -5.100281 842.3041 851.64 | 4. | 4 833.1725 -3.131592 -1.968689 841.1725 853.6204 | 5. | 5 832.7427 -.4298096 -1.538879 842.7427 858.3026 | |-------------------------------------------------------------| 6. | 6 832.7322 -.010498 -1.528381 844.7322 863.4041 | 7. | 9 831.2038 . . 849.2038 877.2117 | +-------------------------------------------------------------+
Figure 12.3, page 423.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstsex_pp, clear quietly logit event d7 d8 d9 d10 d11 d12 if pt==0, nocons predict logit_pt0 if pt==0 replace logit_pt0 = logit(logit_pt0) quietly logit event d7 d8 d9 d10 d11 d12 if pt==1, nocons predict logit_pt1 if pt==1 replace logit_pt1 = logit(logit_pt1) quietly cloglog event d7 d8 d9 d10 d11 d12 if pt==0, nocons predict cll_pt0 if pt==0 replace cll_pt0 = cloglog(cll_pt0) quietly cloglog event d7 d8 d9 d10 d11 d12 if pt==1, nocons predict cll_pt1 if pt==1 replace cll_pt1 = cloglog(cll_pt1) sort period line logit_pt1 cll_pt1 logit_pt0 cll_pt0 period, /// xtitle("Grade") ytitle("Transformed hazard probability") /// legend(pos(5) ring(0) col(1) /// label(1 "PT, logit") label(2 "PT, cloglog") label(3 "no PT, logit") label(4 "no PT, cloglog"))
Table 12.3, page 424.
preserve cloglog event d7 d8 d9 d10 d11 d12 pt, nocons Iteration 0: log likelihood = -318.86762 Iteration 1: log likelihood = -317.25266 Iteration 2: log likelihood = -317.25066 Iteration 3: log likelihood = -317.25066 Complementary log-log regression Number of obs = 822 Zero outcomes = 696 Nonzero outcomes = 126 Wald chi2(7) = 347.48 Log likelihood = -317.25066 Prob > chi2 = 0.0000 ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- d7 | -2.973297 .2978643 -9.98 0.000 -3.557101 -2.389494 d8 | -3.659244 .4052271 -9.03 0.000 -4.453475 -2.865013 d9 | -2.315632 .2501319 -9.26 0.000 -2.805881 -1.825383 d10 | -1.900139 .232474 -8.17 0.000 -2.355779 -1.444498 d11 | -1.762139 .2405359 -7.33 0.000 -2.23358 -1.290697 d12 | -1.342638 .2317264 -5.79 0.000 -1.796813 -.8884625 pt | .7854048 .195704 4.01 0.000 .401832 1.168978 ------------------------------------------------------------------------------ dis -2*e(ll) 634.50132 mat a = e(b)' drop _all svmat a gen cll = 1-exp(-exp(a1)) list cll +----------+ | cll | |----------| 1. | .0498491 | 2. | .0254232 | 3. | .0939889 | 4. | .1389028 | 5. | .1577491 | |----------| 6. | .2298391 | 7. | .8884513 | +----------+ restore logit event d7 d8 d9 d10 d11 d12 pt, nocons Iteration 0: log likelihood = -569.76698 Iteration 1: log likelihood = -334.87088 Iteration 2: log likelihood = -318.81582 Iteration 3: log likelihood = -317.36804 Iteration 4: log likelihood = -317.33096 Iteration 5: log likelihood = -317.33089 Logistic regression Number of obs = 822 LR chi2(7) = . Log likelihood = -317.33089 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- d7 | -2.994327 .3175088 -9.43 0.000 -3.616632 -2.372021 d8 | -3.700124 .4205614 -8.80 0.000 -4.524409 -2.875839 d9 | -2.281124 .2723919 -8.37 0.000 -2.815002 -1.747245 d10 | -1.822599 .2584613 -7.05 0.000 -2.329173 -1.316024 d11 | -1.654227 .2691057 -6.15 0.000 -2.181665 -1.12679 d12 | -1.179057 .2715801 -4.34 0.000 -1.711344 -.6467698 pt | .8736184 .2174075 4.02 0.000 .4475076 1.299729 ------------------------------------------------------------------------------ dis -2*e(ll) 634.66178 mat b = e(b)' drop _all svmat b gen logitest = 1/(1+exp(-b1)) list logitest +----------+ | logitest | |----------| 1. | .0476828 | 2. | .0241241 | 3. | .0926984 | 4. | .1391224 | 5. | .1605384 | |----------| 6. | .2352218 | 7. | .705498 | +----------+
Figure 12.4, page 432.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/depression_pp, clear sort period pd by period pd: egen pevent = mean(event) quietly logit event one age_18 age_18sq age_18cub pd, nocons predict p twoway (scatter pevent period if pd==0 & pevent > 0) /// (scatter pevent period if pd==1 & pevent > 0) /// (line p period if pd==0) /// (line p period if pd==1), /// xtitle("Age") ytitle("Proportion with event") /// legend(pos(10) ring(0) col(1) /// label(1 "PD=0, observed") label(2 "PD=1, observed") label(3 "PD=0, model") label(4 "PD=1, model")) gen logitpevent = logit(pevent) gen logitpmodel = logit(p) twoway (scatter logitpevent period if pd==0 & pevent > 0) /// (scatter logitpevent period if pd==1 & pevent > 0) /// (line logitpmodel period if pd==0) /// (line logitpmodel period if pd==1), /// xtitle("Age") ytitle("Logit(proportion with event)") /// legend(pos(5) ring(0) col(2) /// label(1 "PD=0, observed") label(2 "PD=1, observed") label(3 "PD=0, model") label(4 "PD=1, model"))
Figure 12.5, page 437.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/depression_pp, clear quietly logit event one age_18 age_18sq age_18cub pd female, nocons predict p12_5 collapse (mean) p12_5, by (period pd female) twoway (line p12_5 period if pd==0 & female ==0) /// (line p12_5 period if pd==1 & female ==0) /// (line p12_5 period if pd==0 & female ==1) /// (line p12_5 period if pd==1 & female ==1), /// xtitle("Age") ytitle("Fitted Hazard") /// legend(pos(5) ring(0) col(2) /// label(1 "PD=0, male") label(2 "PD=1, male") label(3 "PD=0, female") label(4 "PD=1, female")) sort female pd period gen surv = 1 replace surv = (1-p12_5)*surv[_n-1] if period > 4 twoway (line surv period if pd==0 & female ==0) /// (line surv period if pd==1 & female ==0) /// (line surv period if pd==0 & female ==1) /// (line surv period if pd==1 & female ==1), /// xtitle("Age") ytitle("Fitted Survival") /// legend(pos(7) ring(0) col(2) /// label(1 "PD=0, male") label(2 "PD=1, male") label(3 "PD=0, female") label(4 "PD=1, female"))
Figure 12.6, page 445.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstarrest_pp, clear sort period abused black by period abused black: egen pevent = mean(event) gen logitp = log(pevent/(1-pevent)) twoway (line logitp period if black==0 & abused==0) /// (line logitp period if black==0 & abused==1), /// title("White") xtitle("Age") ytitle("Sample logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "not abused") label(2 "abused")) twoway (line logitp period if black==1 & abused==0) /// (line logitp period if black==1 & abused==1), /// title("Black") xtitle("Age") ytitle("Sample logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "not abused") label(2 "abused")) quietly logit event d8 d9 d10 d11 d12 d13 d14 d15 d16 d17 d18 abused black ablack, nocons predict p gen fittedlogit = logit(p) by period abused black: egen fitted = mean(fittedlogit) twoway (line fitted period if black==0 & abused==0) /// (line fitted period if black==0 & abused==1) /// (line fitted period if black==1 & abused==0) /// (line fitted period if black==1 & abused==1), /// xtitle("Age") ytitle("Fitted logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "not abused, white") label(2 "abused, white") /// label(3 "not abused, black") label(4 "abused, black"))
Table 12.4, page 449.
/* Model A */ logit event one age_18 age_18sq age_18cub pd female nsibs, nocons Logistic regression Number of obs = 36997 LR chi2(7) = . Log likelihood = -2062.1428 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- one | -4.358696 .1216028 -35.84 0.000 -4.597033 -4.120358 age_18 | .0610608 .0116617 5.24 0.000 .0382043 .0839172 age_18sq | -.0073084 .0012241 -5.97 0.000 -.0097075 -.0049093 age_18cub | .0001816 .000079 2.30 0.021 .0000268 .0003364 pd | .3726011 .1623821 2.29 0.022 .054338 .6908642 female | .5586864 .1094751 5.10 0.000 .3441191 .7732536 nsibs | -.0814109 .0222726 -3.66 0.000 -.1250643 -.0377574 ------------------------------------------------------------------------------ dis -2*e(ll) 4124.2856 dis -2*e(ll)+2*e(df_m) 4138.2856 /* Model B */ logit event one age_18 age_18sq age_18cub pd female sibs12 sibs34 sibs56 sibs78 sibs9plus, nocons Logistic regression Number of obs = 36997 LR chi2(11) = . Log likelihood = -2058.991 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- one | -4.500068 .2066902 -21.77 0.000 -4.905174 -4.094963 age_18 | .0614524 .0116639 5.27 0.000 .0385915 .0843134 age_18sq | -.0072894 .0012239 -5.96 0.000 -.0096882 -.0048906 age_18cub | .0001813 .000079 2.30 0.022 .0000265 .0003361 pd | .3727125 .1624852 2.29 0.022 .0542474 .6911776 female | .5595903 .1095306 5.11 0.000 .3449143 .7742664 sibs12 | .020851 .1976079 0.11 0.916 -.3664533 .4081553 sibs34 | .0107606 .2100347 0.05 0.959 -.4008999 .4224211 sibs56 | -.4942199 .2545432 -1.94 0.052 -.9931155 .0046757 sibs78 | -.7753991 .3437127 -2.26 0.024 -1.449064 -.1017346 sibs9plus | -.6584835 .3440501 -1.91 0.056 -1.332809 .0158424 ------------------------------------------------------------------------------ dis -2*e(ll) 4117.982 dis -2*e(ll)+2*e(df_m) 4139.982 /* Model C */ logit event one age_18 age_18sq age_18cub pd female bigfamily, nocons Logistic regression Number of obs = 36997 LR chi2(7) = . Log likelihood = -2059.3892 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- one | -4.482812 .1087126 -41.24 0.000 -4.695884 -4.269739 age_18 | .0614096 .0116632 5.27 0.000 .0385501 .0842691 age_18sq | -.0072913 .001224 -5.96 0.000 -.0096902 -.0048924 age_18cub | .0001815 .000079 2.30 0.022 .0000267 .0003363 pd | .3710316 .1622959 2.29 0.022 .0529375 .6891257 female | .5580501 .1094735 5.10 0.000 .343486 .7726142 bigfamily | -.6107821 .1445777 -4.22 0.000 -.8941491 -.327415 ------------------------------------------------------------------------------ dis -2*e(ll) 4118.7784 dis -2*e(ll)+2*e(df_m) 4132.7784
Figure 12.8, page 458 and Table 12.5, page 459.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/mathdropout_pp, clear /* Graph 1 */ sort period female by period female: egen pevent = mean(event) gen logitp = log(pevent/(1-pevent)) twoway (line logitp period if female==0) /// (line logitp period if female==1), /// title("Within-group sample hazard functions") xtitle("Term") ytitle("Sample logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "male") label(2 "female")) /* Model A and Graph 2 */ logit event hs11 hs12 coll1 coll2 coll3 female, nocons Logistic regression Number of obs = 9558 LR chi2(6) = . Log likelihood = -4902.1548 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs11 | -2.130802 .0567383 -37.55 0.000 -2.242007 -2.019597 hs12 | -.9424815 .0478881 -19.68 0.000 -1.036341 -.8486226 coll1 | -1.449476 .0634496 -22.84 0.000 -1.573834 -1.325117 coll2 | -.6176489 .0757141 -8.16 0.000 -.7660459 -.469252 coll3 | -.7716024 .1428008 -5.40 0.000 -1.051487 -.491718 female | .378645 .0501482 7.55 0.000 .2803564 .4769336 ------------------------------------------------------------------------------ predict pA dis -2*e(ll) 9804.3096 dis -2*e(ll)+2*e(df_m) 9816.3096 gen logitpA = logit(pA) twoway (line logitpA period if female==0) /// (line logitpA period if female==1), /// title("Model A: Main effect of female") xtitle("Term") ytitle("Fitted logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "male") label(2 "female")) /* Model B and Graph 3 */ logit event hs11 hs12 coll1 coll2 coll3 fhs11 fhs12 fcoll1 fcoll2 fcoll3, nocons Logistic regression Number of obs = 9558 LR chi2(10) = . Log likelihood = -4898.1351 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs11 | -2.00767 .0714805 -28.09 0.000 -2.147769 -1.867571 hs12 | -.9642534 .0585445 -16.47 0.000 -1.078999 -.8495082 coll1 | -1.482402 .0847111 -17.50 0.000 -1.648432 -1.316371 coll2 | -.710011 .1007333 -7.05 0.000 -.9074447 -.5125773 coll3 | -.8690378 .1907714 -4.56 0.000 -1.242943 -.4951328 fhs11 | .1567951 .0977744 1.60 0.109 -.0348393 .3484295 fhs12 | .4186602 .0792373 5.28 0.000 .263358 .5739624 fcoll1 | .4406768 .1158028 3.81 0.000 .2137075 .6676461 fcoll2 | .5707489 .1445487 3.95 0.000 .2874387 .8540592 fcoll3 | .6007739 .2857317 2.10 0.036 .0407501 1.160798 ------------------------------------------------------------------------------ predict pB dis -2*e(ll) 9796.2702 dis -2*e(ll)+2*e(df_m) 9816.2702 gen logitpB = logit(pB) twoway (line logitpB period if female==0) /// (line logitpB period if female==1), /// title("Model B: General Interaction") xtitle("Term") ytitle("Fitted logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "male") label(2 "female")) /* Model C and Graph 4 */ logit event hs11 hs12 coll1 coll2 coll3 female fltime, nocons Logistic regression Number of obs = 9558 LR chi2(7) = . Log likelihood = -4898.9033 Prob > chi2 = . ------------------------------------------------------------------------------ event | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- hs11 | -2.04592 .0646442 -31.65 0.000 -2.172621 -1.91922 hs12 | -.9255085 .0482123 -19.20 0.000 -1.020003 -.8310141 coll1 | -1.496554 .0664926 -22.51 0.000 -1.626877 -1.366231 coll2 | -.7178143 .0860535 -8.34 0.000 -.8864761 -.5491525 coll3 | -.9165523 .1556976 -5.89 0.000 -1.221714 -.6113906 female | .2274938 .0774452 2.94 0.003 .075704 .3792836 fltime | .1197699 .0469822 2.55 0.011 .0276865 .2118534 ------------------------------------------------------------------------------ predict pC dis -2*e(ll) 9797.8065 dis -2*e(ll)+2*e(df_m) 9811.8065 gen logitpC = logit(pC) twoway (line logitpC period if female==0) /// (line logitpC period if female==1), /// title("Model C: Female-time interaction") xtitle("Term") ytitle("Fitted logit(hazard)") /// legend(pos(5) ring(0) col(1) /// label(1 "male") label(2 "female"))
Table 12.6, page 465.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstsex_pp, clear logit event d7 d8 d9 d10 d11 d12 pt pas, nocons predict p gen dev = -sqrt(-2*log(1-p)) replace dev = sqrt(-2*log(p)) if event == 1 keep if (id==22) | (id==112) | (id==166) | (id==89) | (id==102) | (id==87) | (id==67) | (id==212) sort id by id: egen pt_max = max(pt) by id: egen pas_max = max(pas) by id: egen grade = max(period) by id: egen censor = max(event) drop pt pas event event d7-d12 p replace censor = abs(censor - 1) reshape wide dev, i(id) j(period) gen ssdev = max(dev7^2, 0) + max(dev8^2, 0) + max(dev9^2, 0) + max(dev10^2, 0) + max(dev11^2, 0) + max(dev12^2, 0) list, table +------------------------------------------------------------------------------------------------------------------------------+ | id dev7 dev8 dev9 dev10 dev11 dev12 pt_max pas_max grade censor ssdev | |------------------------------------------------------------------------------------------------------------------------------| 1. | 22 -.4117311 -.2944394 -.5840394 -.7176492 -.7747741 1.414463 1 -.6496482 12 0 3.713321 | 2. | 67 -.6180257 -.4476514 -.8559141 -1.029431 -1.100732 1.042969 1 2.274652 12 0 4.674059 | 3. | 87 1.81763 . . . . . 1 2.677901 7 0 3.303778 | 4. | 89 -.3248377 -.2313901 -.4644566 -.5751775 1.862369 . 0 -.0751572 11 0 4.174028 | 5. | 102 -.491339 2.369482 . . . . 1 .6049317 8 0 5.855859 | |------------------------------------------------------------------------------------------------------------------------------| 6. | 112 -.4110716 -.2939581 -.5831423 -.716592 -.7736556 -.9562858 1 -.6609311 12 1 2.621976 | 7. | 166 -.6614533 -.4806816 -.910823 -1.090311 1.191398 . 1 2.781413 11 0 4.106381 | 8. | 212 -.2857033 -.2032141 -.409755 -.5090181 -.5524318 -.6958426 0 -.9617912 12 1 1.339299 | +------------------------------------------------------------------------------------------------------------------------------+
Figure 12.9, page 467.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstsex_pp, clear logit event d7 d8 d9 d10 d11 d12 pt pas, nocons predict p gen dev = -sqrt(-2*log(1-p)) replace dev = sqrt(-2*log(p)) if event == 1 twoway scatter dev id, /// xtitle("ID") ytitle("Deviance residual") gen devsq = dev^2 sort id by id: egen ssdev = sum(devsq) twoway scatter ssdev id, /// xtitle("ID") ytitle("SS deviance residual")