Figure 12.1, page 414 and Table 12.2, page 413. This page has been updated using Stata 10. Notice that the sample size used in calculating the BIC is the number of events (see page 402 for more detailed discussion). We generate the plots first so that we do not need to rerun the models.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/tenure_pp, clear
/* compute powers of period for polynomial models */
generate p2 = period^2
generate p3 = period^3
generate p4 = period^4
generate p5 = period^5
count if event==1
local n=r(N)
/* constant model */
quietly logit event
estimates store m1
predict p_0
replace p_0 = logit(p_0)
/* linear model */
quietly logit event period
estimates store m2
predict p_1
replace p_1 = logit(p_1)
/* quadratic model */
quietly logit event period p2
estimates store m3
predict p_2
replace p_2 = logit(p_2)
/* cubic model */
quietly logit event period p2 p3
estimates store m4
predict p_3
replace p_3 = logit(p_3)
/* fourth order model */
quietly logit event period p2 p3 p4
estimates store m5
quietly logit event period p2 p3 p4 p5
estimates store m6
/* general model */
quietly logit event period d1-d9
estimates store m7
predict p_g
replace p_g = logit(p_g)
/* Top figure */
line p_0 p_1 p_2 p_3 p_g period, sort ///
xtitle("Years after hire") ytitle("Fitted logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "Constant") label(2 "Linear") label(3 "Quadratic") label(4 "Cubic") label(5 "General"))
preserve
collapse (mean) p_2 p_g, by(period)
gen haz_2 = 1/(1 + exp(-p_2))
gen surv_2 = 1
replace surv_2 = (1 - haz_2)*surv_2 if period == 1
replace surv_2 = (1 - haz_2)*surv_2[_n-1] if period > 1
gen haz_g = 1/(1 + exp(-p_g))
gen surv_g = 1
replace surv_g = (1 - haz_g)*surv_g if period == 1
replace surv_g = (1 - haz_g)*surv_g[_n-1] if period > 1
/* Bottom left figure */
line haz_2 haz_g period, ///
xtitle("Years after hire") ytitle("Fitted hazard") ///
legend(pos(5) ring(0) col(1) ///
label(1 "Quadratic") label(2 "General"))
/* Bottom right figure */
line surv_2 surv_g period, ///
xtitle("Years after hire") ytitle("Fitted survival probability") ///
legend(pos(7) ring(0) col(1) ///
label(1 "Quadratic") label(2 "General"))
restore
estimates stats m1 m2 m3 m4 m5 m6 m7
mat a = r(S)
drop _all
svmat a
rename a5 aic
gen deviance = -2*a3
gen pdiff = deviance - deviance[_n-1] if _n~=7
gen gdiff =deviance[7] - deviance if _n~=7
gen bic = deviance + log(`n')*a4
list a4 deviance pdiff gdiff aic bic
+-------------------------------------------------------------+
| a4 deviance pdiff gdiff aic bic |
|-------------------------------------------------------------|
1. | 1 1037.565 . -206.3614 1039.565 1042.677 |
2. | 2 867.4619 -170.1033 -36.25806 871.4619 877.6859 |
3. | 3 836.3041 -31.15778 -5.100281 842.3041 851.64 |
4. | 4 833.1725 -3.131592 -1.968689 841.1725 853.6204 |
5. | 5 832.7427 -.4298096 -1.538879 842.7427 858.3026 |
|-------------------------------------------------------------|
6. | 6 832.7322 -.010498 -1.528381 844.7322 863.4041 |
7. | 9 831.2038 . . 849.2038 877.2117 |
+-------------------------------------------------------------+
Figure 12.3, page 423.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstsex_pp, clear
quietly logit event d7 d8 d9 d10 d11 d12 if pt==0, nocons
predict logit_pt0 if pt==0
replace logit_pt0 = logit(logit_pt0)
quietly logit event d7 d8 d9 d10 d11 d12 if pt==1, nocons
predict logit_pt1 if pt==1
replace logit_pt1 = logit(logit_pt1)
quietly cloglog event d7 d8 d9 d10 d11 d12 if pt==0, nocons
predict cll_pt0 if pt==0
replace cll_pt0 = cloglog(cll_pt0)
quietly cloglog event d7 d8 d9 d10 d11 d12 if pt==1, nocons
predict cll_pt1 if pt==1
replace cll_pt1 = cloglog(cll_pt1)
sort period
line logit_pt1 cll_pt1 logit_pt0 cll_pt0 period, ///
xtitle("Grade") ytitle("Transformed hazard probability") ///
legend(pos(5) ring(0) col(1) ///
label(1 "PT, logit") label(2 "PT, cloglog") label(3 "no PT, logit") label(4 "no PT, cloglog"))

Table 12.3, page 424.
preserve
cloglog event d7 d8 d9 d10 d11 d12 pt, nocons
Iteration 0: log likelihood = -318.86762
Iteration 1: log likelihood = -317.25266
Iteration 2: log likelihood = -317.25066
Iteration 3: log likelihood = -317.25066
Complementary log-log regression Number of obs = 822
Zero outcomes = 696
Nonzero outcomes = 126
Wald chi2(7) = 347.48
Log likelihood = -317.25066 Prob > chi2 = 0.0000
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
d7 | -2.973297 .2978643 -9.98 0.000 -3.557101 -2.389494
d8 | -3.659244 .4052271 -9.03 0.000 -4.453475 -2.865013
d9 | -2.315632 .2501319 -9.26 0.000 -2.805881 -1.825383
d10 | -1.900139 .232474 -8.17 0.000 -2.355779 -1.444498
d11 | -1.762139 .2405359 -7.33 0.000 -2.23358 -1.290697
d12 | -1.342638 .2317264 -5.79 0.000 -1.796813 -.8884625
pt | .7854048 .195704 4.01 0.000 .401832 1.168978
------------------------------------------------------------------------------
dis -2*e(ll)
634.50132
mat a = e(b)'
drop _all
svmat a
gen cll = 1-exp(-exp(a1))
list cll
+----------+
| cll |
|----------|
1. | .0498491 |
2. | .0254232 |
3. | .0939889 |
4. | .1389028 |
5. | .1577491 |
|----------|
6. | .2298391 |
7. | .8884513 |
+----------+
restore
logit event d7 d8 d9 d10 d11 d12 pt, nocons
Iteration 0: log likelihood = -569.76698
Iteration 1: log likelihood = -334.87088
Iteration 2: log likelihood = -318.81582
Iteration 3: log likelihood = -317.36804
Iteration 4: log likelihood = -317.33096
Iteration 5: log likelihood = -317.33089
Logistic regression Number of obs = 822
LR chi2(7) = .
Log likelihood = -317.33089 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
d7 | -2.994327 .3175088 -9.43 0.000 -3.616632 -2.372021
d8 | -3.700124 .4205614 -8.80 0.000 -4.524409 -2.875839
d9 | -2.281124 .2723919 -8.37 0.000 -2.815002 -1.747245
d10 | -1.822599 .2584613 -7.05 0.000 -2.329173 -1.316024
d11 | -1.654227 .2691057 -6.15 0.000 -2.181665 -1.12679
d12 | -1.179057 .2715801 -4.34 0.000 -1.711344 -.6467698
pt | .8736184 .2174075 4.02 0.000 .4475076 1.299729
------------------------------------------------------------------------------
dis -2*e(ll)
634.66178
mat b = e(b)'
drop _all
svmat b
gen logitest = 1/(1+exp(-b1))
list logitest
+----------+
| logitest |
|----------|
1. | .0476828 |
2. | .0241241 |
3. | .0926984 |
4. | .1391224 |
5. | .1605384 |
|----------|
6. | .2352218 |
7. | .705498 |
+----------+
Figure 12.4, page 432.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/depression_pp, clear
sort period pd
by period pd: egen pevent = mean(event)
quietly logit event one age_18 age_18sq age_18cub pd, nocons
predict p
twoway (scatter pevent period if pd==0 & pevent > 0) ///
(scatter pevent period if pd==1 & pevent > 0) ///
(line p period if pd==0) ///
(line p period if pd==1), ///
xtitle("Age") ytitle("Proportion with event") ///
legend(pos(10) ring(0) col(1) ///
label(1 "PD=0, observed") label(2 "PD=1, observed") label(3 "PD=0, model") label(4 "PD=1, model"))
gen logitpevent = logit(pevent)
gen logitpmodel = logit(p)
twoway (scatter logitpevent period if pd==0 & pevent > 0) ///
(scatter logitpevent period if pd==1 & pevent > 0) ///
(line logitpmodel period if pd==0) ///
(line logitpmodel period if pd==1), ///
xtitle("Age") ytitle("Logit(proportion with event)") ///
legend(pos(5) ring(0) col(2) ///
label(1 "PD=0, observed") label(2 "PD=1, observed") label(3 "PD=0, model") label(4 "PD=1, model"))
Figure 12.5, page 437.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/depression_pp, clear
quietly logit event one age_18 age_18sq age_18cub pd female, nocons
predict p12_5
collapse (mean) p12_5, by (period pd female)
twoway (line p12_5 period if pd==0 & female ==0) ///
(line p12_5 period if pd==1 & female ==0) ///
(line p12_5 period if pd==0 & female ==1) ///
(line p12_5 period if pd==1 & female ==1), ///
xtitle("Age") ytitle("Fitted Hazard") ///
legend(pos(5) ring(0) col(2) ///
label(1 "PD=0, male") label(2 "PD=1, male") label(3 "PD=0, female") label(4 "PD=1, female"))
sort female pd period
gen surv = 1
replace surv = (1-p12_5)*surv[_n-1] if period > 4
twoway (line surv period if pd==0 & female ==0) ///
(line surv period if pd==1 & female ==0) ///
(line surv period if pd==0 & female ==1) ///
(line surv period if pd==1 & female ==1), ///
xtitle("Age") ytitle("Fitted Survival") ///
legend(pos(7) ring(0) col(2) ///
label(1 "PD=0, male") label(2 "PD=1, male") label(3 "PD=0, female") label(4 "PD=1, female"))
Figure 12.6, page 445.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstarrest_pp, clear
sort period abused black
by period abused black: egen pevent = mean(event)
gen logitp = log(pevent/(1-pevent))
twoway (line logitp period if black==0 & abused==0) ///
(line logitp period if black==0 & abused==1), ///
title("White") xtitle("Age") ytitle("Sample logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "not abused") label(2 "abused"))
twoway (line logitp period if black==1 & abused==0) ///
(line logitp period if black==1 & abused==1), ///
title("Black") xtitle("Age") ytitle("Sample logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "not abused") label(2 "abused"))
quietly logit event d8 d9 d10 d11 d12 d13 d14 d15 d16 d17 d18 abused black ablack, nocons
predict p
gen fittedlogit = logit(p)
by period abused black: egen fitted = mean(fittedlogit)
twoway (line fitted period if black==0 & abused==0) ///
(line fitted period if black==0 & abused==1) ///
(line fitted period if black==1 & abused==0) ///
(line fitted period if black==1 & abused==1), ///
xtitle("Age") ytitle("Fitted logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "not abused, white") label(2 "abused, white") ///
label(3 "not abused, black") label(4 "abused, black"))
Table 12.4, page 449.
/* Model A */
logit event one age_18 age_18sq age_18cub pd female nsibs, nocons
Logistic regression Number of obs = 36997
LR chi2(7) = .
Log likelihood = -2062.1428 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
one | -4.358696 .1216028 -35.84 0.000 -4.597033 -4.120358
age_18 | .0610608 .0116617 5.24 0.000 .0382043 .0839172
age_18sq | -.0073084 .0012241 -5.97 0.000 -.0097075 -.0049093
age_18cub | .0001816 .000079 2.30 0.021 .0000268 .0003364
pd | .3726011 .1623821 2.29 0.022 .054338 .6908642
female | .5586864 .1094751 5.10 0.000 .3441191 .7732536
nsibs | -.0814109 .0222726 -3.66 0.000 -.1250643 -.0377574
------------------------------------------------------------------------------
dis -2*e(ll)
4124.2856
dis -2*e(ll)+2*e(df_m)
4138.2856
/* Model B */
logit event one age_18 age_18sq age_18cub pd female sibs12 sibs34 sibs56 sibs78 sibs9plus, nocons
Logistic regression Number of obs = 36997
LR chi2(11) = .
Log likelihood = -2058.991 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
one | -4.500068 .2066902 -21.77 0.000 -4.905174 -4.094963
age_18 | .0614524 .0116639 5.27 0.000 .0385915 .0843134
age_18sq | -.0072894 .0012239 -5.96 0.000 -.0096882 -.0048906
age_18cub | .0001813 .000079 2.30 0.022 .0000265 .0003361
pd | .3727125 .1624852 2.29 0.022 .0542474 .6911776
female | .5595903 .1095306 5.11 0.000 .3449143 .7742664
sibs12 | .020851 .1976079 0.11 0.916 -.3664533 .4081553
sibs34 | .0107606 .2100347 0.05 0.959 -.4008999 .4224211
sibs56 | -.4942199 .2545432 -1.94 0.052 -.9931155 .0046757
sibs78 | -.7753991 .3437127 -2.26 0.024 -1.449064 -.1017346
sibs9plus | -.6584835 .3440501 -1.91 0.056 -1.332809 .0158424
------------------------------------------------------------------------------
dis -2*e(ll)
4117.982
dis -2*e(ll)+2*e(df_m)
4139.982
/* Model C */
logit event one age_18 age_18sq age_18cub pd female bigfamily, nocons
Logistic regression Number of obs = 36997
LR chi2(7) = .
Log likelihood = -2059.3892 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
one | -4.482812 .1087126 -41.24 0.000 -4.695884 -4.269739
age_18 | .0614096 .0116632 5.27 0.000 .0385501 .0842691
age_18sq | -.0072913 .001224 -5.96 0.000 -.0096902 -.0048924
age_18cub | .0001815 .000079 2.30 0.022 .0000267 .0003363
pd | .3710316 .1622959 2.29 0.022 .0529375 .6891257
female | .5580501 .1094735 5.10 0.000 .343486 .7726142
bigfamily | -.6107821 .1445777 -4.22 0.000 -.8941491 -.327415
------------------------------------------------------------------------------
dis -2*e(ll)
4118.7784
dis -2*e(ll)+2*e(df_m)
4132.7784
Figure 12.8, page 458 and Table 12.5, page 459.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/mathdropout_pp, clear
/* Graph 1 */
sort period female
by period female: egen pevent = mean(event)
gen logitp = log(pevent/(1-pevent))
twoway (line logitp period if female==0) ///
(line logitp period if female==1), ///
title("Within-group sample hazard functions") xtitle("Term") ytitle("Sample logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "male") label(2 "female"))
/* Model A and Graph 2 */
logit event hs11 hs12 coll1 coll2 coll3 female, nocons
Logistic regression Number of obs = 9558
LR chi2(6) = .
Log likelihood = -4902.1548 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
hs11 | -2.130802 .0567383 -37.55 0.000 -2.242007 -2.019597
hs12 | -.9424815 .0478881 -19.68 0.000 -1.036341 -.8486226
coll1 | -1.449476 .0634496 -22.84 0.000 -1.573834 -1.325117
coll2 | -.6176489 .0757141 -8.16 0.000 -.7660459 -.469252
coll3 | -.7716024 .1428008 -5.40 0.000 -1.051487 -.491718
female | .378645 .0501482 7.55 0.000 .2803564 .4769336
------------------------------------------------------------------------------
predict pA
dis -2*e(ll)
9804.3096
dis -2*e(ll)+2*e(df_m)
9816.3096
gen logitpA = logit(pA)
twoway (line logitpA period if female==0) ///
(line logitpA period if female==1), ///
title("Model A: Main effect of female") xtitle("Term") ytitle("Fitted logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "male") label(2 "female"))
/* Model B and Graph 3 */
logit event hs11 hs12 coll1 coll2 coll3 fhs11 fhs12 fcoll1 fcoll2 fcoll3, nocons
Logistic regression Number of obs = 9558
LR chi2(10) = .
Log likelihood = -4898.1351 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
hs11 | -2.00767 .0714805 -28.09 0.000 -2.147769 -1.867571
hs12 | -.9642534 .0585445 -16.47 0.000 -1.078999 -.8495082
coll1 | -1.482402 .0847111 -17.50 0.000 -1.648432 -1.316371
coll2 | -.710011 .1007333 -7.05 0.000 -.9074447 -.5125773
coll3 | -.8690378 .1907714 -4.56 0.000 -1.242943 -.4951328
fhs11 | .1567951 .0977744 1.60 0.109 -.0348393 .3484295
fhs12 | .4186602 .0792373 5.28 0.000 .263358 .5739624
fcoll1 | .4406768 .1158028 3.81 0.000 .2137075 .6676461
fcoll2 | .5707489 .1445487 3.95 0.000 .2874387 .8540592
fcoll3 | .6007739 .2857317 2.10 0.036 .0407501 1.160798
------------------------------------------------------------------------------
predict pB
dis -2*e(ll)
9796.2702
dis -2*e(ll)+2*e(df_m)
9816.2702
gen logitpB = logit(pB)
twoway (line logitpB period if female==0) ///
(line logitpB period if female==1), ///
title("Model B: General Interaction") xtitle("Term") ytitle("Fitted logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "male") label(2 "female"))
/* Model C and Graph 4 */
logit event hs11 hs12 coll1 coll2 coll3 female fltime, nocons
Logistic regression Number of obs = 9558
LR chi2(7) = .
Log likelihood = -4898.9033 Prob > chi2 = .
------------------------------------------------------------------------------
event | Coef. Std. Err. z P>|z| [95% Conf. Interval]
-------------+----------------------------------------------------------------
hs11 | -2.04592 .0646442 -31.65 0.000 -2.172621 -1.91922
hs12 | -.9255085 .0482123 -19.20 0.000 -1.020003 -.8310141
coll1 | -1.496554 .0664926 -22.51 0.000 -1.626877 -1.366231
coll2 | -.7178143 .0860535 -8.34 0.000 -.8864761 -.5491525
coll3 | -.9165523 .1556976 -5.89 0.000 -1.221714 -.6113906
female | .2274938 .0774452 2.94 0.003 .075704 .3792836
fltime | .1197699 .0469822 2.55 0.011 .0276865 .2118534
------------------------------------------------------------------------------
predict pC
dis -2*e(ll)
9797.8065
dis -2*e(ll)+2*e(df_m)
9811.8065
gen logitpC = logit(pC)
twoway (line logitpC period if female==0) ///
(line logitpC period if female==1), ///
title("Model C: Female-time interaction") xtitle("Term") ytitle("Fitted logit(hazard)") ///
legend(pos(5) ring(0) col(1) ///
label(1 "male") label(2 "female"))
Table 12.6, page 465.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstsex_pp, clear
logit event d7 d8 d9 d10 d11 d12 pt pas, nocons
predict p
gen dev = -sqrt(-2*log(1-p))
replace dev = sqrt(-2*log(p)) if event == 1
keep if (id==22) | (id==112) | (id==166) | (id==89) | (id==102) | (id==87) | (id==67) | (id==212)
sort id
by id: egen pt_max = max(pt)
by id: egen pas_max = max(pas)
by id: egen grade = max(period)
by id: egen censor = max(event)
drop pt pas event event d7-d12 p
replace censor = abs(censor - 1)
reshape wide dev, i(id) j(period)
gen ssdev = max(dev7^2, 0) + max(dev8^2, 0) + max(dev9^2, 0) + max(dev10^2, 0) + max(dev11^2, 0) + max(dev12^2, 0)
list, table
+------------------------------------------------------------------------------------------------------------------------------+
| id dev7 dev8 dev9 dev10 dev11 dev12 pt_max pas_max grade censor ssdev |
|------------------------------------------------------------------------------------------------------------------------------|
1. | 22 -.4117311 -.2944394 -.5840394 -.7176492 -.7747741 1.414463 1 -.6496482 12 0 3.713321 |
2. | 67 -.6180257 -.4476514 -.8559141 -1.029431 -1.100732 1.042969 1 2.274652 12 0 4.674059 |
3. | 87 1.81763 . . . . . 1 2.677901 7 0 3.303778 |
4. | 89 -.3248377 -.2313901 -.4644566 -.5751775 1.862369 . 0 -.0751572 11 0 4.174028 |
5. | 102 -.491339 2.369482 . . . . 1 .6049317 8 0 5.855859 |
|------------------------------------------------------------------------------------------------------------------------------|
6. | 112 -.4110716 -.2939581 -.5831423 -.716592 -.7736556 -.9562858 1 -.6609311 12 1 2.621976 |
7. | 166 -.6614533 -.4806816 -.910823 -1.090311 1.191398 . 1 2.781413 11 0 4.106381 |
8. | 212 -.2857033 -.2032141 -.409755 -.5090181 -.5524318 -.6958426 0 -.9617912 12 1 1.339299 |
+------------------------------------------------------------------------------------------------------------------------------+
Figure 12.9, page 467.
use https://stats.idre.ucla.edu/stat/stata/examples/alda/data/firstsex_pp, clear
logit event d7 d8 d9 d10 d11 d12 pt pas, nocons
predict p
gen dev = -sqrt(-2*log(1-p))
replace dev = sqrt(-2*log(p)) if event == 1
twoway scatter dev id, ///
xtitle("ID") ytitle("Deviance residual")
gen devsq = dev^2
sort id
by id: egen ssdev = sum(devsq)
twoway scatter ssdev id, ///
xtitle("ID") ytitle("SS deviance residual")
