Applied Longitudinal Data Analysis, Chapter 10

R Textbook Examples Applied Longitudinal Data Analysis: Modeling Change and Event Occurrence by Judith D. Singer and John B. Willett Chapter 10: Describing Discrete-time Event Occurrence Data

The examples on the page were written in R version 2.4.1. This page requires package survival.

library(survival)

Table 10.1 on page 327.

teachers<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/teachers.csv", sep=",", header=T)
ts <- survfit( Surv(t, 1-censor)~ 1, conf.type="none", data=teachers)
h<-ts$n.event/ts$n.risk
nlost<-ts$n.risk-ts$n.event- ts$n.risk[-1]
nlost[12] = ts$n.risk[12]-ts$n.event[12]
tab10.1<-cbind(time=ts$time, risk=ts$n.risk, left=ts$n.event, censored=nlost, hazard=h, survival=ts$surv)
tab10.1

      time risk left censored     hazard  survival
 [1,]    1 3941  456        0 0.11570667 0.8842933
 [2,]    2 3485  384        0 0.11018651 0.7868561
 [3,]    3 3101  359        0 0.11576911 0.6957625
 [4,]    4 2742  295        0 0.10758570 0.6209084
 [5,]    5 2447  218        0 0.08908868 0.5655925
 [6,]    6 2229  184        0 0.08254823 0.5189038
 [7,]    7 2045  123      280 0.06014670 0.4876935
 [8,]    8 1642   79      307 0.04811206 0.4642295
 [9,]    9 1256   53      255 0.04219745 0.4446402
[10,]   10  948   35      265 0.03691983 0.4282242
[11,]   11  648   16      241 0.02469136 0.4176508
[12,]   12  391    5      386 0.01278772 0.4123100

Figure 10.1, page 333.

plot(ts$time, h, type="l", ylab="Estimated hazard probability", xlab="years in teaching", 
     ylim=c(0, .15), xlim=c(0, 13))
text(8, .08, "All teachers", adj=0)

plot(ts$time, ts$surv, type="l", ylab="Estimated Survival Probability", xlab="years in teaching", 
     ylim=c(0, 1.), xlim=c(0, 13))
abline(h=c(.5), lty=2)
abline(v=c(6.6), lty=2)
text(8, .6, "All teachers(6.6 years)", adj=0, cex=.7)

Fig. 10.2, page 340

par(mfrow=c(1, 2), cex=0.7)

# Panel A

cocaine<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/cocaine_relapse.csv", sep=",", header=T)
ts <- survfit( Surv(week, 1-censor)~ 1, conf.type="none", data=cocaine)
h<-ts$n.event/ts$n.risk

plot(ts$time, h, type="l", ylab=" ", main="Estimated Hazard Function", xlim=c(0, 13))
plot(ts$time, ts$surv, type="l", ylab=" ", main="Estimated Survival Function", xlim=c(0, 13))
abline(h=c(.5), lty=2)
abline(v=c(7.6), lty=2)

Image ch10.h1

# panel B

firstsex<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/firstsex.csv", sep=",", header=T)
ts <- survfit( Surv(time, 1-censor)~ 1, conf.type="none", data=firstsex)
h<-ts$n.event/ts$n.risk

plot(ts$time, h, type="l", ylab=" ", main="Estimated Hazard Function", xlim=c(6, 13))
plot(ts$time, ts$surv, type="l", ylab=" ", main="Estimated Survival Function",  xlim=c(6, 13))
abline(h=c(.5), lty=2)
abline(v=c(10.6), lty=2)

Image ch10.h5

# panel C
rm(list=ls())
suicide<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/suicide.csv", sep=",", header=T)
ts <- survfit( Surv(time, 1-censor)~ 1, conf.type="none", data=suicide)
h<-ts$n.event/ts$n.risk

plot(ts$time, h, type="l", ylab=" ", main="Estimated Hazard Function", xlim=c(4, 24))
plot(ts$time, ts$surv, type="l", ylab=" ", main="Estimated Survival Function",  xlim=c(4, 24))
abline(h=c(.5), lty=2)
abline(v=c(14.8), lty=2)

Image ch10.h6

# panel D
congress<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/congress.csv", sep=",", header=T)
ts <- survfit( Surv(time, 1-censor)~ 1, conf.type="none", data=congress)
h<-ts$n.event/ts$n.risk

plot(ts$time, h, type="l", ylab=" ", main="Estimated Hazard Function", xlim=c(0,8))
plot(ts$time, ts$surv, type="l", ylab=" ", main="Estimated Survival Function",  xlim=c(0,8))
abline(h=c(.5), lty=2)
abline(v=c(3.5), lty=2)

Image ch10.h7

Table 10.2 on page 349. The middle column for the survival function is based on the formula (10.8) on page 350.

rm(list=ls())
teachers<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/teachers.csv", sep=",", header=T)
ts <- summary(survfit( Surv(t, 1-censor)~ 1, conf.type="none", error="greenwood", data=teachers))
h<-ts$n.event/ts$n.risk
se.h<-sqrt(h*(1-h)/ts$n.risk)
s.sqrt<-(ts$std.err)^2/(ts$surv)^2
tab10.2<-cbind(t=ts$time, n=ts$n.risk, hazard = h, se.h, survival=ts$surv, s.sqrt, stderr=ts$std.err)
tab10.2

       t    n     hazard        se.h  survival       s.sqrt      stderr
 [1,]  1 3941 0.11570667 0.005095352 0.8842933 3.320134e-05 0.005095352
 [2,]  2 3485 0.11018651 0.005304108 0.7868561 6.873392e-05 0.006523503
 [3,]  3 3101 0.11576911 0.005745506 0.6957625 1.109546e-04 0.007328813
 [4,]  4 2742 0.10758570 0.005917344 0.6209084 1.549210e-04 0.007728276
 [5,]  5 2447 0.08908868 0.005758804 0.5655925 1.948890e-04 0.007895820
 [6,]  6 2229 0.08254823 0.005828952 0.5189038 2.352549e-04 0.007958957
 [7,]  7 2045 0.06014670 0.005257621 0.4876935 2.665487e-04 0.007962239
 [8,]  8 1642 0.04811206 0.005281208 0.4642295 2.973305e-04 0.008004838
 [9,]  9 1256 0.04219745 0.005672654 0.4446402 3.324074e-04 0.008106700
[10,] 10  948 0.03691983 0.006124306 0.4282242 3.728453e-04 0.008268668
[11,] 11  648 0.02469136 0.006096155 0.4176508 4.119139e-04 0.008476499
[12,] 12  391 0.01278772 0.005682161 0.4123100 4.450427e-04 0.008698106

Figure 10.4, page 353

Demonstrating the difference between the person-oriented data set teachers and the person-period data set teachers_pp.

teachers<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/teachers.csv", sep=",", header=T)
teachers[teachers$id==20 | teachers$id == 126 | teachers$id ==129, ]
     id  t censor
19   20  3      0
125 126 12      0
128 129 12      1

To go from person-level/person-oriented data to person-period data, also see our FAQ page.

rm(list=ls())
teachers.pp<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/teachers_pp.csv", sep=",", header=T)
t<-subset(teachers.pp, id %in% c(20, 126, 129))
t
     id period event
95   20      1     0
96   20      2     0
97   20      3     1
740 126      1     0
741 126      2     0
742 126      3     0
743 126      4     0
744 126      5     0
745 126      6     0
746 126      7     0
747 126      8     0
748 126      9     0
749 126     10     0
750 126     11     0
751 126     12     1
760 129      1     0
761 129      2     0
762 129      3     0
763 129      4     0
764 129      5     0
765 129      6     0
766 129      7     0
767 129      8     0
768 129      9     0
769 129     10     0
770 129     11     0
771 129     12     0

Table 10.3, page 355

Cross-tabulation of event indicator (event) and time-period indicator (period) in the person-period data set to yield components of the life table.

rm(list=ls())
teachers.pp<-read.table("https://stats.idre.ucla.edu/stat/examples/alda/teachers_pp.csv", sep=",", header=T)
attach(teachers.pp)
temp<-data.frame(table(period, event))
event0<-subset(temp, event==0)
event1<-subset(temp, event==1)
total<-event0$Freq + event1$Freq
proportion<-event1$Freq/total

tab10.3<-cbind(period=event0$period, event0=event0$Freq, event1=event1$Freq, total, proportion)
tab10.3

      period event0 event1 total proportion
 [1,]      1   3485    456  3941 0.11570667
 [2,]      2   3101    384  3485 0.11018651
 [3,]      3   2742    359  3101 0.11576911
 [4,]      4   2447    295  2742 0.10758570
 [5,]      5   2229    218  2447 0.08908868
 [6,]      6   2045    184  2229 0.08254823
 [7,]      7   1922    123  2045 0.06014670
 [8,]      8   1563     79  1642 0.04811206
 [9,]      9   1203     53  1256 0.04219745
[10,]     10    913     35   948 0.03691983
[11,]     11    632     16   648 0.02469136
[12,]     12    386      5   391 0.01278772