R Textbook Examples Applied Longitudinal Data Analysis: Modeling Change and Event Occurrence by Judith D. Singer and John B. Willett Chapter 2: Exploring Longitudinal Data on Change
The comma separated text files linked on the main page have capitalized variable names. On this page the variable names are all lower case. For the appropriate lower case versions use tolerance1.txt and tolerance1_pp.txt.
Fig. 2.1, p. 18.
Inputting and printing the tolerance “person-level” data
tolerance <- read.table("https://stats.idre.ucla.edu/wp-content/uploads/2016/02/tolerance1.txt", sep=",", header=T)
print(tolerance)
id tol11 tol12 tol13 tol14 tol15 male exposure
1 9 2.23 1.79 1.90 2.12 2.66 0 1.54
2 45 1.12 1.45 1.45 1.45 1.99 1 1.16
3 268 1.45 1.34 1.99 1.79 1.34 1 0.90
4 314 1.22 1.22 1.55 1.12 1.12 0 0.81
5 442 1.45 1.99 1.45 1.67 1.90 0 1.13
6 514 1.34 1.67 2.23 2.12 2.44 1 0.90
7 569 1.79 1.90 1.90 1.99 1.99 0 1.99
8 624 1.12 1.12 1.22 1.12 1.22 1 0.98
9 723 1.22 1.34 1.12 1.00 1.12 0 0.81
10 918 1.00 1.00 1.22 1.99 1.22 0 1.21
11 949 1.99 1.55 1.12 1.45 1.55 1 0.93
12 978 1.22 1.34 2.12 3.46 3.32 1 1.59
13 1105 1.34 1.90 1.99 1.90 2.12 1 1.38
14 1542 1.22 1.22 1.99 1.79 2.12 0 1.44
15 1552 1.00 1.12 2.23 1.55 1.55 0 1.04
16 1653 1.11 1.11 1.34 1.55 2.12 0 1.25
Inputting and printing the tolerance “person-level” data
tolerance.pp <- read.table("https://stats.idre.ucla.edu/wp-content/uploads/2016/02/tolerance1_pp.txt", sep=",", header=T)
attach(tolerance.pp)
print(tolerance.pp[c(1:9, 76:80), ])
id age tolerance male exposure time
1 9 11 2.23 0 1.54 0
2 9 12 1.79 0 1.54 1
3 9 13 1.90 0 1.54 2
4 9 14 2.12 0 1.54 3
5 9 15 2.66 0 1.54 4
6 45 11 1.12 1 1.16 0
7 45 12 1.45 1 1.16 1
8 45 13 1.45 1 1.16 2
9 45 14 1.45 1 1.16 3
76 1653 11 1.11 0 1.25 0
77 1653 12 1.11 0 1.25 1
78 1653 13 1.34 0 1.25 2
79 1653 14 1.55 0 1.25 3
80 1653 15 2.12 0 1.25 4
Table 2.1, p. 20. Bivariate correlation among tolerance scores assessed on five occassions.
cor(tolerance[ , 2:6])
tol11 tol12 tol13 tol14 tol15
tol11 1.00000000 0.6572937 0.06194915 0.1407631 0.2635371
tol12 0.65729370 1.0000000 0.24755116 0.2056198 0.3922781
tol13 0.06194915 0.2475512 1.00000000 0.5871742 0.5692116
tol14 0.14076312 0.2056198 0.58717422 1.0000000 0.8254566
tol15 0.26353705 0.3922781 0.56921163 0.8254566 1.0000000
Fig. 2.2, p. 25. Empirical growth plots.
library(lattice) # load lattice library for xyplot function xyplot(tolerance ~ age | id, data=tolerance.pp, as.table=T)

Fig. 2.3, p. 27. Smooth nonparametric trajectories superimposed on empirical growth plots.
xyplot(tolerance~age | id, data=tolerance.pp,
prepanel = function(x, y) prepanel.loess(x, y, family="gaussian"),
xlab = "Age", ylab = "Tolerance",
panel = function(x, y) {
panel.xyplot(x, y)
panel.loess(x,y, family="gaussian") },
ylim=c(0, 4), as.table=T)
Table 2.2, p. 30. Fitting separate within person OLS regression models.
by(tolerance.pp, id, function(x) summary(lm(tolerance ~ time, data=x)))
tolerance.pp$id: 9
Call:
lm(formula = tolerance ~ time, data = x)
Residuals:
1 2 3 4 5
0.328 -0.231 -0.240 -0.139 0.282
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.9020 0.2519 7.549 0.00482 **
time 0.1190 0.1029 1.157 0.33105
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
Residual standard error: 0.3253 on 3 degrees of freedom
Multiple R-Squared: 0.3085, Adjusted R-squared: 0.07802
F-statistic: 1.339 on 1 and 3 DF, p-value: 0.3311
------------------------------------------------------------------------------
<some output omitted>
------------------------------------------------------------------------------
tolerance.pp$id: 1653
Call:
lm(formula = tolerance ~ time, data = x)
Residuals:
76 77 78 79 80
0.156 -0.090 -0.106 -0.142 0.182
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.95400 0.13926 6.851 0.00637 **
time 0.24600 0.05685 4.327 0.02276 *
---
Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
Residual standard error: 0.1798 on 3 degrees of freedom
Multiple R-Squared: 0.8619, Adjusted R-squared: 0.8159
F-statistic: 18.72 on 1 and 3 DF, p-value: 0.02276
Fig. 2.4, p. 31.
# stem plot for the intercepts
int <- by(tolerance.pp, id, function(data)
coefficients(lm(tolerance ~ time, data = data))[[1]])
int <- unlist(int)
names(int) <- NULL
summary(int)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.954 1.138 1.287 1.358 1.547 1.902
stem(int, scale=2)
Decimal point is 1 place to the left of the colon
9 : 5
10 : 03
11 : 2489
12 : 7
13 : 1
14 : 3
15 : 448
16 :
17 : 3
18 : 2
19 : 0
# stem plot for fitted rate of change
rate <- by(tolerance.pp, id, function(data)
coefficients(lm(tolerance ~ time, data = data))[[2]])
rate <- unlist(rate)
names(rate) <- NULL
summary(rate)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.09800 0.02225 0.13100 0.13080 0.18980 0.63200
stem(rate, scale=2)
Decimal point is 1 place to the left of the colon
-1 | 0
-0 | 53
0 | 2256
1 | 24567
2 | 457
3 |
4 |
5 |
6 | 3
# stem plot for R sq
rsq <- by(tolerance.pp, id, function(data)
summary(lm(tolerance ~ time, data = data))$r.squared)
rsq <- unlist(rsq)
names(rsq) <- NULL
summary(rsq)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.01537 0.25050 0.39170 0.49100 0.79710 0.88640
stem(rsq, scale=2)
Decimal point is 1 place to the left of the colon
0 : 27
1 : 3
2 : 55
3 : 113
4 : 5
5 :
6 : 8
7 : 78
8 : 6889
Fig. 2.5, p. 32. Fitted OLS trajectories superimposed on the empirical growth plots.
xyplot(tolerance ~ age | id, data=tolerance.pp,
panel = function(x, y){
panel.xyplot(x, y)
panel.lmline(x, y)
}, ylim=c(0, 4), as.table=T)
Fig. 2.6, p. 34. The collection of the trajectories of the raw data in the top panel; in the bottom panel is the collection of fitted OLS trajectories.
#plot of the raw data interaction.plot(tolerance.pp$age, tolerance.pp$id, tolerance.pp$tolerance)
# fitting the linear model by id fit <- by(tolerance.pp, id, function(bydata) fitted.values(lm(tolerance ~ time, data=bydata))) fit <- unlist(fit) # plotting the linear fit by id interaction.plot(age, id, fit, xlab="age", ylab="tolerance")
Table 2.3, p. 37 Descriptive statistics of the estimates obtained by fitting the linear model by id.
#obtaining the intercepts from linear model by id
ints <- by(tolerance.pp, tolerance.pp$id,
function(data) coefficients(lm(tolerance ~ time, data=data))[[1]])
ints1 <- unlist(ints)
names(ints1) <- NULL
mean(ints1)
[1] 1.35775
sqrt(var(ints1))
[1] 0.2977792
#obtaining the slopes from linear model by id
slopes <- by(tolerance.pp, tolerance.pp$id,
function(data) coefficients(lm(tolerance ~ time, data=data))[[2]])
slopes1 <- unlist(slopes)
names(slopes1) <- NULL
mean(slopes1)
[1] 0.1308125
sqrt(var(slopes1))
[1] 0.172296
cor( ints1, slopes1)
[1] -0.4481135
Fig. 2.7, 38 OLS fitted trajectories separated by levels of selected predictors. The two first panels are separated by gender; the two last are separated by levels of exposure.
# fitting the linear model by id, males only tolm <- tolerance.pp[male == 1 , ] fitmlist <- by(tolm, tolm$id, function(bydata) fitted.values(lm(tolerance ~ time, data=bydata))) fitm <- unlist(fitmlist) #appending the average for the whole group lm.m <- fitted( lm(tolerance ~ time, data=tolm) ) names(lm.m) <- NULL fit.m2 <- c(fitm, lm.m[1:5]) age.m <- c(tolm$age, seq(11,15)) id.m <- c(tolm$id, rep(111, 5)) #plotting the linear fit by id, males #id.m=111 denotes the average value for males interaction.plot(age.m, id.m, fit.m2, ylim=c(0, 4), xlab="AGE", ylab="TOLERANCE", lwd=1) title(main="Males")
#fitting the linear model by id, females only tol.pp.fm <- tolerance.pp[tolerance.pp$male == 0, ] fit.fm <- by(tol.pp.fm, tol.pp.fm$id, function(data) fitted.values(lm(tolerance ~ time, data=data))) fit.fm1 <- unlist(fit.fm) names(fit.fm1) <- NULL #appending the average for the whole group lm.fm <- fitted( lm(tolerance ~ time, data=tol.pp.fm) ) names(lm.fm) <- NULL fit.fm2 <- c(fit.fm1, lm.fm[1:5]) age.fm <- c(tol.pp.fm$age, seq(11,15)) id.fm <- c(tol.pp.fm$id, rep(1111, 5)) #plotting the linear fit by id, females #id.fm=1111 denotes the average for females interaction.plot(age.fm, id.fm, fit.fm2, ylim=c(0, 4), xlab="AGE", ylab="TOLERANCE", lwd=1) title(main="Females")
#We can include both plot in the same graph windows.options(width = 12, height = 5) par(mfrow = c(1, 2), mar = c(4, 4, 2, 1)) interaction.plot(age.m, id.m, fit.m2, ylim=c(0, 4), xlab="AGE", ylab="TOLERANCE", lwd=1) title(main="Males") interaction.plot(age.fm, id.fm, fit.fm2, ylim=c(0, 4), xlab="AGE", ylab="TOLERANCE", lwd=1) title(main="Females")
windows.options(reset = TRUE)
#fitting the linear model by id, low exposure
tol.pp.low <- tolerance.pp[tolerance.pp$exposure < 1.145 , ]
fit.low <- by(tol.pp.low, tol.pp.low$id,
function(data) fitted.values(lm(tolerance ~ time, data=data)))
fit.low1 <- unlist(fit.low)
names(fit.low1) <- NULL
#appending the average for the whole group
lm.low <- fitted( lm(tolerance ~ time, data=tol.pp.low) )
names(lm.low) <- NULL
fit.low2 <- c(fit.low1, lm.low[1:5])
age.low <- c(tol.pp.low$age, seq(11,15))
id.low <- c(tol.pp.low$id, rep(1, 5))
#plotting the linear fit by id, low Exposure
#id.low=1 denotes the average for group
interaction.plot(age.low, id.low, fit.low2, ylim=c(0, 4), xlab="AGE", ylab="TOLERANCE", lwd=1)
title(main="Low Exposure")
#fitting the linear model by id, high exposure
tol.pp.hi <- tolerance.pp[tolerance.pp$exposure >= 1.145 , ]
fit.hi <- by(tol.pp.hi, tol.pp.hi$id,
function(data) fitted.values(lm(tolerance ~ time, data=data)))
fit.hi1 <- unlist(fit.hi)
names(fit.hi1) <- NULL
#appending the average for the whole group
lm.hi <- fitted( lm(tolerance ~ time, data=tol.pp.hi) )
names(lm.hi) <- NULL
fit.hi2 <- c(fit.hi1, lm.hi[1:5])
age.hi <- c(tol.pp.hi$age, seq(11,15))
id.hi <- c(tol.pp.hi$id, rep(1, 5))
#plotting the linear fit by id, High Exposure
#id.hi=1 denotes the average for group
interaction.plot(age.hi, id.hi, fit.hi2, ylim=c(0, 4), xlab="AGE", ylab="TOLERANCE", lwd=1)
title(main="High Exposure")
Fig. 2.8, p. 40. OLS estimates plotted against the predictors male and exposure.
#Using the slopes and intercepts from the linear model fitted by id
#generated for use in table 2.3
plot(tolerance$male, ints1, xlab="Male", ylab="Fitted initial status",
xlim=c(0, 1), ylim=c(0.5, 2.5))
cor(tolerance$male, ints1)
[1] 0.008630119
plot(tolerance$exposure, ints1, xlab="Exposure", ylab="Fitted initial status",
xlim=c(0, 2.5), ylim=c(0.5, 2.5))
cor(tolerance$exposure, ints1)
[1] 0.2324426
plot(tolerance$male, slopes1, xlab="Male", ylab="Fitted rate of change",
xlim=c(0, 1), ylim=c(-0.4, .8))
cor(tolerance$male, slopes1) [1] 0.1935703 plot(tolerance$exposure, slopes1, xlab = "Exposure", ylab = "Fitted rate of change", xlim = c(0, 2.5), ylim = c(-0.2, 0.8))
cor(tolerance$exposure, slopes1) [1] 0.4420925













