# Worked examples: simple and multiple regression of FEV1 on age and height,
# using the `lung` data frame (one row per family; father's columns are
# prefixed with "F", e.g. FAGE, FHEIGHT, FWEIGHT, FFEV1).
# NOTE(review): `lung` must already be loaded before this section runs.

# Regression equation on p. 122 ----
# First generate FFEV1a = FFEV1 / 100.
lung$FFEV1a <- lung$FFEV1 / 100
coef(lm(FFEV1a ~ FHEIGHT, lung))

# Summary stats, middle of p. 122.
summary(lung[, c("FAGE", "FHEIGHT", "FFEV1a")])

# Regression equation, p. 122.
# The model is stored in `m` because the ANOVA table below reuses it.
m <- lm(FFEV1a ~ FAGE + FHEIGHT, lung)
coef(m)

# Covariance and correlation matrices, p. 127 ----
father_vars <- c("FAGE", "FHEIGHT", "FWEIGHT", "FFEV1a")
cov(lung[, father_vars])
cor(lung[, father_vars])

# Table 7.2, p. 132 ----
anova(m)

# Table 7.5 ----
# Drop the made-up FFEV1a variable. It has to go before reshaping, because the
# "FEV" search below would otherwise pick it up as an extra FEV column.
# Assigning NULL is a no-op when the column is absent, whereas
# lung[-which(...)] would silently drop *every* column in that case.
lung[["FFEV1a"]] <- NULL

# Reshape data from wide to long.
# The repeated variables are not adjacent in the data frame, so grep() is
# used to look up the column numbers matching each measure's name.
measures <- c("SEX", "AGE", "HEIGHT", "WEIGHT", "FVC", "FEV")
members <- c("Father", "Mother", "OldestChild", "MiddleChild",
             "YoungestChild")
# One unnamed list element per measure, each holding that measure's column
# indices (presumably in father, mother, oldest, middle, youngest order --
# this relies on the column order of `lung`; TODO confirm).
varying_cols <- lapply(measures, grep, x = colnames(lung))

lung.long <- reshape(
  lung,
  direction = "long",
  varying = varying_cols,
  v.names = measures,
  timevar = "Member",
  idvar = "ID",
  times = members
)

lung.long$FEV1a <- lung.long$FEV / 100

# Subset data to fathers and mothers only.
lung.sub <- subset(lung.long, Member %in% c("Father", "Mother"))

# Helper that lays out one panel of the table.
#
# x: data frame with numeric columns FEV1a, AGE and HEIGHT, in that order
#    (the hard-coded row labels below rely on this order).
#
# Returns a 3-row data frame with the column means (Mean), the column standard
# deviations (Sx), and the estimates and standard errors from regressing
# FEV1a on AGE and HEIGHT. The first row pairs the FEV1a mean/SD with the
# intercept's estimate/SE, hence its combined label.
f <- function(x) {
  mu <- vapply(x, mean, numeric(1))
  sigma <- vapply(x, sd, numeric(1))
  # Named `fit` so it cannot be confused with the script-level model `m`.
  fit <- lm(FEV1a ~ AGE + HEIGHT, data = x)
  data.frame(
    Mean = mu,
    Sx = sigma,
    coef(summary(fit))[, 1:2],
    row.names = c("FFEV1a/Intercept", "AGE", "HEIGHT")
  )
}

table_vars <- c("FEV1a", "AGE", "HEIGHT")

# Overall.
f(lung.sub[, table_vars])

# Separate by member (father/mother).
by(lung.sub[, table_vars], lung.sub$Member, f)

# Page 147, top of the page ----
# First convert Member to a factor with Mother as the first level and Father
# as the second, then test the HEIGHT x Member interaction.
lung.sub$Member <- factor(lung.sub$Member, levels = c("Mother", "Father"))
coef(summary(lm(FEV1a ~ AGE + HEIGHT * Member, data = lung.sub)))