library(emmeans) library(ggplot2) # read in data mouse <- read.csv("https://stats.oarc.ucla.edu/wp-content/uploads/2023/03/mouse.csv") # display first few rows head(mouse) ### REVIEW OF REGRESSION ### # simple linear regression m_age <- lm(weight ~ age, data=mouse) # regression tabl summary(m_age) # dummy variable for being female dummyvars1 <- data.frame(sex=c("male", "female", "female", "male"), female=c(0, 1, 1, 0)) #dummy variable for diet dummyvars2 <- data.frame(diet=c("fat", "starch", "starch", "control", "fat"), fat=c(1, 0, 0, 0, 1), starch=c(0, 1, 1, 0, 0)) # character vector income <- c("low", "mid", "high", "high", "low", "mid") # convert it to factor f_income <- factor(income) # distinct categories, in order (first value is 1, second values is 2, ...) levels(f_income) # specifying the order of levels f_income <- factor(income, levels=c("low", "mid", "high")) levels(f_income) # underlying integer values for factor as.numeric(f_income) # 0=low, 1=mid, 2=high num_income <- c(0, 1, 2, 2, 0, 1) f_num_income <- factor(num_income, labels=c("low", "mid", "high")) # labels become the levels levels(f_num_income) # the variable now displays the labels/levels f_num_income # convert mouse vars to factors mouse$sex <- factor(mouse$sex, levels=c("male", "female")) # male is reference mouse$diet <- factor(mouse$diet, levels=c("control", "fat", "starch")) # control is reference str(mouse) # regression with a categorical predictor m_diet <- lm(weight ~ diet, data=mouse) summary(m_diet) # main effects model m_me <- lm(weight ~ sex + diet + age, data=mouse) summary(m_me) ### THE emmeans PACKAGE ### # EMMs for each diet level, averaging across levels of sex and at the mean age emm_me <- emmeans(m_me, specs=c("diet")) emm_me # display the EMMs # class of object created by emmeans class(emm_me) # EMMs for each crossing of diet and sex emm_me2 <- emmeans(m_me, specs=c("diet", "sex")) emm_me2 ## # when averaging over sex for these EMMs, use the frequencies of sex ## # in the data to weight ## emm_me <- emmeans(m_me, specs=c("diet"), weights="proportional") ## # not run, because all factors are balanced in the data # effects of diet contrast(emm_me, method="pairwise", simple="diet") # notice p-val adjustment contrast(emm_me, method="pairwise", simple="diet") # no p-value adjustment contrast(emm_me, method="pairwise", simple="diet", adjust="none") # slopes of age across diets emtrends(m_me, specs="diet", var="age") # graphs of diet effect for each sex # diet is on the x-axis # different lines/colors by sex # 95% confidence intervals emmip(emm_me2, sex ~ diet, CIs=TRUE) # model object is first argument to visualize slopes # age on x-axis # separate lines by diet # at= specifies we want estimates at ages 2, 9, and 16 # 95% CIs emmip(m_me, diet ~ age, at=list(age=c(2,9,16)), CIs=TRUE) # now we estimate at each integer between 2 and 16 for age emmip(m_me, diet ~ age, at=list(age=2:16), CIs=TRUE) # now we estimate at each integer between 2 and 16 for age # we also change the label for the y-axis emmip(m_me, diet ~ age, at=list(age=seq(2,16,0.2)), CIs=TRUE, ylab="predicted weight (g)") # using ggplot2 code to modify emmip graph emmip(m_me, diet ~ age, at=list(age=seq(2,16,0.2)), CIs=TRUE) + theme_classic() + scale_color_manual(values=c("orange", "brown", "purple")) + labs(x="diet", y="predicted weight (g)") # can also use emmip xlab=, ylab= # saving emmip plot data to data set plotdata <- emmip(m_me, diet ~ age, at=list(age=seq(2,16,0.2)), CIs=TRUE, plotit=FALSE) head(plotdata) # plotting with CI bands ggplot(plotdata, aes(x=age, y=yvar, color=diet, group=diet)) + geom_line() + geom_ribbon(aes(ymin=LCL, ymax=UCL, fill=diet), alpha=.5) ### INTERACTIONS AND MODERATIONS MODELS ### # equivalent interaction notation in R m_int1 <- lm(weight ~ diet + sex + diet:sex, data=mouse) m_int2 <- lm(weight ~ diet*sex, data=mouse) ### INTERACTION OF TWO CONTINUOUS VARIABLES ### # interaction of age and litter m_agelitter <- lm(weight ~ age*litter + sex, data=mouse) summary(m_agelitter) # simples slopes of age at litter=2,6,10 age_by_litter <- emtrends(m_agelitter, # model specs="litter", # moderator var="age", # simple effects we want at=list(litter=c(2, 6, 10))) # moderator values age_by_litter # p-values for age slopes test(age_by_litter) # graph of age slopes by litter values emmip(m_agelitter, litter ~ age, at=list(age=2:16, litter=c(2,6,10)), CIs=TRUE) ### INTERACTION OF TWO CATEGORICAL VARIABLES ### # interaction of diet and sex m_dietsex <- lm(weight ~ diet*sex + age, data=mouse) summary(m_dietsex) # EMMs for diet*sex interaction emm_dietsex <- emmeans(m_dietsex, specs=c("diet", "sex")) emm_dietsex # simple effects of diet by sex contrast(emm_dietsex, method="pairwise", simple="diet") # 95% CI for simple effects of diet by sex confint(contrast(emm_dietsex, method="pairwise", simple="diet")) # simple effects of sex by diet contrast(emm_dietsex, method="pairwise", simple="sex") # interaction contrasts contrast(emm_dietsex, method="pairwise", interaction=TRUE) # diet mapped to x-axis, separate lines/colors by sex, with 95% CIs emmip(emm_dietsex, sex ~ diet, CI=TRUE) # sex mapped to x-axis, separate lines/colors by diet, with 95% CIs emmip(emm_dietsex, diet ~ sex, CI=TRUE) ### INTERACTION OF A CATEGORICAL AND CONTINUOUS VARIABLE ### # interaction of diet and age m_dietage <- lm(weight ~ diet*age + litter, data=mouse) summary(m_dietage) # simple slopes of age by diet emtrends(m_dietage, specs=c("diet"), var="age") # p-values of slopes of age by diet test(emtrends(m_dietage, specs=c("diet"), var="age")) # EMMs for each diet at ages 2, 9, and 16 emm_dietage <- emmeans(m_dietage, specs=c("diet", "age"), at=list(age=c(2,9,16))) emm_dietage # simple effects of diet by age contrast(emm_dietage, method="pairwise", simple="diet") # slopes of age from 2 to 16 weeks for each diet emmip(m_dietage, diet ~ age, at=list(age=2:16), CIs=TRUE) # diet effects at ages 2 6 and 9 (specified in emmeans) emmip(emm_dietage, age ~ diet) ## # not run, same as above ## emmip(m_dietage, age ~ diet, at=list(age=c(2,9,16))) ### INTERACTIONS IN GENERALIZED LINEAR MODELS ### # logistic regression m_logistic <- glm(hyper ~ age, data=mouse, family="binomial") # log-odds of hypertension vs age emmip(m_logistic, ~ age, at=list(age=2:16)) # probability of hypertension vs age emmip(m_logistic, ~ age, at=list(age=2:16), type="response") # probability of hypertension vs extended ages; notice the S-shape emmip(m_logistic, ~ age, at=list(age=0:50), type="response") # logistic regression with interaction of diet and age m_hyper <- glm(hyper ~ diet*age + litter, data=mouse, family="binomial") summary(m_hyper) # exponentiating coefficients for odds ratios exp(coef(m_hyper)) # EMMs for diet*age expressed as log-odds emm_hyper_logodds <- emmeans(m_hyper, specs=c("diet", "age"), at=list(age=c(2,9,16))) emm_hyper_logodds # EMMs for diet*age expressed as probabilities emm_hyper_prob <- emmeans(m_hyper, specs=c("diet", "age"), at=list(age=c(2,9,16)), type="response") emm_hyper_prob # using EMMs expressed as log-odds, effects will be differences in log-odds contrast(emm_hyper_logodds, method="pairwise", simple="diet") # using EMMs expressed as probabilities, effects will be odds ratios contrast(emm_hyper_prob, method="pairwise", simple="diet") # slopes of age by diet in log-odds emtrends(m_hyper, specs="diet", var="age") # extract the table of estimates emt <- summary(emtrends(m_hyper, specs="diet", var="age")) # exponentiate the estimate and the confidence limits emt$OR <- exp(emt$age.trend) emt$OR_LCL <- exp(emt$asymp.LCL) emt$OR_UCL <- exp(emt$asymp.UCL) # display the results, removing the unexponentiated columns and SE emt # Forest-style plot of odds ratios plot(contrast(emm_hyper_prob, method="pairwise", simple="diet")) # Forest-style plot of odds ratios plot(contrast(emm_hyper_prob, method="pairwise", simple="diet")) + geom_vline(xintercept=1, linetype=2) # dashed vertical line at x=1 # using EMMs expressed as log odds # diet on x-axis, separate lines/colors by age # predicted log-odds of hypertension emmip(emm_hyper_logodds, age ~ diet, CIs=TRUE) # age on x-axis, separate lines by diet # predicted log-odds of hypertension emmip(m_hyper, diet ~ age, at=list(age=2:16), CIs=TRUE) # diet on x-axis, separate lines/colors by age # predicted probabilities of hypertension emmip(emm_hyper_logodds, age ~ diet, CIs=TRUE, type="response") # age on x-axis, separate lines by diet # predicted probabilities of hypertension emmip(m_hyper, diet ~ age, at=list(age=2:16), CIs=TRUE, type="response") ### DO IT YOURSELF (DIY) ANALYSIS AND VISUALIZATION OF INTERACTIONS ### # diet*age model again m_dietage <- lm(weight ~ diet*age + litter, data=mouse) summary(m_dietage) ## # not run ## mouse$age9 <- mouse$age - 9 ## m_dietage9 <- lm(weight ~ diet*age9 + litter, data=mouse) # re-centering age using I() m_dietage9 <- lm(weight ~ diet*I(age-9) + litter, data=mouse) # simple diet effects are now at age=9 summary(m_dietage9) # compare to emmeans::contrast() contrast(emm_dietage, method="pairwise", simple="diet") # log-likelihoods of 2 models c(uncentered=logLik(m_dietage), centered.at.9=logLik(m_dietage9)) # diet*age model one more time m_dietage <- lm(weight ~ diet*age + litter, data=mouse) summary(m_dietage) # changing reference group of diet from control to fat m_dietfatage <- lm(weight ~ relevel(diet, ref="fat")*age + litter, data=mouse) # age slope is now for high-fat group summary(m_dietfatage) # compare to emmeans::emtrends emtrends(m_dietage, specs="diet", var="age") # same log-likelihoods of 2 models c(control.ref=logLik(m_dietage), fat.ref=logLik(m_dietfatage)) # CIs for model coefficients confint(m_dietfatage) # expand.grid() crosses levels of variables smalldata <- expand.grid(diet=levels(mouse$diet), age=c(2, 16), litter=mean(mouse$litter)) smalldata # data we will use to plot predictions with more ages preddata <- expand.grid(diet=levels(mouse$diet), age=2:16, litter=mean(mouse$litter)) head(preddata, n=10) # just predictions for each new observation in plotdata fit <- predict(m_dietage, newdata=preddata) head(fit) # predictions and confidence intervals; result is 3 column matrix fit_ci <- predict(m_dietage, newdata=preddata, interval="confidence") head(fit_ci) # adding predicted values and CIs to data set plotdata <- cbind(preddata, predict(m_dietage, newdata=preddata, interval="confidence")) head(plotdata) # ggplot age slopes by diet ggplot(plotdata, aes(x=age, y=fit, color=diet)) + geom_line() # ggplot age slopes by diet add error bars ggplot(plotdata, aes(x=age, y=fit, color=diet)) + geom_line() + geom_errorbar(aes(ymin=lwr, ymax=upr), width=.2) # used width to narrow the error bars # ggplot age slopes by diet with pointrange CIs ggplot(plotdata, aes(x=age, y=fit, color=diet)) + geom_line() + geom_pointrange(aes(ymin=lwr, ymax=upr)) # ggplot age slopes by diet with CI bands ggplot(plotdata, aes(x=age, y=fit, color=diet)) + geom_line() + geom_ribbon(aes(ymin=lwr, ymax=upr, fill=diet), alpha=.5) # subset plot data to only ages 2, 9, 16 plotdata.age.2.9.16 <- subset(plotdata, plotdata$age==2 | plotdata$age==9 | plotdata$age==16) # only ages 2 and 16 # ggplot diet effects across ages ggplot(plotdata.age.2.9.16, aes(x=diet, y=fit, color=age)) + geom_point() + geom_errorbar(aes(ymin=lwr, ymax=upr), width=.2) # change color variable (age) to factor for distinct colors ggplot(plotdata.age.2.9.16, aes(x=diet, y=fit, color=factor(age))) + geom_point() + geom_errorbar(aes(ymin=lwr, ymax=upr), width=.2) # 3-way interaction m_3 <- lm(weight ~ diet*sex*age, data=mouse) summary(m_3) # EMMs for diet by sex at ages=2,9,16 emm_3 <- emmeans(m_3, specs=c("diet", "sex", "age"), at=list(age=c(2,9,16))) # simple effects of diet for each sex and ages=2,9,16 contrast(emm_3, method="pairwise", simple="diet") # diet on x-axis, separate lines/colors by sex, separate panels by age emmip(emm_3, sex ~ diet | age, CIs=TRUE) #Conditional interaction contrast(emm_3, interaction = "pairwise", by = "age") # simple slopes of age by diet and sex emtrends(m_3, specs=c("diet", "sex"), var="age") # age on x-axis, separate lines/colors by diet, panel by sex emmip(m_3, diet ~ age | sex, at=list(age=2:16), CIs=TRUE) ### END WORKSHOP ### ##### EXERCISES ### Exercises data set auction <- read.csv("https://stats.oarc.ucla.edu/wp-content/uploads/2023/03/auction.csv") ### Exercise 1 # 1. Currently, rarity and budget are character variables. # Convert them both to factors, with the ordering of the levels # for rarity and budget as (common, rare, unique) and (small, large), # respectively. # 2. Run a main effects linear regression modeling bid (the outcome) # predicted by hours and rarity. # 3. Interpret the coefficients ### Exercise 2 # Using the main effects model of bid regressed on hours and rarity # (from the auction data) in Exercise 1: # 1. Estimate the slopes of hours for each rarity. # 2. Plot the slopes of hours for each rarity (from 2 to 9 hours). ### Exercise 3 # Using the auction dataset: # 1. Run a linear regression modeling bid predicted by # hours, quality, and their interaction. # 2. Estimate the simple slopes of hours at each quality from 0 to 5. # 3. Graph the simple slopes of hours at each quality from 0 to 5. ### Exercise 4 # Using the auction data set # 1. Run a linear regression modeling bid predicted by rarity and budget. # 2. Estimate the simple effects of rarity across budgets. # 3. Graph the simple effects of rarity across budgets. ### Exercise 5 # Using the auction dataset # 1. Run a linear regression modeling bid predicted # by hours, rarity, and their interaction. # 2. Analyze the interaction both through estimation and graphing.