###############################################################
#******** Code For Intro to R programming workshop *********
###############################################################

## ----object Example----------------------------------------------------------
# Create an object named 'a' with value 1
a <- 1
# Print the value of 'a'
a

# Create an object named 'my.number' with value 7
my.number <- 7
# Assign the value of 'my.number' to 'x'
x <- my.number
# Print the values of 'x' and 'my.number':
# c() combines the two values into one vector, which is then auto-printed
c(x, my.number)

# Names are case sensitive: 'D' and 'd' are two different objects
D <- c(5, 6)
d <- 5
# Print the values of 'D' and 'd'
D
d

#################### Exercise 1 ######################
## ----object_die--------------------------------------------------------------
# Create a vector 'die' representing a die's sides
die <- c(1, 2, 3, 4, 5, 6)
# Make a copy of 'die' named 'new.die'
new.die <- die
# Print the values of 'die' and 'new.die'
die
new.die
#################### End of Exercise 1 ######################

## ----Atomic vector Examples--------------------------------------------------
# Check if 'die' is a vector
is.vector(die)
# Determine the data type of 'die'
typeof(die)

# Create an integer vector 'a' (the L suffix marks integer literals)
a <- c(1L, -2L)
# Print the values of 'a' and its data type
a
typeof(a)
# If we include a double in integer calculations, the result is double.
typeof(a * die)

# Create a character vector 'text'; the number 2 is stored as the string "2"
# because all elements of an atomic vector share one type
text <- c("john", "mary", 2)
# Print the values of 'text' and its data type
text
typeof(text)

# Create a logical vector 'my.logic'
my.logic <- c(TRUE, FALSE, FALSE)
# Print the data type and values of 'my.logic'
typeof(my.logic)
my.logic
# The result of a condition statement is a logical value
2 < 1

# Create a complex number vector 'comp'
comp <- c(1 + 1i, 2 - 2i)
# Print the data type and values of 'comp'
typeof(comp)
comp

# Create a raw vector of length 3
raw(3)

## ----attributes die_names----------------------------------------------------
# Before assigning attributes to a vector, the attributes are NULL
attributes(die)
# Assign names to the 'die' vector
names(die) <- c("one", "two", "three", "four", "five", "six")
# Print the values and attributes of 'die'
die
attributes(die)
# Set the attributes of 'die' to NULL (this removes the names)
attributes(die) <- NULL
die

## ----die_dimension-----------------------------------------------------------
# Check the dimensions of 'die' (NULL for a vector)
dim(die)
# Changing 'dim' reshapes 'die' into a 2 by 3 matrix
dim(die) <- c(2, 3)
# Print the reshaped 'die' matrix
die
# Print the attributes of the 'die' matrix
attributes(die)
# Reshape 'die' into a 1 by 2 by 3 array
dim(die) <- c(1, 2, 3)
# Print the reshaped 'die' array and its attributes
die
attributes(die)
# Resetting the attributes of 'die' to NULL turns it back into a vector
attributes(die) <- NULL
# Print 'die'
die

## ----factor_class------------------------------------------------------------
# Create a factor variable 'gender' with two levels: "male" and "female"
gender <- factor(c("male", "female", "female", "male"))
# Print the values of 'gender'
gender
# Alternatively, label a vector of integers 1 and 2
factor(c(1, 2, 2, 1), labels = c("male", "female"))
# Print the data type and attributes of 'gender'
typeof(gender)
attributes(gender)

## ----matrix------------------------------------------------------------------
# Create a 2 by 3 matrix 'm'
# using 'die'
m <- matrix(die, nrow = 2, ncol = 3)
# Print the matrix 'm'
m
# Create a 2 by 3 matrix 'm' using 'die', filling by rows
m <- matrix(die, nrow = 2, ncol = 3, byrow = TRUE)
# Print the matrix 'm'
m

## ----array-------------------------------------------------------------------
# Create a 2 by 2 by 3 array 'ar' with values 1 to 12
ar <- array(1:12, dim = c(2, 2, 3))
# Print the array 'ar'
ar

## ----dataframe---------------------------------------------------------------
# Create a data frame 'gpa.data' with various variables
gpa.data <- data.frame(
  student_number = c(1, 2, 3),
  Gender = c("F", "F", "M"),
  GPA = c(3.5, 3.7, 3.6),
  Enroll = c(TRUE, TRUE, FALSE)
)
# Print the data frame 'gpa.data'
gpa.data

## ----card deck Example-------------------------------------------------------
# Create a vector of 4 suits
suit <- c("spades", "hearts", "clubs", "diamonds")
# Create a vector of 13 faces
face <- c(
  "king", "queen", "jack", "ten", "nine", "eight", "seven",
  "six", "five", "four", "three", "two", "ace"
)
# Create a vector of values 13 to 1 (one value per face, in the same order)
value <- 13:1
# Put all variables together: every suit is repeated once per face, and the
# faces/values are cycled once per suit, giving one row per card
deck <- data.frame(
  suit = rep(suit, each = length(face)),
  face = rep(face, times = length(suit)),
  value = rep(value, times = length(suit))
)
deck

## ----list--------------------------------------------------------------------
# Create a list 'l' containing various objects
l <- list(die, ar, m, gpa.data, gender, list("a", "b"))
# Print the list 'l'
l

## ----extract deck------------------------------------------------------------
# Extract the value in row 3 and column 1 of 'deck'
deck[3, 1]
# Extract values in rows 2 and 1 (in that order), and columns 1, 2, and 3
deck[c(2, 1), c(1, 2, 3)]
# Extract all columns of row 3 from 'deck'
deck[3, ]
# Extract values in row 3 and columns 1 to 3 of 'deck'
deck[3, 1:3]
# Exclude rows 1 to 2 and 4 to 52, and extract columns 1 to 3 of 'deck'
deck[-c(1:2, 4:52), 1:3]
# Extract values in row 3 and columns 1 and 2 of
# 'deck' using logical indices
deck[3, c(TRUE, TRUE, FALSE)]

## ----deck larger 7-----------------------------------------------------------
# Test which values in 'deck' are greater than or equal to 7
deck$value >= 7
# Create a new 'deck7' with values greater than or equal to 7
deck7 <- deck[deck$value >= 7, ]
deck7

## ----gpa modified------------------------------------------------------------
# Print the original 'gpa.data'
gpa.data
# Identify rows where 'Enroll' is TRUE; the column is already logical, so it
# can be used as the row index directly (by name rather than by position)
ind.enroll <- gpa.data$Enroll
# Add 0.1 to 'GPA' for rows where 'Enroll' is TRUE
gpa.data[ind.enroll, "GPA"] <- gpa.data[ind.enroll, "GPA"] + 0.1
# Print the modified 'gpa.data'
gpa.data

######################################################
## *************** Exercise 2 *********************
######################################################
# Sample the row numbers of 'deck' without replacement (a random permutation)
# and use them to reorder the rows
shuffled.deck <- deck[sample(nrow(deck)), ]
# Print the shuffled 'deck'
shuffled.deck
# Select the first five cards as 'player'
player <- shuffled.deck[1:5, ]
# Print the cards for 'player'
head(player)
# Keep the remaining cards as 'dealer'
dealer <- shuffled.deck[-(1:5), ]
# Print the first few of the remaining cards for 'dealer'
head(dealer)

## ----blackjack deck----------------------------------------------------------
# Create a copy of 'deck' named 'deck2'
deck2 <- deck
# Identify cards that are king, queen, jack, or ace and set their values to 10
# NOTE(review): standard blackjack scores an ace as 1 or 11 - confirm that 10
# is the intended value for this exercise
ind <- deck2$face == "king" |
  deck2$face == "queen" |
  deck2$face == "jack" |
  deck2$face == "ace"
deck2$value[ind] <- 10
# Shorter approach using '%in%' (the mask is built from 'deck2' itself so it
# stays correct even if 'deck2' is reordered independently of 'deck')
deck2$value[deck2$face %in% c("king", "queen", "jack", "ace")] <- 10
# Shuffle 'deck2'
shuffled.deck2 <- deck2[sample(nrow(deck2)), ]
# Extract the first card from shuffled 'deck2'
shuffled.deck2[1, ]

## ----functions round---------------------------------------------------------
# Calculate the rounded value of pi
round(3.141593)
# Calculate the
# value of pi rounded to two decimal places
round(3.141593, digits = 2)
# Calculate the factorial of 3
factorial(3)

## ----functions more----------------------------------------------------------
# Calculate the mean of 'die'
mean(die)
# Calculate the mean of 'die' and round the result
round(mean(die))

## ----functions sample--------------------------------------------------------
# Sample values from 'die' two times (sampling without replacement)
sample(die, size = 2)
# Sample values from 'die' six times with replacement (simulating dice rolling)
sample(die, size = 6, replace = TRUE)

## ----function example 1------------------------------------------------------
# Define a function 'average_two_number' to calculate the average of two
# numbers. It prints the average; because print() returns its argument
# invisibly, the average is also the (invisible) return value.
average_two_number <- function(a, b) {
  x <- (a + b) / 2
  print(x)
}
# Calculate and print the average of 2 and 4
average_two_number(2, 4)

## ----function example 2------------------------------------------------------
# Define a function 'C_to_F' to convert Celsius to Fahrenheit.
# 'c' is the temperature in Celsius (default 0); the Fahrenheit value is
# returned as the value of the last expression.
C_to_F <- function(c = 0) {
  c * 9 / 5 + 32
}
# Convert 30 degrees Celsius to Fahrenheit
f <- C_to_F(30)
# Print the converted temperature
f
# Convert 0 degrees Celsius to Fahrenheit using the default value
C_to_F()
# Print the class and the source code of the function 'C_to_F'
class(C_to_F)
C_to_F

######################################################
## *************** Exercise 3 *********************
######################################################
## ----function_roll_die1--------------
# Define a function 'roll_die' to simulate rolling a single die.
# Takes no arguments and returns one number drawn from 1 to 6.
roll_die <- function() {
  # Create a vector of 1 to 6, one element for each side of the die
  sides <- 1:6
  # Sample one number from 1 to 6 and return it
  sample(sides, size = 1, replace = TRUE)
}
# Roll the die using the defined function
roll_die()

## ----function_roll_dice_2----------------------------------------------------
#
# Define a function 'roll_die_2' to simulate rolling two dice and getting
# their sum. Takes no arguments.
roll_die_2 <- function() {
  # Create the die
  die <- 1:6
  # Sample from 1 to 6 twice, with replacement
  dice <- sample(die, size = 2, replace = TRUE)
  # No return() is used: a function returns the value of its last expression
  sum(dice)
}
# Roll two dice and calculate their sum using the defined function
roll_die_2()

## ----function_roll_dice_k----------------------------------------------------
# Define a function 'roll_die_k' to simulate rolling a die 'k' times.
#   side: number of sides of the die (at least 1), default 6
#   k:    number of rolls (0 or more), default 1
# Returns the sum of the 'k' rolls.
roll_die_k <- function(side = 6, k = 1) {
  # Reject impossible dice up front; with the 1:side idiom a value of
  # side = 0 would silently produce the "die" c(1, 0)
  stopifnot(
    is.numeric(side), length(side) == 1, side >= 1,
    is.numeric(k), length(k) == 1, k >= 0
  )
  # Sample k values from 1..side with replacement
  dice <- sample.int(side, size = k, replace = TRUE)
  # Sum of all rolls
  sum(dice)
}
# Roll a 4-sided die 3 times using the defined function
roll_die_k(side = 4, k = 3)

## ----if1---------------------------------------------------------------------
# Set the initial value of 'number' to 6
number <- 6
# Use an if statement to check if 'number' is even
if (number %% 2 == 0) {
  print("The number is even.")
}
# Update 'number' to 7 and check again (nothing is printed this time)
number <- 7
if (number %% 2 == 0) {
  print("The number is even.")
}

## ----function even or odd----------------------------------------------------
# Define a function 'even_odd' to determine if a number is even or odd,
# using two separate if statements
even_odd <- function(a) {
  # Is the remainder of the division by 2 zero?
  if (a %% 2 == 0) {
    print("The number is even")
  }
  # Is the remainder not zero?
  if (a %% 2 != 0) {
    print("The number is odd")
  }
}
# Check if 5 is even or odd using the function
even_odd(5)

## ----function even or odd with else------------------------------------------
# Redefine 'even_odd' with if/else, so the condition is evaluated only once
even_odd <- function(a) {
  # Is the remainder of the division by 2 zero?
  if (a %% 2 == 0) {
    print("The number is even")
  } else {
    print("The number is odd")
  }
}
# Check if 6 is even or odd using the function
even_odd(6)
# Check 11
even_odd(11)

## ----ifelse
## example --------------------------------------------------------------------
# Given grades for 4 students
grades <- c(85, 72, 94, 60)
# Determine whether each student passed or failed
pass_fail <- ifelse(grades >= 70, "Pass", "Fail")
# Print the results
pass_fail

# Simulate rolling a die 10 times and determine win/loss for each roll
dice <- sample(die, size = 10, replace = TRUE)
win_loss <- ifelse(dice == 6, "win", "loss")
# Print the results
win_loss
# What proportion of wins do you expect if you roll the die many times?

## ----example for loop--------------------------------------------------------
# Use a for loop to calculate the square of numbers from 1 to 10
for (i in 1:10) {
  x1 <- i^2
  print(x1)
}

# Calculate a factorial using a for loop.
#   n: a non-negative integer
# Returns n! (1 for n = 0). For negative 'n' a warning is signalled and NaN
# is returned, since the factorial is not defined there.
calculate_factorial <- function(n) {
  if (n < 0) {
    warning(
      "Factorial can only be calculated for non-negative integers",
      call. = FALSE
    )
    return(NaN)
  }
  # Start from 1, which is also the factorial of 0
  result <- 1
  # seq_len(0) is empty, so the loop body is skipped when n is 0
  for (i in seq_len(n)) {
    result <- result * i
  }
  result
}
# Calculate the factorial of 3
calculate_factorial(3)
# Calculate the factorial of 0
calculate_factorial(0)

## ----while_example-----------------------------------------------------------
# Find the smallest power of 2 greater than 1000 using a while loop
# Current number (initialized to 2^0)
number <- 1
# Current power (initialized to 0)
power <- 0
# Keep going while the current number is not yet greater than 1000
while (number <= 1000) {
  # Increment the power first, so that 'number' always equals 2^power
  power <- power + 1
  # Calculate the next power of 2
  number <- 2^power
}
# paste() combines the text with the value of 'number'
print(paste("The smallest power of 2 greater than 1000 is:", number))

## ----example_repeat----------------------------------------------------------
# Find the smallest power of 2 greater than 1000
# Initialize variables
number <- 1 # Current number (initialized to 2^0)
power <- 0 # Current power
# (initialized to 0)
# Start a repeat loop; it runs until 'break' is reached
repeat {
  # Calculate the next power of 2
  number <- 2^power
  # Exit the loop once the number is greater than 1000
  if (number > 1000) {
    break
  }
  # Increment the power
  power <- power + 1
}
# Print the result
print(paste("The smallest power of 2 greater than 1000 is:", number))

## ----nested loop-------------------------------------------------------------
# Multiply every combination of 1 to 5 by 1 to 5 (a multiplication table)
for (i in 1:5) {
  for (j in 1:5) {
    result <- i * j
    # Print the current multiplication, separated by tabs on one line
    cat(i, "x", j, "=", result, "\t")
  }
  # Move to the next line after each row of the table
  cat("\n")
}

##**************************************
##*
##*
##*********** Exercise 4 ************##
##*
##*
##**************************************
# Define the function to roll two dice and get the sum.
# Takes no arguments; each die is one draw from 1 to 6.
die_roll_2 <- function() {
  die1 <- sample(1:6, 1)
  die2 <- sample(1:6, 1)
  die1 + die2
}

# a) Calculate average and standard deviation of sum of two dice
# Set the total runs to 1000
total_runs <- 1000
# Preallocate 'sums' to hold the result of each iteration
sums <- integer(total_runs)
for (i in seq_len(total_runs)) {
  sums[i] <- die_roll_2()
}
average_sum <- mean(sums)
std_dev <- sd(sums)
cat("a) Long-run average of sum of two dice:", average_sum, "\n")
cat(" Standard deviation of sum of two dice:", std_dev, "\n")

# b) Find number of rolls to get sum of 12
# Initialize the counter and a starting sum that cannot equal 12
rolls_to_get_12 <- 0
sum_result <- 0
while (sum_result != 12) {
  sum_result <- die_roll_2()
  rolls_to_get_12 <- rolls_to_get_12 + 1
}
cat("b) Number of rolls to get sum of 12:", rolls_to_get_12, "\n")

# c) Calculate average number of rolls to get sum of 12 over 1000 runs
# Set the total iterations to 1000
total_runs_c <- 1000
# Preallocate a vector of size total_runs_c to keep the result of each run
rolls_to_get_12_c <- numeric(total_runs_c)
# Repeat the experiment total_runs_c times
for (i in seq_len(total_runs_c)) {
  # Reset the counter and the starting sum for this run
  rolls <- 0
  sum_result <- 0
  # Roll until the sum is 12
  while (sum_result != 12) {
    sum_result <- die_roll_2()
    rolls <- rolls + 1
  }
  # Save the result of this run
  rolls_to_get_12_c[i] <- rolls
}
average_rolls_c <- mean(rolls_to_get_12_c)
# The run count in the message is taken from 'total_runs_c' so that the text
# stays correct if the number of runs is changed
cat(
  "c) Average number of rolls to get sum of 12 over", total_runs_c, "runs:",
  average_rolls_c, "\n"
)

#***************************************************************************
#***************************************************************************
#*
# Loop efficiency: time the same loop with and without preallocation
system.time({
  # Without preallocation: 'output' starts with length 1 and is grown by
  # one element on every iteration (kept on purpose for the comparison)
  output <- NA
  for (i in 1:1000000) {
    output[i] <- i + 1
  }
})

system.time({
  # Loop with preallocation: 'output' has its final length from the start
  output <- rep(NA, 1000000)
  for (i in 1:1000000) {
    output[i] <- i + 1
  }
})

## --Example_vectorization-----------------------------------------------------
# Without vectorization
x <- c(1, 2, 3)
y <- c(4, 5, 6)
# Set z to be a vector of zeros
z <- numeric(length(x))
# Run a for loop to add x and y element by element
for (i in seq_along(x)) {
  z[i] <- x[i] + y[i]
}

# With vectorization
x <- c(1, 2, 3)
y <- c(4, 5, 6)
z <- x + y

## -----Mathematical operation in R--------------------------------------------
# Multiply each element of 'die' by 1
die * 1
# Subtract 1 from each element of 'die'
die - 1
# Multiply each element of 'die' by 2
die * 2
# Perform element-wise addition between 'die' and a subset of 'die'
# (the shorter vector is recycled)
die + die[1:2]
# Perform element-wise multiplication between 'die' and a subset of 'die'
# Warning: recycling occurs, and the longer length is not a multiple of the
# shorter one
die * die[1:4]
# Perform inner multiplication of two vectors
die %*% die
# Perform outer multiplication of two vectors
die %o% die

## ----------------------------------------------------------------------------
# Calculate the mean of each column of a matrix using 'apply'
# (MARGIN = 2 applies the function over columns)
matrix_data <- matrix(1:12, nrow = 3)
col_means <- apply(matrix_data, 2, mean)
# The result was originally stored as 'col_sums'; that name is kept as an
# alias for any later code that refers to it, although the values are means
col_sums <- col_means
# Calculate the square root of each element in a vector using 'sapply'
sqrt_12 <- sapply(1:12, sqrt)
sqrt_12
# Alternatively, calculate the square root using the exponentiation
# operator
(1:12)^0.5

######################################################
## *************** Exercise 5 *********************
######################################################
## ----solution using replicate------------------------------------------------
# Define the function to roll two dice and get the sum.
# Takes no arguments; each die is one draw from 1 to 6.
die_roll_2 <- function() {
  die1 <- sample(1:6, 1)
  die2 <- sample(1:6, 1)
  die1 + die2
}

# a) Calculate average and standard deviation of sum of two dice using
#    replicate, which evaluates the expression 'total_runs' times
total_runs <- 1000
sums <- replicate(total_runs, die_roll_2())
average_sum <- mean(sums)
std_dev <- sd(sums)
cat("a) Long-run average of sum of two dice:", average_sum, "\n")
# sd(sums) is the spread of the individual sums, not of their average
cat(" Standard deviation of sum of two dice:", std_dev, "\n")

# b) Find number of rolls to get sum of 12.
# Takes no arguments; rolls two dice until their sum is 12 and returns how
# many rolls were needed (at least 1).
get_rolls_to_get_12 <- function() {
  rolls <- 0
  repeat {
    rolls <- rolls + 1
    if (die_roll_2() == 12) {
      break
    }
  }
  rolls
}

# Simulate the process multiple times to find an average
simulations <- 10000
rolls_to_get_12 <- replicate(simulations, get_rolls_to_get_12())
average_rolls <- mean(rolls_to_get_12)
cat("b) Average rolls to get sum of 12:", average_rolls, "\n")