#run if you have not installed the packages
#install.packages(c("MASS", "ggplot2", "tidyverse", "nnet", "mlogit", "ordinal", "brant"), 
#                 dependencies = TRUE)
#load packages used in the workshop
library(tidyverse)
library(MASS)
library(nnet)
library(mlogit)
library(brant)
library(ordinal)


# Read the hsb demo data set from the UCLA server
hsb <- read.csv(file = "https://stats.idre.ucla.edu/stat/data/hsbdemo.csv")
# Column names of the data frame
names(hsb)
# Structure (type and first values) of every column
str(hsb)

## Convert `female` into a factor
hsb[["female"]] <- factor(hsb[["female"]])
# Levels of the new factor (the first one is the reference level)
levels(hsb[["female"]])
# Count of observations per level
table(hsb[["female"]])
# Share of observations per level
prop.table(table(hsb[["female"]]))

# Make "male" the reference (first) level
hsb[["female"]] <- relevel(hsb[["female"]], ref = "male")
# Confirm that the reference level changed
levels(hsb[["female"]])

# Exercise 1

# 1- In the hsb data, transform the variables `schtyp` (school type),
# `prog` (program), `honors` (honors program), and
# `ses` (socioeconomic status) to unordered `factor` variables
# and report the proportion of each category for those variables.


# Convert schtyp, prog and honors to factors with their default level order
hsb[c("schtyp", "prog", "honors")] <-
  lapply(hsb[c("schtyp", "prog", "honors")], factor)
# Proportion of observations in each category
prop.table(table(hsb[["schtyp"]]))
prop.table(table(hsb[["prog"]]))
prop.table(table(hsb[["honors"]]))
# ses: levels listed explicitly in the order "low", "middle", "high".
# This only fixes the level order; the factor itself stays unordered.
hsb[["ses"]] <- factor(hsb[["ses"]], levels = c("low", "middle", "high"))
prop.table(table(hsb[["ses"]]))



#2- Change the reference of variable `honors` to "not enrolled"

# relevel() moves the named level to the first position, which is the
# reference category used by the modelling functions. The assignment must be
# a complete statement: a dangling `hsb$honors <-` would make R continue
# parsing and assign the next expression in the file to `honors`.
hsb$honors <- relevel(hsb$honors, ref = "not enrolled")
# check to see reference changed
levels(hsb$honors)

## Exercise 2, multinomial logistic regression:

#   In `hsb` data, first center the math score at its mean and call it c.math.
# Then, run a multinomial logistic regression with the `ses` variable as a nominal
# outcome and use variables c.math and female as predictors,
# and with the interaction between c.math and female.

# scale() returns a one-column matrix carrying a "scaled:center" attribute;
# as.numeric() strips that so c.math is stored as a plain numeric column.
hsb$c.math <- as.numeric(scale(hsb$math, center = TRUE, scale = FALSE))

  

###### Exercise 3: ordinal logistic regression of `apply`
# on `pared`, `public` and `gpa`
# Read the data into R
dat <- read.csv(file = "https://stats.idre.ucla.edu/stat/data/ologit.csv")
# Structure of the data
str(dat)

# Turn `apply` into an ordered factor; ordered(x, ...) is shorthand for
# factor(x, ..., ordered = TRUE). The labels are matched to the sorted
# unique values of the column.
dat[["apply"]] <- ordered(
  dat[["apply"]],
  labels = c("unlikely", "somewhat likely", "very likely")
)
str(dat[["apply"]])
# Convert `pared` to a factor
dat[["pared"]] <- factor(dat[["pared"]])
# Inspect the levels before relabelling
levels(dat[["pared"]])
# Rename the two levels to "not attend" and "attend"
levels(dat[["pared"]]) <- c("not attend", "attend")
# Structure of pared after relabelling
str(dat[["pared"]])


# Convert `public` to a factor labelled "private" / "public"
dat[["public"]] <- factor(dat[["public"]], labels = c("private", "public"))