R Library Matrices and matrix computations in R

R Library: Matrices and matrix computations

The R program (as a text file) for the code on this page.
In order to see more than just the results from the computations of the functions (i.e. if you want to see the functions echoed back in console as they are processed) use the echo=T option in the source function when running the program.

Tutorial on matrices and matrix operations in R.

Creating matrices

The function matrix creates matrices.

matrix(data, nrow, ncol, byrow)

The data argument is usually a list of the elements that will fill the matrix. The nrow and ncol arguments specify the dimension of the matrix. Often only one dimension argument is needed if, for example, there are 20 elements in the data list and ncol is specified to be 4 then R will automatically calculate that there should be 5 rows and 4 columns since 4*5=20. The byrow argument specifies how the matrix is to be filled. The default value for byrow is FALSE which means that by default the matrix will be filled column by column.

seq1 <- seq(1:6)
mat1 <- matrix(seq1, 2)
mat1
     [,1] [,2] [,3] 
[1,]    1    3    5
[2,]    2    4    6

#filling the matrix by rows
mat2 <- matrix(seq1, 2, byrow = T)
mat2
     [,1] [,2] [,3] 
[1,]    1    2    3
[2,]    4    5    6

In the next example we specify the number of columns without specifying the number of rows. We therefore have to name the argument ncol since we are using it out of order.

mat3 <- matrix(seq1, ncol = 2)
mat3
     [,1] [,2] 
[1,]    1    4
[2,]    2    5
[3,]    3    6

#creating the same matrix using both dimension arguments
#by using them in order we do not have to name them
mat4 <- matrix(seq1, 3, 2)
mat4
     [,1] [,2] 
[1,]    1    4
[2,]    2    5
[3,]    3    6

#creating a matrix of 20 numbers from a standard normal dist.
mat5 <- matrix(rnorm(20), 4)
mat5
           [,1]        [,2]       [,3]        [,4]        [,5] 
[1,] -0.1920780  0.09103080 -1.1044547 -1.15135828  1.34352473
[2,]  0.7306961 -0.19970060 -0.6967638 -0.85618071 -0.78089133
[3,]  0.9477167  0.08694307  0.2525230  0.06272714  0.08456276
[4,]  0.4854406 -0.49585177 -1.4194989  1.71346683 -1.18961769

Useful matrix functions

The dim function is used to list the dimensions of a matrix. The cbind and rbind functions are used to append matrices together. The dimnames function is used to manipulate the row and column names of a matrix.

#appending v1 to mat5
v1 <- c(1, 1, 2, 2)
mat6 <- cbind(mat5, v1)
mat6
                                                               v1 
[1,] -0.1920780  0.09103080 -1.1044547 -1.15135828  1.34352473  1
[2,]  0.7306961 -0.19970060 -0.6967638 -0.85618071 -0.78089133  1
[3,]  0.9477167  0.08694307  0.2525230  0.06272714  0.08456276  2
[4,]  0.4854406 -0.49585177 -1.4194989  1.71346683 -1.18961769  2

v2 <- c(1:6)
mat7 <- rbind(mat6, v2)
mat7
                                                             v1 
   -0.1920780  0.09103080 -1.1044547 -1.15135828  1.34352473  1
    0.7306961 -0.19970060 -0.6967638 -0.85618071 -0.78089133  1
    0.9477167  0.08694307  0.2525230  0.06272714  0.08456276  2
    0.4854406 -0.49585177 -1.4194989  1.71346683 -1.18961769  2
v2  1.0000000  2.00000000  3.0000000  4.00000000  5.00000000  6

#determining the dimensions of a mat7
dim(mat7)
[1] 5 6

#removing names of rows and columns
#the first NULL refers to all row names, the second to all column names 
dimnames(mat7) <- list(NULL, NULL)
mat7
           [,1]        [,2]       [,3]        [,4]        [,5] [,6] 
[1,] -0.1920780  0.09103080 -1.1044547 -1.15135828  1.34352473    1
[2,]  0.7306961 -0.19970060 -0.6967638 -0.85618071 -0.78089133    1
[3,]  0.9477167  0.08694307  0.2525230  0.06272714  0.08456276    2
[4,]  0.4854406 -0.49585177 -1.4194989  1.71346683 -1.18961769    2
[5,]  1.0000000  2.00000000  3.0000000  4.00000000  5.00000000    6

By using the bracket notation it is possible to access the rows, columns or elements in the matrix.

matrix_name[row#, col#]

mat7[1, 6]
[1] 1

#to access an entire row leave the column number blank
mat7[1,  ]
[1] -0.1920780  0.0910308 -1.1044547 -1.1513583  1.3435247  1.0000000

#to access an entire column leave the row number blank
mat7[, 6]
[1] 1 1 2 2 6

Matrix computations

Most matrix operations use the same symbols as the math operations.

#Creating mat8 and mat9
mat8 <- matrix(1:6, 2)
mat8
     [,1] [,2] [,3] 
[1,]    1    3    5
[2,]    2    4    6

mat9 <- matrix(c(rep(1, 3), rep(2, 3)), 2, byrow = T)
mat9
     [,1] [,2] [,3] 
[1,]    1    1    1
[2,]    2    2    2

#addition
mat9 + mat8
     [,1] [,2] [,3] 
[1,]    2    4    6
[2,]    4    6    8

mat9 + 3
     [,1] [,2] [,3] 
[1,]    4    4    4
[2,]    5    5    5

#subtraction
mat8 - mat9
     [,1] [,2] [,3] 
[1,]    0    2    4
[2,]    0    2    4

#inverse
solve(mat8[, 2:3])
     [,1] [,2] 
[1,]   -3  2.5
[2,]    2 -1.5

#transpose
t(mat9)
     [,1] [,2] 
[1,]    1    2
[2,]    1    2
[3,]    1    2

#multiplication
#we transpose mat8 since the dimension of the matrices have to match
#dim(2, 3) times dim(3, 2)
mat8 %*% t(mat9)
     [,1] [,2] 
[1,]    9   18
[2,]   12   24

#element-wise multiplication
mat8 * mat9
     [,1] [,2] [,3] 
[1,]    1    3    5
[2,]    4    8   12

mat8 * 4
     [,1] [,2] [,3] 
[1,]    4   12   20
[2,]    8   16   24

#division
mat8/mat9
     [,1] [,2] [,3] 
[1,]    1    3    5
[2,]    1    2    3

mat9/2
     [,1] [,2] [,3] 
[1,]  0.5  0.5  0.5
[2,]  1.0  1.0  1.0

#using submatrices from the same matrix in computations
mat8[, 1:2]
     [,1] [,2] 
[1,]    1    3
[2,]    2    4

mat8[, 2:3]
     [,1] [,2] 
[1,]    3    5
[2,]    4    6

mat8[, 1:2]/mat8[, 2:3]
          [,1]      [,2] 
[1,] 0.3333333 0.6000000
[2,] 0.5000000 0.6666667

Regression example

Using matrix computations to perform a basic linear regression on the data set hsb2.
Here is hsb2.txt as a text file for use in R.

y <- matrix(hsb2$write, ncol = 1)
y[1:10, 1]
 [1] 52 59 33 44 52 52 59 46 57 55

x <- as.matrix(cbind(1, hsb2$math, hsb2$science, hsb2$socst, hsb2$female))
x[1:10,  ]
      [,1] [,2] [,3] [,4] [,5] 
 [1,]    1   41   47   57    0
 [2,]    1   53   63   61    1
 [3,]    1   54   58   31    0
 [4,]    1   47   53   56    0
 [5,]    1   57   53   61    0
 [6,]    1   51   63   61    0
 [7,]    1   42   53   61    0
 [8,]    1   45   39   36    0
 [9,]    1   54   58   51    0
[10,]    1   52   50   51    0

n <- nrow(x)
p <- ncol(x)

#parameter estimates
beta.hat <- solve(t(x) %*% x) %*% t(x) %*% y
beta.hat
          [,1] 
[1,] 6.5689235
[2,] 0.2801611
[3,] 0.2786543
[4,] 0.2681117
[5,] 5.4282152

#predicted values
y.hat <- x %*% beta.hat
y.hat[1:5, 1]
[1] 46.43465 60.75571 46.17103 49.51943 53.66160

y[1:5, 1]
[1] 52 59 33 44 52

#the variance, residual standard error and df's
sigma2 <- sum((y - y.hat)^2)/(n - p)

#residual standard error
sqrt(sigma2)
[1] 6.101191

#degrees of freedom
n - p
[1] 195

#the standard errors, t-values and p-values for estimates
#variance/covariance matrix
v <- solve(t(x) %*% x) * sigma2

#standard errors of the parameter estimates
sqrt(diag(v))
[1] 2.81907949 0.06393076 0.05804522 0.04919499 0.88088532

#t-values for the t-tests of the parameter estimates
t.values <- beta.hat/sqrt(diag(v))
t.values
         [,1] 
[1,] 2.330166
[2,] 4.382257
[3,] 4.800642
[4,] 5.449980
[5,] 6.162227

#p-values for the t-tests of the parameter estimates
2 * (1 - pt(abs(t.values), n - p))
[1] 2.082029e-002 1.917191e-005 3.142297e-006 1.510015e-007 4.033511e-009

#checking that we got the correct results
ex1 <- lm(write ~ math + science + socst + female, hsb2)
summary(ex1)

Call: lm(formula = write ~ math + science + socst + female, data = hsb2)

Coefficients:
             Value Std. Error t value Pr(>|t|) 
(Intercept) 6.5689 2.8191     2.3302  0.0208  
       math 0.2802 0.0639     4.3823  0.0000  
    science 0.2787 0.0580     4.8006  0.0000  
      socst 0.2681 0.0492     5.4500  0.0000  
     female 5.4282 0.8809     6.1622  0.0000  

Residual standard error: 6.101 on 195 degrees of freedom
Multiple R-Squared: 0.594 
F-statistic: 71.32 on 4 and 195 degrees of freedom, the p-value is 0