# C.A.L. Bailer-Jones
# R code used in lecture 9B of Statistical Methods course,
# "Nonparametric and nonlinear regression"
# to do ridge regression

library(gplots) # for plotCI()

### Define true model and simulate experimental data from it

set.seed(51)
Ndat <- 20
x <- sort(runif(Ndat, -2.5, 7))
Xmat <- cbind(1, x, x^2, x^3, x^4)
sigTrue <- 1
modMat <- c(0, 1, 1, -0.2, 0.005) # 1 x P vector: coefficients, a_p, of polynomial sum_{p=0} a_p*x^p
y <- Xmat %*% as.matrix(modMat) + rnorm(Ndat, 0, sigTrue)
# Dimensions in the matrix multiplication:
# [Ndat x 1] = [Ndat x P] %*% [P x 1] + [Ndat]
y <- drop(y) # converts the column matrix into a vector
plotCI(x, y, uiw=sigTrue, gap=0)

# Overplot true model (by calculating model on a dense grid)
xp <- seq(min(x), max(x), length.out=1000)
Xpmat <- cbind(1, xp, xp^2, xp^3, xp^4)
yp <- Xpmat %*% as.matrix(modMat)
lines(xp, yp, col="red", lwd=2)

### Ridge regression with regularization parameter lambda
### Note that we do not use sigTrue

lambda <- 0 # now try 10, 100, 1000
Npar <- ncol(Xmat)
alpha <- solve( t(Xmat) %*% Xmat + diag(x=lambda, nrow=Npar) ) %*% t(Xmat) %*% y
# solve() with a single argument does matrix inversion;
# diag() here produces lambda*I, where I is the identity matrix.

# Overplot fitted model (by calculating on a dense grid)
yp <- Xpmat %*% alpha
lines(xp, yp, col="black", lwd=2)

# Note that as lambda -> infinity, we get the horizontal line y=0.
# This is because the data become irrelevant, and the minimum of
# the regularizer (prior) is at all parameters equal to zero.
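
### Sketch added for illustration (not part of the original lecture code):
### loop over several lambda values and overplot the corresponding ridge
### fits, to see the shrinkage towards y=0 as lambda grows. The particular
### lambda values follow the suggestion above; the colours are arbitrary.
lambdaSet <- c(0, 10, 100, 1000)
cols     <- c("black", "blue", "darkgreen", "orange")
plotCI(x, y, uiw=sigTrue, gap=0)
lines(xp, Xpmat %*% as.matrix(modMat), col="red", lwd=2) # true model
for(i in seq_along(lambdaSet)) {
  # same normal-equations solution as above, with lambda*I added
  alphaRidge <- solve( t(Xmat) %*% Xmat + diag(x=lambdaSet[i], nrow=Npar) ) %*% t(Xmat) %*% y
  lines(xp, Xpmat %*% alphaRidge, col=cols[i], lwd=2)
}
legend("topleft", legend=paste("lambda =", lambdaSet), col=cols, lwd=2)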
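
### Another quick check (a sketch, assuming the variables above are defined):
### at lambda=0 ridge regression reduces to ordinary least squares, so the
### coefficients should match those from lm(). The "- 1" in the formula
### drops lm()'s own intercept, because Xmat already contains the column of 1s.
alphaOLS <- solve( t(Xmat) %*% Xmat ) %*% t(Xmat) %*% y
ols <- lm(y ~ Xmat - 1)
cbind(drop(alphaOLS), coef(ols)) # the two columns should agree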