R scripts for the lecture course
Machine Learning, pattern recognition and statistical data modelling
Coryn A.L. Bailer-Jones, 2007


More nonlinear stuff
--------------------


Neural network application to the PS1 Teff prediction problem

# Fit single-hidden-layer neural networks with 1, 2, 4 and 8 hidden nodes to
# predict log10(Teff) from the colours, then plot predicted against true
# values for the objects not used in training (one panel per network size).
#
# This will not work as is: the following objects must be created first.
#   xdat                     four column matrix with the four colours
#   phot.bas$teff[train.st]  one column vector of the Teff values
#   train.st                 set of indices denoting the training data set
#   rms()                    not defined in this script; presumably returns the
#                            root-mean-square of its argument -- TODO confirm
# See http://www.mpia.de/homes/calj/ps1/PS1-CBJ-001.pdf for more details and the data

library(nnet)  # provides nnet() and the predict() method for its fits

# The targets do not depend on the network size, so transform them once
# instead of recomputing log10() on every pass of the loop.
teff.train <- log10(phot.bas$teff[train.st])
teff.test  <- log10(phot.bas$teff[-train.st])

par(mfrow=c(2,2))
for (nhid in c(1,2,4,8)) {
  # linout=TRUE requests linear output units, as needed for regression.
  # nnet() starts from random initial weights, so call set.seed() beforehand
  # if reproducible panels are required.
  nnet.teff <- nnet(x=xdat[train.st,], y=teff.train, size=nhid, maxit=1000, linout=TRUE, abstol=1e-8)
  pred <- predict(nnet.teff, xdat[-train.st,])
  plot(teff.test, pred, xlab='true log(Teff)', ylab='predicted log(Teff)', cex=0.4)
  # Panel title reports the network size and the RMS residual on the test set.
  mylab <- paste("No. nodes=", nhid, " RMS=", formatC(rms(pred-teff.test), format="f", digits=3))
  title(main=mylab)
}


Mixture models

library(mclust)

# Basic run: scatter plot of the Old Faithful data, then a mixture model
# fitted by Mclust() with all of its settings left at their defaults.
plot(x = faithful$eruptions, y = faithful$waiting)
fc <- Mclust(data = faithful)
# Draw the fitted classification from the model parameters and the
# membership matrix z.
# NOTE(review): `identify` looks like an argument of older mclust releases;
# confirm it is still accepted by the installed version.
mclust2Dplot(
  data = faithful,
  parameters = fc$parameters,
  z = fc$z,
  what = "classification",
  identify = TRUE
)

# Run this to obtain other information: BIC values for G = 1 to 10,
# summarised against the data.
fc.bic <- mclustBIC(data = faithful, G = 1:10)
summary(fc.bic, data = faithful)

# Plot results for G=1:7 clusters. G=5 omitted as it crashes
# The six remaining fits are drawn on a 2 x 3 grid of panels.
# NOTE(review): `identify` looks like an argument of older mclust releases;
# confirm it is still accepted by the installed version.
n.components <- c(1, 2, 3, 4, 6, 7)
par(mfrow = c(2, 3))
for (g in n.components) {
  # Refit with exactly g components, then show the resulting classification.
  fc <- Mclust(data = faithful, G = g)
  mclust2Dplot(
    data = faithful,
    parameters = fc$parameters,
    z = fc$z,
    what = "classification",
    identify = TRUE
  )
}