# R scripts for the lecture course
# Machine Learning, pattern recognition and statistical data modelling
# Coryn A.L. Bailer-Jones, 2007

# Model selection and combination
# -------------------------------
#
# Fits Gaussian mixture models to the built-in `faithful` data set with
# mclust, compares the candidate models by BIC, and checks the number of free
# parameters of the two best models both by hand and from the BIC itself.

library(mclust)
# Note that this package reports the negative BIC, not the BIC. The optimal
# model has the LARGEST -BIC. (Annoyingly, they still call this the BIC)

# Basic run
plot(faithful$eruptions, faithful$waiting)

# Search over full range of models
fc <- Mclust(faithful)
# NOTE(review): `identify` is the argument name from the mclust release this
# script was written against; newer releases appear to have renamed it
# (possibly to `main`) -- confirm against the installed version.
mclust2Dplot(
  data = faithful, what = "classification", identify = TRUE,
  parameters = fc$parameters, z = fc$z
)
plot(x = fc, data = faithful) # This actually plots -BIC, not BIC
attributes(fc)

# Run this to obtain other information, in particular the best 3 models
fc.bic <- mclustBIC(faithful, G = 1:10)
summary(fc.bic, data = faithful)
# We see that a 3-cluster EEE model is slightly better than a 2-cluster VVV
# model.

# fc.bic does not seem to give full details on all solutions, so run on each
# specifically
fc_3eee <- Mclust(faithful, G = 3, modelNames = "EEE")
fc_2vvv <- Mclust(faithful, G = 2, modelNames = "VVV")

# Show the two fits side by side (this leaves the device in a 1x2 layout).
par(mfrow = c(1, 2))
mclust2Dplot(data = faithful, parameters = fc_3eee$parameters, z = fc_3eee$z)
mclust2Dplot(data = faithful, parameters = fc_2vvv$parameters, z = fc_2vvv$z)

# Inspect parameters
fc_3eee$parameters
# 3 parameters in covariance matrix; 2 means (x,y) and 1 weight per cluster - 1
# (as weights sum to 1) (=8).
# Total = 11
fc_2vvv$parameters
# 6 parameters in the covariance matrices; 2 means (x,y) and 1 weight per
# cluster - 1 (as weights sum to 1) (=5).
# Total = 11
#
# A symmetric d x d covariance matrix has d*(d+1)/2 free parameters, so with
# G clusters in d dimensions we generally have
#   G*d*(d+1)/2 + d*G + G - 1 = G*d*(d+3)/2 + G - 1   for the VVV case
#   (one covariance matrix per cluster), and
#   d*(d+1)/2 + d*G + G - 1                           for the EEE case
#   (a single covariance matrix shared by all clusters).
# Both give 11 here (VVV: G=2, d=2; EEE: G=3, d=2).

# From the equation for BIC (and recalling that Mclust gives -BIC, i.e.
# 2*loglik - npar*log(n)) we can extract the number of parameters:
(-fc_2vvv$bic + 2 * fc_2vvv$loglik) / log(fc_2vvv$n)
# = 11