# R scripts for the lecture course
# Machine Learning, pattern recognition and statistical data modelling
# Coryn A.L. Bailer-Jones, 2007

# Support Vector Machines ----

library(e1071)

# Objects used below but not created in this script: dat.baspeg, train.cl
# and Nastroclass2.
# NOTE(review): column 1 of dat.baspeg is presumably the class label
# (astroclass) and columns 2:5 the predictors -- confirm against the code
# that builds dat.baspeg.

# Train/test pieces that are reused several times further down.
svm.train.x <- dat.baspeg[train.cl, 2:5]
svm.train.y <- dat.baspeg[train.cl, 1]
svm.test.x <- dat.baspeg[-train.cl, 2:5]
svm.test.y <- dat.baspeg[-train.cl, 1]

# Fixed gamma and cost ----

# Quick look at the size of the data set and of the training index.
dim(dat.baspeg)
length(train.cl)

# Radial-kernel SVM fitted on the training rows with hand-picked
# hyperparameters.
svm.cl <- svm(
  astroclass ~ .,
  data = dat.baspeg[train.cl, ],
  kernel = "radial",
  cost = 1,
  gamma = 1
)
svm.cl
attributes(svm.cl)

# Predict the held-out rows and show the confusion matrix
# (rows: true class, columns: predicted class).
pred.svm.cl <- predict(svm.cl, svm.test.x)
conf.fixed <- table(dat.baspeg$astroclass[-train.cl], pred.svm.cl)
conf.fixed

# The same confusion matrix expressed as percentages.
# NOTE(review): Nastroclass2 presumably holds the per-class counts of the
# test set, so that each row is scaled by its own class size -- verify.
noquote(format(100 * conf.fixed / Nastroclass2, digits = 2))

# Plot the training data in the (ri, gr) plane with the fitted classes and
# the support vectors marked; the other two inputs are held at 0.2.
plot(
  svm.cl,
  dat.baspeg[train.cl, ],
  gr ~ ri,
  slice = list(iz = 0.2, zy = 0.2)
)

# Now play around and see how number of SVs and train and test errors vary
# with cost, gamma and the kernel function used.

# Tuning ----

# Grid search over gamma and cost with a fixed split; the held-out rows are
# supplied as the validation set.
svm.tune.grid <- list(gamma = c(0.1, 1, 10), cost = c(1, 5, 10))
svm.tune.ctrl <- tune.control(sampling = "fix")
svm.cl.tune <- tune(
  svm,
  train.x = svm.train.x,
  train.y = svm.train.y,
  validation.x = svm.test.x,
  validation.y = svm.test.y,
  ranges = svm.tune.grid,
  tunecontrol = svm.tune.ctrl
)

# Error obtained for every (gamma, cost) combination.
svm.cl.tune$performances

# Predict the held-out rows with the best model found by the search.
astroclass2.predict.svm.cl <- predict(svm.cl.tune$best.model, svm.test.x)

# Confusion matrix of the tuned model, again as percentages.
conf.tuned <- table(
  dat.baspeg$astroclass[-train.cl],
  astroclass2.predict.svm.cl
)
noquote(format(100 * conf.tuned / Nastroclass2, digits = 2))