DATA MINING
Desktop Survival Guide by Graham Williams |
|||||
Tuning rpart |
To keep the examples simple we use the audit dataset, removing
entities (rows) with missing values and dropping the Adjustment column.
# Tune rpart parameters on the audit dataset with e1071's tune.rpart(),
# plotting the tuning result for each parameter in turn.
library(e1071)

# Load the data, drop rows containing missing values, and remove the
# Adjustment column so it cannot be used by the model.
audit <- na.omit(read.csv("audit.csv"))
audit$Adjustment <- NULL

# Model Adjusted from the listed columns.
# NOTE(review): ID looks like a row identifier rather than a genuine
# predictor -- confirm whether it belongs in the formula.
fm <- formula(Adjusted ~ ID + Age + Employment + Education +
                Marital + Occupation + Income + Sex +
                Deductions + Hours + Accounts)

# Explore minsplit
audit.rpart <- tune.rpart(fm, data = audit, minsplit = seq(10, 100, 10))
plot(audit.rpart, main = "Tune rpart on minsplit")

# cp
audit.rpart <- tune.rpart(fm, data = audit,
                          cp = c(0.002, 0.005, 0.01, 0.015, 0.02, 0.03))
plot(audit.rpart, main = "Performance of rpart vs. cp")
readline()  # in an interactive session, pause until Enter is pressed

# maxdepth
audit.rpart <- tune.rpart(fm, data = audit, maxdepth = 1:5)
# The title names the parameter tuned here; it previously repeated the
# cp title, which mislabelled the plot.
plot(audit.rpart, main = "Performance of rpart vs. maxdepth")
readline()