|
DATA MINING
Desktop Survival Guide by Graham Williams |
|
|||
Tuning rpart |
To keep the examples simple, we use the audit dataset, remove
observations with missing values, and drop the Adjustment column.
# Setup ----
# e1071 supplies tune.rpart(), used by the tuning sections below.
library(e1071)

# Load the audit data and keep only the rows with no missing values.
audit_raw <- read.csv("audit.csv")
audit <- na.omit(audit_raw)

# Drop the Adjustment column so it is not available to the models.
audit[["Adjustment"]] <- NULL

# Model formula shared by every tuning run: Adjusted is the response.
# NOTE(review): ID looks like a row identifier -- confirm it is really
# meant to be used as a predictor.
fm <- Adjusted ~ ID + Age + Employment + Education +
  Marital + Occupation + Income + Sex +
  Deductions + Hours + Accounts
# Explore minsplit ----
# Tune rpart over minsplit = 10, 20, ..., 100 and plot the tuning result.
audit.rpart <- tune.rpart(fm, data = audit, minsplit = seq(10, 100, 10))
plot(audit.rpart, main = "Tune rpart on minsplit")
# Pause so the plot can be inspected before the next section replaces it
# (the cp and maxdepth sections pause the same way).
readline()
# Explore cp ----
# Tune rpart over a handful of candidate cp values and plot the result.
audit.rpart <- tune.rpart(
  fm,
  data = audit,
  cp = c(0.002, 0.005, 0.01, 0.015, 0.02, 0.03)
)
plot(audit.rpart, main = "Performance of rpart vs. cp")
# Wait for Enter before moving on so the plot stays on screen.
readline()
# Explore maxdepth ----
# Tune rpart over maxdepth = 1..5 and plot the tuning result.
audit.rpart <- tune.rpart(fm, data = audit, maxdepth = 1:5)
# The title names the parameter actually being tuned here (maxdepth);
# it previously repeated the cp title from the section above.
plot(audit.rpart, main = "Performance of rpart vs. maxdepth")
# Wait for Enter so the plot can be inspected.
readline()
|