|
DATA MINING
Desktop Survival Guide by Graham Williams |
|
|||
Predicting Salary Group |
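The survey data provides a slightly more complex example. Here we build a decision tree that predicts Salary.Group from all of the other variables in the dataset, and then print the resulting model: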
> survey.rp <- rpart(Salary.Group ~ ., data=survey)
> survey.rp
n= 32561
node), split, n, loss, yval, (yprob)
* denotes terminal node
1) root 32561 7841 <=50K (0.75919044 0.24080956)
  2) Relationship=Not-in-family,Other-relative,Own-child,Unmarried
     17800 1178 <=50K (0.93382022 0.06617978)
    4) Capital.Gain< 7073.5 17482 872 <=50K (0.95012012 0.04987988) *
    5) Capital.Gain>=7073.5 318 12 >50K (0.03773585 0.96226415) *
  3) Relationship=Husband,Wife 14761 6663 <=50K (0.54860782 0.45139218)
    6) Education=10th,11th,12th,1st-4th,5th-6th,7th-8th,9th,Assoc-acdm,
       Assoc-voc,HS-grad,Preschool,Some-college
       10329 3456 <=50K (0.66540807 0.33459193)
      12) Capital.Gain< 5095.5 9807 2944 <=50K (0.69980626 0.30019374) *
      13) Capital.Gain>=5095.5 522 10 >50K (0.01915709 0.98084291) *
    7) Education=Bachelors,Doctorate,Masters,Prof-school 4432 1225 >50K
       (0.27639892 0.72360108) *
> table(survey$Salary.Group)
<=50K >50K
24720 7841
|