-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLabCustomerChurnDecisionTree.R
71 lines (70 loc) · 3.48 KB
/
LabCustomerChurnDecisionTree.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
### Set up ###
# The workspace is deliberately NOT cleared with rm(list = ls()): wiping the
# caller's global environment is a destructive side effect, and every object
# this script uses is (re)created below. Restart R for a truly clean session.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less clear message.
library(rpart)  # recursive partitioning package
library(rattle) # provides fancyRpartPlot() and asRules(), used later in this script
### Get data and explore ###
# Read the comma-separated churn file; its first row holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
churndata <- read.table("churndata.csv", header = TRUE, sep = ",")
names(churndata)
str(churndata)
summary(churndata)
# Lay the histograms out on a 4x4 grid; par() returns the previous settings,
# which are restored after the loop so later plots are not forced onto the grid.
old_par <- par(mfrow = c(4, 4))
# Column positions are hard-coded: columns 2 and 4 to 17 are plotted.
# hist() needs numeric input, so these are presumably the numeric columns
# of churndata.csv -- verify against the file if its layout changes.
for (i in c(2, 4:17)) {
  hist(churndata[, i], xlab = names(churndata)[i], main = names(churndata)[i])
}
par(old_par)
### Prepare variables and handle missing values ###
# Store area as a factor so it is handled as a categorical variable.
churndata$area <- factor(x = churndata$area)
# Keep only the rows that have no missing value in any column.
churndata <- stats::na.omit(object = churndata)
### Partition data into training, validate and test subsets (60/20/20) ###
set.seed(527) # fixed seed so the random split is reproducible
nobs <- nrow(churndata)
# sample() silently truncates a fractional size; floor() makes the resulting
# subset sizes explicit without changing which rows are drawn.
n_train <- floor(0.6 * nobs)
n_validate <- floor(0.2 * nobs)
trainrows <- sample(nobs, n_train)
# Rows not used for training are split between validate and test.
heldout <- setdiff(seq_len(nobs), trainrows)
validaterows <- sample(heldout, n_validate)
testrows <- setdiff(heldout, validaterows)
# Sanity check 1: the three subsets together cover every row (prints nobs).
length(union(testrows, union(validaterows, trainrows)))
# Sanity check 2: no row index occurs in more than one subset (prints 0).
# Intersecting all three sets at once would only detect a row present in ALL
# of them, so a row shared by just two subsets would go unnoticed.
anyDuplicated(c(trainrows, validaterows, testrows))
train <- churndata[trainrows, ]
validate <- churndata[validaterows, ]
test <- churndata[testrows, ]
### Create and examine classification model ###
# The tree is fitted with the loosest stopping rules (cp = 0, minsplit = 2,
# minbucket = 1) and no surrogate splits; it is pruned in a later section.
# The formula churn ~ . uses every other column of train as a predictor.
# NOTE(review): if train contains an identifier column it is used as a
# predictor too -- confirm this is intended.
# The fitted model is stored under the name `rpart`, which shadows the function
# name for variable lookups; calls such as rpart(...) still resolve to the
# function. The name is kept because later sections refer to the model by it.
rpart <- rpart(
  churn ~ .,
  data = train,
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(
    usesurrogate = 0, maxsurrogate = 0,
    cp = 0, minsplit = 2, minbucket = 1
  )
)
print(rpart)
printcp(rpart)
# dev.new() opens a new graphics device on any platform; windows() is only
# available in R for Windows and is an error elsewhere.
dev.new()
fancyRpartPlot(rpart, main = "Customer Churn Prediction Model")
# Predicted classes for the training rows themselves.
predict <- predict(rpart, newdata = train, type = "class")
# Confusion matrix as counts, then as rounded percentages of all training rows.
table(train$churn, predict, dnn = c("Actual", "Predicted"))
round(100 * table(train$churn, predict, dnn = c("% Actual", "% Predicted")) / length(predict))
### Evaluate predictive power on the validate dataset ###
# Classify the validate rows with the unpruned tree.
predict <- predict(object = rpart, newdata = validate, type = "class")
# Confusion matrix as raw counts (rows: actual class, columns: predicted class).
table(
  validate[["churn"]], predict,
  dnn = c("Actual", "Predicted")
)
# The same matrix as percentages of all validate rows, rounded to whole numbers.
round(
  100 *
    table(validate[["churn"]], predict, dnn = c("% Actual", "% Predicted")) /
    length(predict)
)
### Prune classification tree ###
# cptable holds one row per candidate subtree, including its complexity
# parameter (CP) and cross-validated error (xerror).
rpart$cptable
xerr <- rpart$cptable[, "xerror"]
# which.min() always returns a single index (the first minimum). The previous
# which(xerr == min(xerr)) returned every tied row, which made mincp a vector
# and handed prune() more than one cp value.
# NOTE(review): cptable rows run from fewest to most splits, so on a tie the
# first minimum should be the smallest tree -- confirm against the printed table.
minxerr <- which.min(xerr)
minxerr
mincp <- rpart$cptable[minxerr, "CP"]
mincp
# Cut the tree back to the subtree with the lowest cross-validated error.
rpart.prune <- prune(rpart, cp = mincp)
### Compare pruned/original models on validate dataset ###
rpart.prune$cptable
# dev.new() opens a new graphics device on any platform; windows() is only
# available in R for Windows and is an error elsewhere.
dev.new()
fancyRpartPlot(rpart.prune, main = "Pruned Customer Churn Prediction Model")
# Classify the validate rows with the pruned tree and tabulate the outcome as
# counts and as rounded percentages of all validate rows.
predict <- predict(rpart.prune, newdata = validate, type = "class")
table(validate$churn, predict, dnn = c("Actual", "Predicted"))
round(100 * table(validate$churn, predict, dnn = c("% Actual", "% Predicted")) / length(predict))
### Evaluate predictive power on the test dataset ###
# List the pruned tree as a set of rules.
asRules(rpart.prune)
# Classify the test rows with the pruned tree.
predict <- predict(object = rpart.prune, newdata = test, type = "class")
# Confusion matrix as raw counts (rows: actual class, columns: predicted class).
table(
  test[["churn"]], predict,
  dnn = c("Actual", "Predicted")
)
# The same matrix as percentages of all test rows, rounded to whole numbers.
round(
  100 *
    table(test[["churn"]], predict, dnn = c("% Actual", "% Predicted")) /
    length(predict)
)
### Score new dataset and create file ###
# Read the unlabelled customer file; its first row holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
newdata <- read.table("newcustomerdata.csv", header = TRUE, sep = ",")
names(newdata)
str(newdata)
newdata$area <- factor(newdata$area)
# NOTE(review): scoring may fail if area contains levels that never occurred
# in the training data -- confirm the two files share the same area values.
summary(newdata)
# Draw on a fresh device so the histograms do not replace whatever plot is on
# the current one, and restore the plot layout once the grid is drawn.
dev.new()
old_par <- par(mfrow = c(4, 4))
# Column positions are hard-coded: columns 3 to 16 are plotted.
for (i in 3:16) {
  hist(newdata[, i], xlab = names(newdata)[i], main = names(newdata)[i])
}
par(old_par)
# Drop incomplete rows before scoring so that the prediction vector lines up
# one-to-one with the rows it is attached to.
newdata <- na.omit(newdata)
predict <- predict(rpart.prune, newdata = newdata, type = "class")
newdata$churn <- predict # add the predicted class to the newdata frame
# row.names = FALSE keeps the header aligned with the data: with row names
# written (the default) the header line has one field fewer than every data
# row, which shifts the columns when the file is opened as a CSV.
write.table(newdata, file = "prediction.csv", sep = ",", row.names = FALSE)