|
# Importing the dataset
# ---------------------
dataset = read.csv('../../data_files/Churn_Modelling.csv')
# Keep columns 4 to 14 of the raw file; everything below works on this subset.
dataset = dataset[4:14]

# Encoding the categorical variables as numeric codes
# (as.matrix() further down must yield a purely numeric matrix for xgboost).
dataset$Geography = as.numeric(factor(dataset$Geography,
                                      levels = c('France', 'Spain', 'Germany'),
                                      labels = c(1, 2, 3)))
dataset$Gender = as.numeric(factor(dataset$Gender,
                                   levels = c('Female', 'Male'),
                                   labels = c(1, 2)))

# Every column except the target is a feature. Selecting by name replaces the
# positional `-11` drop, so the target can never slip into the feature matrix.
# NOTE(review): assumes `Exited` is the 11th kept column, as the original
# `[-11]` implied - confirm against the CSV header.
feature_cols = setdiff(names(dataset), 'Exited')

# Splitting the dataset into the Training set and Test set
# --------------------------------------------------------
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Exited, SplitRatio = 0.8)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Fitting XGBoost to the Training set
# -----------------------------------
library(xgboost)

# Fit the model on a data frame holding the feature columns plus `Exited`.
# Shared by the final fit and every cross-validation fold so both always use
# identical settings.
# NOTE(review): no objective is passed, so xgboost's default is used while the
# predictions are later thresholded at 0.5 - consider 'binary:logistic'.
fit_xgboost = function(train_df) {
  xgboost(
    data = as.matrix(train_df[feature_cols]),
    label = train_df$Exited,
    nrounds = 10
  )
}

classifier = fit_xgboost(training_set)

# Applying k-Fold Cross Validation
# --------------------------------
library(caret)
# Folds are built from the real target (`Exited`), passed as a factor so that
# caret samples within each class.
folds = createFolds(factor(training_set$Exited), k = 10)
cv = lapply(folds, function(x) {
  # `x` holds the row indices of the held-out fold.
  training_fold = training_set[-x, ]
  test_fold = training_set[x, ]

  # Train on the remaining folds only; fitting on the full training set would
  # leak the held-out rows into the model and inflate the accuracy.
  fold_classifier = fit_xgboost(training_fold)

  y_score = predict(fold_classifier,
                    newdata = as.matrix(test_fold[feature_cols]))
  y_pred = as.numeric(y_score >= 0.5)

  # Share of correct predictions. Unlike indexing a 2x2 confusion matrix,
  # this still works when a fold's predictions all fall in one class.
  mean(y_pred == test_fold$Exited)
})
accuracy = mean(as.numeric(cv))
0 commit comments