# Kernel PCA + Logistic Regression on the Social Network Ads data
# ================================================================
# Pipeline: load data -> train/test split -> standardise features ->
# project onto 2 kernel principal components (RBF kernel) -> fit a
# logistic regression on the components -> evaluate and plot the
# decision regions for the training and test sets.

# Importing the dataset
# ---------------------
dataset = read.csv('../../data_files/Social_Network_Ads.csv')
# Keep columns 3..5 only; the code below treats columns 1:2 as the
# features and column 3 (Purchased) as the label.
dataset = dataset[, 3:5]

# Splitting the dataset into the Training set and Test set
# --------------------------------------------------------
# install.packages('caTools')
library(caTools)
set.seed(123)  # fixed seed so the split is reproducible
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)

# Feature scaling
# ---------------
# Standardise the training features, then apply the SAME centre/scale to
# the test features. Scaling the test set with its own mean/sd would put
# the two sets on different scales before they reach the same model.
training_scaled = scale(training_set[, 1:2])
training_set[, 1:2] = training_scaled
test_set[, 1:2] = scale(test_set[, 1:2],
                        center = attr(training_scaled, 'scaled:center'),
                        scale = attr(training_scaled, 'scaled:scale'))

# Applying Kernel PCA
# -------------------
# install.packages('kernlab')
library(kernlab)
# Fit on the feature columns only (label column 3 is dropped).
# Note: the result is stored under the same name as the function; R still
# resolves the call below to the kernlab function.
kpca = kpca(~., data = training_set[-3], kernel = 'rbfdot', features = 2)
# as.data.frame() on the unnamed projection matrix yields columns V1, V2.
training_set_pca = as.data.frame(predict(kpca, training_set))
training_set_pca$Purchased = training_set$Purchased
test_set_pca = as.data.frame(predict(kpca, test_set))
test_set_pca$Purchased = test_set$Purchased

# Fitting Logistic Regression to training set
# --------------------------------------------
classifier = glm(formula = Purchased ~ .,
                 family = binomial,
                 data = training_set_pca)

# Predicting the Test set results
# -------------------------------
prob_pred = predict(classifier, type = 'response', newdata = test_set_pca[-3])
y_pred = ifelse(prob_pred > 0.5, 1, 0)  # 0.5 probability threshold

# Making the Confusion Matrix
# ---------------------------
# Rows: actual labels; columns: predicted labels.
cm = table(test_set_pca[, 3], y_pred)

# Visualizing the Training set results
# ------------------------------------
# NOTE(review): no function from ElemStatLearn is referenced in this script,
# and the package has been archived on CRAN -- consider dropping this line.
library(ElemStatLearn)
set = training_set_pca
# Dense grid (step 0.01) covering the data range plus a margin of 1.
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
# Grid columns must carry the predictor names the classifier was fit on.
colnames(grid_set) = c('V1', 'V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
# Axes are the two kernel principal components, not the raw features.
plot(set[, -3],
     main = 'Logistic Regression (Training Set)',
     xlab = 'PC1', ylab = 'PC2',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

# Visualizing the Test set results
# --------------------------------
set = test_set_pca
# `+ 1` and `by` belong to seq(), not max(): with the parenthesis misplaced
# the grid silently falls back to seq()'s default step of 1.
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('V1', 'V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
plot(set[, -3],
     main = 'Logistic Regression (Test Set)',
     xlab = 'PC1', ylab = 'PC2',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))