
Commit ee353ce

Implemented kernel PCA in R
1 parent e1fb774 commit ee353ce

1 file changed: +77 -0 lines changed

@@ -0,0 +1,77 @@
# Importing the dataset
# ---------------------
dataset = read.csv('../../data_files/Social_Network_Ads.csv')
dataset = dataset[, 3:5]
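# Columns 3:5 are assumed here to be Age, EstimatedSalary and Purchased, i.e.
# the two numeric features plus the binary target used below.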

# Splitting the dataset into the Training set and Test set
# --------------------------------------------------------
# install.packages('caTools')
library(caTools)
set.seed(123)
split = sample.split(dataset$Purchased, SplitRatio = 0.75)
training_set = subset(dataset, split == TRUE)
test_set = subset(dataset, split == FALSE)
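# Optional sanity check: sample.split stratifies on the target, so the class
# proportions should be similar in both sets.
# prop.table(table(training_set$Purchased))
# prop.table(table(test_set$Purchased))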

# Feature scaling
# ---------------
training_set[, 1:2] = scale(training_set[, 1:2])
test_set[, 1:2] = scale(test_set[, 1:2])
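# Note: scale() standardizes each set with its own mean and standard deviation.
# A common alternative (sketch) reuses the training-set parameters for the test set:
# scaled_train = scale(training_set[, 1:2])
# training_set[, 1:2] = scaled_train
# test_set[, 1:2] = scale(test_set[, 1:2],
#                         center = attr(scaled_train, 'scaled:center'),
#                         scale = attr(scaled_train, 'scaled:scale'))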

# Applying Kernel PCA
# -------------------
# install.packages('kernlab')
library(kernlab)
kpca = kpca(~., data = training_set[-3], kernel = 'rbfdot', features = 2)
training_set_pca = as.data.frame(predict(kpca, training_set))
training_set_pca$Purchased = training_set$Purchased
test_set_pca = as.data.frame(predict(kpca, test_set))
test_set_pca$Purchased = test_set$Purchased
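# The RBF kernel width can also be set explicitly through kpar; a sketch, with
# sigma = 0.1 chosen arbitrarily for illustration:
# kpca = kpca(~., data = training_set[-3], kernel = 'rbfdot',
#             kpar = list(sigma = 0.1), features = 2)
# eig(kpca) (a kernlab accessor) gives the eigenvalues of the retained
# components, a rough measure of how much structure they capture.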

# Fitting Logistic Regression to the Training set
# -----------------------------------------------
classifier = glm(formula = Purchased ~ .,
                 family = binomial,
                 data = training_set_pca)
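# Optional: inspect the fitted coefficients on the two kernel components
# (named V1 and V2 by as.data.frame above).
# summary(classifier)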

# Predicting the Test set results
# -------------------------------
prob_pred = predict(classifier, type = 'response', newdata = test_set_pca[-3])
y_pred = ifelse(prob_pred > 0.5, 1, 0)

# Making the Confusion Matrix
cm = table(test_set_pca[, 3], y_pred)
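# Simple accuracy from the confusion matrix (correct predictions / total),
# using the same 0.5 threshold as above.
accuracy = sum(diag(cm)) / sum(cm)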

# Visualizing the Training set results
# ------------------------------------
# (base graphics only; the grid below traces the logistic decision boundary
# in the space of the two kernel principal components)
library(ElemStatLearn)
set = training_set_pca
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('V1', 'V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
plot(set[, -3],
     main = 'Logistic Regression (Training Set)',
     xlab = 'PC1', ylab = 'PC2',  # axes are the extracted kernel components, not the original features
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

# Visualizing the Test set results
# --------------------------------
set = test_set_pca
X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
grid_set = expand.grid(X1, X2)
colnames(grid_set) = c('V1', 'V2')
prob_set = predict(classifier, type = 'response', newdata = grid_set)
y_grid = ifelse(prob_set > 0.5, 1, 0)
plot(set[, -3],
     main = 'Logistic Regression (Test Set)',
     xlab = 'PC1', ylab = 'PC2',
     xlim = range(X1), ylim = range(X2))
contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))

0 commit comments
