diff --git a/additional materials/transparencias/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ANN.key b/additional materials/transparencias/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ANN.key
index 3d1b5da3..9c1b9765 100755
Binary files a/additional materials/transparencias/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ANN.key and b/additional materials/transparencias/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ANN.key differ
diff --git a/additional materials/transparencias/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/CNN.key b/additional materials/transparencias/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/CNN.key
index 4fb0ec68..e92508c6 100755
Binary files a/additional materials/transparencias/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/CNN.key and b/additional materials/transparencias/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/CNN.key differ
diff --git a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template_new_version.py b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template_new_version.py
new file mode 100755
index 00000000..5f56f20f
--- /dev/null
+++ b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/data_preprocessing_template_new_version.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Apr 26 12:11:34 2020
+
+@author: juangabriel
+"""
+
+
+# Data Preprocessing Template
+
+# How to import the libraries in Python
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Import the dataset
+dataset = pd.read_csv("Data.csv")
+X = dataset.iloc[:, :-1].values
+y = dataset.iloc[:, 3].values
+
+# Handle missing values (NAs)
+from sklearn.impute import SimpleImputer
+imputer = SimpleImputer(missing_values = np.nan, strategy = "mean")
+imputer = imputer.fit(X[:, 1:3])
+X[:, 1:3] = imputer.transform(X[:, 1:3])
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
+labelencoder_X = LabelEncoder()
+X[:, 0] = labelencoder_X.fit_transform(X[:, 0])
+
+ct = ColumnTransformer(
+    [('one_hot_encoder', OneHotEncoder(categories='auto'), [0])],
+    remainder='passthrough'
+)
+
+X = np.array(ct.fit_transform(X), dtype=float)
+labelencoder_y = LabelEncoder()
+y = labelencoder_y.fit_transform(y)
+
+
+# Split the dataset into training and testing sets
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+# Feature scaling
+from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)
\ No newline at end of file
diff --git a/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py b/datasets/Part 1 - Data Preprocessing/Section 2 -------------------- Part 1 - Data Preprocessing --------------------/missing_data.py
old mode 100644
new mode 100755
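The template above keeps imputation, encoding, splitting, and scaling as separate steps. For reference, a minimal sketch of the same flow bundled into a single ColumnTransformer-based pipeline, fitted only on the training set to avoid leakage; the column indices are an assumption carried over from the same Data.csv layout (categorical column 0, numeric columns 1-2, target last):

# Minimal sketch, assuming the Data.csv layout used above.
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

dataset = pd.read_csv("Data.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

numeric = Pipeline([("impute", SimpleImputer(strategy="mean")),
                    ("scale", StandardScaler())])
preprocess = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), [0]),  # country column
    ("num", numeric, [1, 2]),                              # age, salary columns
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train = preprocess.fit_transform(X_train)  # fit only on the training set
X_test = preprocess.transform(X_test)        # reuse the fitted transformers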
diff --git a/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost.py b/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost.py
index fad51374..e86a8d87 100644
--- a/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost.py
+++ b/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost.py
@@ -30,25 +30,3 @@
 X = onehotencoder.fit_transform(X).toarray()
 X = X[:, 1:]
 
-# Split the dataset into training and testing sets
-from sklearn.model_selection import train_test_split
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
-
-# Fit the XGBoost model to the training set
-from xgboost import XGBClassifier
-classifier = XGBClassifier()
-classifier.fit(X_train, y_train)
-
-# Predict the test set results
-y_pred = classifier.predict(X_test)
-
-# Build a confusion matrix
-from sklearn.metrics import confusion_matrix
-cm = confusion_matrix(y_test, y_pred)
-
-# Apply k-fold cross-validation
-from sklearn.model_selection import cross_val_score
-accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
-accuracies.mean()
-accuracies.std()
-
diff --git a/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost_new_version.py b/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost_new_version.py
new file mode 100755
index 00000000..e57dcb9d
--- /dev/null
+++ b/datasets/Part 10 - Model Selection & Boosting/Section 49 - XGBoost/xgboost_new_version.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun May 5 09:58:24 2019
+
+@author: juangabriel
+"""
+
+# XGBoost
+# Installation instructions are available at http://xgboost.readthedocs.io/en/latest/build.html
+
+# How to import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Import the dataset
+dataset = pd.read_csv('Churn_Modelling.csv')
+
+X = dataset.iloc[:, 3:13].values
+y = dataset.iloc[:, 13].values
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
+labelencoder_X_1 = LabelEncoder()
+X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
+labelencoder_X_2 = LabelEncoder()
+X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
+ct = ColumnTransformer(
+    [('one_hot_encoder', OneHotEncoder(categories='auto'), [2])],
+    remainder='passthrough'
+)
+X = np.array(ct.fit_transform(X), dtype=float)
+X = X[:, 1:]
+
+
+# Split the dataset into training and testing sets
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+# Fit the XGBoost model to the training set
+from xgboost import XGBClassifier
+classifier = XGBClassifier()
+classifier.fit(X_train, y_train)
+
+# Predict the test set results
+y_pred = classifier.predict(X_test)
+
+# Build a confusion matrix
+from sklearn.metrics import confusion_matrix
+cm = confusion_matrix(y_test, y_pred)
+
+# Apply k-fold cross-validation
+from sklearn.model_selection import cross_val_score
+accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
+print(accuracies.mean())
+print(accuracies.std())
+
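One caveat worth noting: a script named xgboost.py in this folder shadows the installed xgboost package, so `from xgboost import XGBClassifier` can resolve to the local stub and fail when scripts are run from this directory; keeping the runnable code in xgboost_new_version.py only partly sidesteps that, and renaming the stub avoids the clash entirely. Beyond the k-fold estimate, a natural next step is hyperparameter tuning. A sketch with GridSearchCV follows; the parameter grid is illustrative, not part of the course code, and X_train/y_train are assumed from the script above:

# Illustrative tuning sketch; the grid values are assumptions.
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

param_grid = {
    "max_depth": [3, 5, 7],           # hypothetical search values
    "learning_rate": [0.05, 0.1, 0.3],
    "n_estimators": [100, 300],
}
grid = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid,
                    scoring="accuracy", cv=10, n_jobs=-1)
grid.fit(X_train, y_train)
print(grid.best_params_, grid.best_score_)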
diff --git a/datasets/Part 2 - Regression/Section 5 - Multiple Linear Regression/multiple_linear_regression_new_version.py b/datasets/Part 2 - Regression/Section 5 - Multiple Linear Regression/multiple_linear_regression_new_version.py
new file mode 100755
index 00000000..b5057462
--- /dev/null
+++ b/datasets/Part 2 - Regression/Section 5 - Multiple Linear Regression/multiple_linear_regression_new_version.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Mar 3 13:10:07 2019
+
+@author: juangabriel
+"""
+
+# Multiple Linear Regression
+
+# How to import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Import the dataset
+dataset = pd.read_csv('50_Startups.csv')
+X = dataset.iloc[:, :-1].values
+y = dataset.iloc[:, 4].values
+
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import make_column_transformer
+
+labelencoder_X = LabelEncoder()
+X[:, 3] = labelencoder_X.fit_transform(X[:, 3])
+onehotencoder = make_column_transformer((OneHotEncoder(), [3]), remainder = "passthrough")
+X = onehotencoder.fit_transform(X)
+
+# Avoid the dummy variable trap
+X = X[:, 1:]
+
+# Split the dataset into training and testing sets
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+
+# Feature scaling
+"""from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)"""
+
+# Fit the multiple linear regression model to the training set
+from sklearn.linear_model import LinearRegression
+regression = LinearRegression()
+regression.fit(X_train, y_train)
+
+# Predict the test set results
+y_pred = regression.predict(X_test)
+
+# Build the optimal MLR model using backward elimination
+import statsmodels.api as sm
+X = np.append(arr = np.ones((50, 1)).astype(int), values = X, axis = 1)
+SL = 0.05
+
+# The .tolist() modifier was added to X_opt for compatibility with Python 3.7
+
+X_opt = X[:, [0, 1, 2, 3, 4, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 1, 3, 4, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 3, 4, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 3, 5]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
+
+X_opt = X[:, [0, 3]]
+regression_OLS = sm.OLS(endog = y, exog = X_opt.tolist()).fit()
+print(regression_OLS.summary())
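The five manual elimination rounds above repeat the same move: fit, read the p-values, drop the worst predictor. A sketch (not from the course code) that automates the loop; it assumes X already has the column of ones appended, and casting to float avoids the object-dtype issue that the .tolist() workaround addresses:

# Sketch of automated backward elimination, assuming X and y from above.
import numpy as np
import statsmodels.api as sm

def backward_elimination(X, y, sl=0.05):
    X_opt = np.asarray(X, dtype=float)
    while True:
        model = sm.OLS(endog=y, exog=X_opt).fit()
        pvals = np.asarray(model.pvalues)
        worst = int(np.argmax(pvals))
        if pvals[worst] <= sl:
            return model, X_opt
        X_opt = np.delete(X_opt, worst, axis=1)  # drop least significant column

model, X_opt = backward_elimination(X, y, sl=SL)
print(model.summary())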
diff --git a/datasets/Part 2 - Regression/Section 8 - Decision Tree Regression/decission_tree_regression_new_version.py b/datasets/Part 2 - Regression/Section 8 - Decision Tree Regression/decission_tree_regression_new_version.py
new file mode 100755
index 00000000..9df04e49
--- /dev/null
+++ b/datasets/Part 2 - Regression/Section 8 - Decision Tree Regression/decission_tree_regression_new_version.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 7 19:04:40 2019
+
+@author: juangabriel
+"""
+
+# Decision Tree Regression
+
+# How to import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Import the dataset
+dataset = pd.read_csv('Position_Salaries.csv')
+X = dataset.iloc[:, 1:2].values
+y = dataset.iloc[:, 2].values
+
+
+# Split the dataset into training and testing sets
+"""
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+"""
+
+# Feature scaling
+"""from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)"""
+
+# Fit the regression model to the dataset
+from sklearn.tree import DecisionTreeRegressor
+regression = DecisionTreeRegressor(random_state = 0)
+regression.fit(X, y)
+
+# Predict with our model
+y_pred = regression.predict([[6.5]])
+print(y_pred)
+
+# Visualize the decision tree results (on X_grid so the step function shows)
+X_grid = np.arange(X.min(), X.max(), 0.1)
+X_grid = X_grid.reshape(len(X_grid), 1)
+plt.scatter(X, y, color = "red")
+plt.plot(X_grid, regression.predict(X_grid), color = "blue")
+plt.title("Decision Tree Regression Model")
+plt.xlabel("Employee position level")
+plt.ylabel("Salary (in $)")
+plt.show()
+
+
diff --git a/datasets/Part 2 - Regression/Section 9 - Random Forest Regression/random_forest_regression_new_version.py b/datasets/Part 2 - Regression/Section 9 - Random Forest Regression/random_forest_regression_new_version.py
new file mode 100755
index 00000000..58a6fab1
--- /dev/null
+++ b/datasets/Part 2 - Regression/Section 9 - Random Forest Regression/random_forest_regression_new_version.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Mar 11 19:53:04 2019
+
+@author: juangabriel
+"""
+
+# Random Forest Regression
+
+# How to import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Import the dataset
+dataset = pd.read_csv('Position_Salaries.csv')
+X = dataset.iloc[:, 1:2].values
+y = dataset.iloc[:, 2].values
+
+
+# Split the dataset into training and testing sets
+"""
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+"""
+
+# Feature scaling
+"""from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)"""
+
+# Fit the random forest model to the dataset
+from sklearn.ensemble import RandomForestRegressor
+regression = RandomForestRegressor(n_estimators = 300, random_state = 0)
+regression.fit(X, y)
+
+# Predict with the random forest model
+y_pred = regression.predict([[6.5]])
+
+# Visualize the random forest results
+X_grid = np.arange(X.min(), X.max(), 0.01)
+X_grid = X_grid.reshape(len(X_grid), 1)
+plt.scatter(X, y, color = "red")
+plt.plot(X_grid, regression.predict(X_grid), color = "blue")
+plt.title("Random Forest Regression Model")
+plt.xlabel("Employee position level")
+plt.ylabel("Salary (in $)")
+plt.show()
+
+
diff --git a/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/Apriori_Python/apriori.py b/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/Apriori_Python/apriori.py
index 4d3dad6a..c8d63fae 100644
--- a/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/Apriori_Python/apriori.py
+++ b/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/Apriori_Python/apriori.py
@@ -28,4 +28,4 @@
 
 # Visualizing the results
 results = list(rules)
-results[4]
\ No newline at end of file
+print(results[4])
\ No newline at end of file
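print(results[4]) dumps a single raw RelationRecord, which is hard to read. Assuming `results = list(rules)` comes from the apyori package as in the script above, a sketch that prints every rule with its support, confidence, and lift:

# Sketch, assuming `results = list(rules)` from apyori as above.
for record in results:
    for stat in record.ordered_statistics:
        print(f"{set(stat.items_base)} -> {set(stat.items_add)}  "
              f"support={record.support:.4f}, "
              f"confidence={stat.confidence:.3f}, lift={stat.lift:.3f}")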
diff --git a/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/apriori.R b/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/apriori.R
index b6184b3c..ae971160 100644
--- a/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/apriori.R
+++ b/datasets/Part 5 - Association Rule Learning/Section 28 - Apriori/apriori.R
@@ -2,7 +2,9 @@
 # Data Preprocessing
 
 #install.packages("arules")
-library(arules)
+library(arules)
+library(arulesViz)
+
 dataset = read.csv("Market_Basket_Optimisation.csv", header = FALSE)
 dataset = read.transactions("Market_Basket_Optimisation.csv", sep = ",", rm.duplicates = TRUE)
 
@@ -16,5 +18,7 @@ rules = apriori(data = dataset,
 
 # Visualizing the results
 inspect(sort(rules, by = 'lift')[1:10])
-
+plot(rules, method = "graph", engine = "htmlwidget")
+
+
\ No newline at end of file
diff --git a/datasets/Part 7 - Natural Language Processing/Section 36 - Natural Language Processing/natural_language_processing.py b/datasets/Part 7 - Natural Language Processing/Section 36 - Natural Language Processing/natural_language_processing.py
index 64952727..fb36a1d6 100644
--- a/datasets/Part 7 - Natural Language Processing/Section 36 - Natural Language Processing/natural_language_processing.py
+++ b/datasets/Part 7 - Natural Language Processing/Section 36 - Natural Language Processing/natural_language_processing.py
@@ -56,4 +56,5 @@
 
 from sklearn.metrics import confusion_matrix
 cm = confusion_matrix(y_test, y_pred)
+print(cm)
 (55+91)/200
\ No newline at end of file
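The bare expression (55+91)/200 hand-computes accuracy from the diagonal of this particular confusion matrix. A sketch that derives the usual metrics from cm for any binary case, assuming sklearn's [[TN, FP], [FN, TP]] layout for labels {0, 1}:

# Sketch, assuming cm = confusion_matrix(y_test, y_pred) as above.
tn, fp, fn, tp = cm.ravel()
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)
print(f"accuracy={accuracy:.3f} precision={precision:.3f} "
      f"recall={recall:.3f} f1={f1:.3f}")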
diff --git a/datasets/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ann_new_version.py b/datasets/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ann_new_version.py
new file mode 100755
index 00000000..cc1d87ed
--- /dev/null
+++ b/datasets/Part 8 - Deep Learning/Section 39 - Artificial Neural Networks (ANN)/ann_new_version.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Apr 28 10:38:56 2019
+
+@author: juangabriel
+"""
+
+# Artificial Neural Networks
+
+# Install Theano
+# pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git
+
+# Install TensorFlow and Keras
+# conda install -c conda-forge keras
+
+# Part 1 - Data preprocessing
+
+
+# How to import the libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+# Import the dataset
+dataset = pd.read_csv('Churn_Modelling.csv')
+
+X = dataset.iloc[:, 3:13].values
+y = dataset.iloc[:, 13].values
+
+# Encode categorical data
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+
+labelencoder_X_1 = LabelEncoder()
+X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
+labelencoder_X_2 = LabelEncoder()
+X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
+onehotencoder = ColumnTransformer(
+    [('one_hot_encoder', OneHotEncoder(categories='auto'), [1])],
+    remainder='passthrough'
+)
+X = onehotencoder.fit_transform(X)
+X = X[:, 1:]
+
+# Split the dataset into training and testing sets
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
+
+# Feature scaling
+from sklearn.preprocessing import StandardScaler
+sc_X = StandardScaler()
+X_train = sc_X.fit_transform(X_train)
+X_test = sc_X.transform(X_test)
+
+
+# Part 2 - Building the ANN
+
+# Import Keras and additional libraries
+import keras
+from keras.models import Sequential
+from keras.layers import Dense
+
+# Initialize the ANN
+classifier = Sequential()
+
+# Add the input layer and the first hidden layer
+classifier.add(Dense(units = 6, kernel_initializer = "uniform",
+                     activation = "relu", input_dim = 11))
+
+# Add the second hidden layer
+classifier.add(Dense(units = 6, kernel_initializer = "uniform", activation = "relu"))
+
+# Add the output layer
+classifier.add(Dense(units = 1, kernel_initializer = "uniform", activation = "sigmoid"))
+
+# Compile the ANN
+classifier.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
+
+# Fit the ANN to the training set
+classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
+
+
+# Part 3 - Evaluate the model and compute final predictions
+
+# Predict the test set results
+y_pred = classifier.predict(X_test)
+y_pred = (y_pred > 0.5)
+# Build a confusion matrix
+from sklearn.metrics import confusion_matrix
+cm = confusion_matrix(y_test, y_pred)
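A common follow-up to Part 3 is scoring one new customer. A sketch with made-up feature values (all hypothetical, for illustration only); the row must follow the post-encoding column order assumed above — two geography dummies first, then the remaining features — and pass through the already-fitted scaler:

# Hypothetical new customer; values and column order are assumptions
# matching the encoding above (11 features, geography dummies first).
import numpy as np
new_customer = np.array([[0.0, 0.0,  # geography dummies (baseline category)
                          600,       # credit score
                          1,         # gender (label-encoded)
                          40,        # age
                          3,         # tenure
                          60000,     # balance
                          2,         # number of products
                          1,         # has credit card
                          1,         # is active member
                          50000]])   # estimated salary
new_pred = classifier.predict(sc_X.transform(new_customer)) > 0.5
print(new_pred)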
diff --git a/datasets/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/cnn.py b/datasets/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/cnn.py
index 1df16aa6..90830de1 100644
--- a/datasets/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/cnn.py
+++ b/datasets/Part 8 - Deep Learning/Section 40 - Convolutional Neural Networks (CNN)/cnn.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 Created on Mon Apr 29 19:20:59 2019
diff --git a/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA/kernel_pca_old.R b/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA/kernel_pca_old.R
deleted file mode 100644
index 101124b9..00000000
--- a/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA/kernel_pca_old.R
+++ /dev/null
@@ -1,74 +0,0 @@
-# Kernel PCA
-
-# Importing the dataset
-dataset = read.csv('Social_Network_Ads.csv')
-dataset = dataset[, 3:5]
-
-# Splitting the dataset into the Training set and Test set
-# install.packages('caTools')
-library(caTools)
-set.seed(123)
-split = sample.split(dataset$Purchased, SplitRatio = 0.75)
-training_set = subset(dataset, split == TRUE)
-test_set = subset(dataset, split == FALSE)
-
-# Feature Scaling
-training_set[, 1:2] = scale(training_set[, 1:2])
-test_set[, 1:2] = scale(test_set[, 1:2])
-
-# Applying Kernel PCA
-# install.packages('kernlab')
-library(kernlab)
-kpca = kpca(~., data = training_set[-3], kernel = 'rbfdot', features = 2)
-training_set_pca = as.data.frame(predict(kpca, training_set))
-training_set_pca$Purchased = training_set$Purchased
-test_set_pca = as.data.frame(predict(kpca, test_set))
-test_set_pca$Purchased = test_set$Purchased
-
-# Fitting Logistic Regression to the Training set
-classifier = glm(formula = Purchased ~ .,
-                 family = binomial,
-                 data = training_set_pca)
-
-# Predicting the Test set results
-prob_pred = predict(classifier, type = 'response', newdata = test_set_pca[-3])
-y_pred = ifelse(prob_pred > 0.5, 1, 0)
-
-# Making the Confusion Matrix
-cm = table(test_set_pca[, 3], y_pred)
-
-# Visualising the Training set results
-install.packages('ElemStatLearn')
-library(ElemStatLearn)
-set = training_set_pca
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('V1', 'V2')
-prob_set = predict(classifier, type = 'response', newdata = grid_set)
-y_grid = ifelse(prob_set > 0.5, 1, 0)
-plot(set[, -3],
-     main = 'Logistic Regression (Training set)',
-     xlab = 'PC1', ylab = 'PC2',
-     xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
-points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
-
-# Visualising the Test set results
-# install.packages('ElemStatLearn')
-library(ElemStatLearn)
-set = test_set_pca
-X1 = seq(min(set[, 1]) - 1, max(set[, 1]) + 1, by = 0.01)
-X2 = seq(min(set[, 2]) - 1, max(set[, 2]) + 1, by = 0.01)
-grid_set = expand.grid(X1, X2)
-colnames(grid_set) = c('V1', 'V2')
-prob_set = predict(classifier, type = 'response', newdata = grid_set)
-y_grid = ifelse(prob_set > 0.5, 1, 0)
-plot(set[, -3],
-     main = 'Logistic Regression (Test set)',
-     xlab = 'Age', ylab = 'Estimated Salary',
-     xlim = range(X1), ylim = range(X2))
-contour(X1, X2, matrix(as.numeric(y_grid), length(X1), length(X2)), add = TRUE)
-points(grid_set, pch = '.', col = ifelse(y_grid == 1, 'springgreen3', 'tomato'))
-points(set, pch = 21, bg = ifelse(set[, 3] == 1, 'green4', 'red3'))
\ No newline at end of file
diff --git a/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA/kernel_pca_old.py b/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA/kernel_pca_old.py
deleted file mode 100644
index 30e96826..00000000
--- a/datasets/Part 9 - Dimensionality Reduction/Section 45 - Kernel PCA/kernel_pca_old.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Kernel PCA
-
-# Importing the libraries
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-
-# Importing the dataset
-dataset = pd.read_csv('Social_Network_Ads.csv')
-X = dataset.iloc[:, [2, 3]].values
-y = dataset.iloc[:, 4].values
-
-# Splitting the dataset into the Training set and Test set
-from sklearn.model_selection import train_test_split
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)
-
-# Feature Scaling
-from sklearn.preprocessing import StandardScaler
-sc = StandardScaler()
-X_train = sc.fit_transform(X_train)
-X_test = sc.transform(X_test)
-
-# Applying Kernel PCA
-from sklearn.decomposition import KernelPCA
-kpca = KernelPCA(n_components = 2, kernel = 'rbf')
-X_train = kpca.fit_transform(X_train)
-X_test = kpca.transform(X_test)
-
-# Fitting Logistic Regression to the Training set
-from sklearn.linear_model import LogisticRegression
-classifier = LogisticRegression(random_state = 0)
-classifier.fit(X_train, y_train)
-
-# Predicting the Test set results
-y_pred = classifier.predict(X_test)
-
-# Making the Confusion Matrix
-from sklearn.metrics import confusion_matrix
-cm = confusion_matrix(y_test, y_pred)
-
-# Visualising the Training set results
-from matplotlib.colors import ListedColormap
-X_set, y_set = X_train, y_train
-X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
-                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
-plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
-             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
-plt.xlim(X1.min(), X1.max())
-plt.ylim(X2.min(), X2.max())
-for i, j in enumerate(np.unique(y_set)):
-    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
-                c = ListedColormap(('red', 'green'))(i), label = j)
-plt.title('Logistic Regression (Training set)')
-plt.xlabel('Age')
-plt.ylabel('Estimated Salary')
-plt.legend()
-plt.show()
-
-# Visualising the Test set results
-from matplotlib.colors import ListedColormap
-X_set, y_set = X_test, y_test
-X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
-                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
-plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
-             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
-plt.xlim(X1.min(), X1.max())
-plt.ylim(X2.min(), X2.max())
-for i, j in enumerate(np.unique(y_set)):
-    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
-                c = ListedColormap(('red', 'green'))(i), label = j)
-plt.title('Logistic Regression (Test set)')
-plt.xlabel('Age')
-plt.ylabel('Estimated Salary')
-plt.legend()
-plt.show()
\ No newline at end of file
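Both kernel_pca_old scripts are deleted here; the R version leaned on ElemStatLearn, which has since been archived on CRAN, and that presumably motivated the cleanup. For reference, a condensed sketch of the same workflow from the deleted Python script, kept dependency-light by dropping the grid plots:

# Condensed from the deleted kernel_pca_old.py: kernel PCA to 2 components,
# then logistic regression, scored on the test set.
import pandas as pd
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

kpca = KernelPCA(n_components=2, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

classifier = LogisticRegression(random_state=0).fit(X_train, y_train)
print(confusion_matrix(y_test, classifier.predict(X_test)))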