
Commit

bayesian optimization
ACSE-vg822 committed Sep 13, 2024
1 parent d2022ba commit 80fdab1
Showing 6 changed files with 89 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -5,7 +5,7 @@ __pycache__/

# C extensions
*.so
-
+bayesian_opt_logs/
# Distribution / packaging
.Python
build/
2 changes: 1 addition & 1 deletion artifacts/errors.txt
@@ -1 +1 @@
-Threshold: 0.007301321548107711
+Threshold: 0.004547533124656054
Binary file modified artifacts/model_new.keras
Binary file not shown.
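
Note: the new threshold in errors.txt was presumably regenerated from the retuned model's reconstruction errors; the exact formula sits in a collapsed hunk of model_trainer.py below. For reference only, a minimal sketch of one common convention (mean plus k standard deviations of the training error — an assumption, not necessarily this repository's method):

import numpy as np

# Hypothetical sketch: derive an anomaly threshold from training
# reconstruction errors via the mean + k * std convention.
# k = 3.0 is an illustrative choice, not a value from this commit.
def compute_threshold(train_errors: np.ndarray, k: float = 3.0) -> float:
    return float(np.mean(train_errors) + k * np.std(train_errors))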
1 change: 1 addition & 0 deletions requirements.txt
@@ -3,6 +3,7 @@ numpy
pandas
scikit-learn
tensorflow
+keras-tuner
seaborn
matplotlib
Flask
6 changes: 2 additions & 4 deletions src/components/data_ingestion.py
@@ -10,9 +10,7 @@

from sklearn.preprocessing import MinMaxScaler

-from src.components.data_transformation import DataTransformation
-#from src.components.data_transformation import DataTransformationConfig
-from src.components.model_trainer import ModelTRainer
+from src.components.model_trainer import ModelTrainer

@dataclass
class DataIngestionConfig:
@@ -74,5 +72,5 @@ def initiate_data_ingestion(self):
    obj = DataIngestion()
    train_data, test_data = obj.initiate_data_ingestion()

-    modeltrainer = ModelTRainer()
+    modeltrainer = ModelTrainer()
    print(modeltrainer.initiate_model_trainer(train_data, test_data))
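
Side note: initiate_model_trainer expects train_data and test_data as 3-D arrays of shape (samples, timesteps, features), since the trainer reads X_train.shape[1] and X_train.shape[2] to size the autoencoder. A minimal sketch of the expected input, with illustrative dimensions that are assumptions rather than values from this commit:

import numpy as np

# Hypothetical example: 1000 windows of 30 time steps with 5 features each.
X_train = np.random.rand(1000, 30, 5).astype("float32")
input_shape = (X_train.shape[1], X_train.shape[2])  # (30, 5)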
126 changes: 84 additions & 42 deletions src/components/model_trainer.py
@@ -3,8 +3,9 @@
from dataclasses import dataclass

from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed
+from tensorflow.keras.layers import LSTM, Dense, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
+from kerastuner.tuners import BayesianOptimization

from src.exception import CustomException
from src.logger import logging
@@ -17,76 +18,117 @@ class ModelTrainerConfig:
    trained_model_file_path = os.path.join("artifacts", "model_new.keras")
    errors_file_path = os.path.join("artifacts", "errors.txt")

-class ModelTRainer:
+class ModelTrainer:
    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

+    def build_model(self, hp, input_shape):
+        """Build the LSTM Autoencoder model using hyperparameters from Bayesian Optimization."""
+        # Build the LSTM Autoencoder model
+        model = Sequential([
+            LSTM(units=hp.Int('units_1', min_value=32, max_value=256, step=32), return_sequences=True, input_shape=input_shape),
+            LSTM(units=hp.Int('units_2', min_value=32, max_value=128, step=32), return_sequences=False),
+            RepeatVector(input_shape[0]),  # Repeat the output across all time steps
+            LSTM(units=hp.Int('units_3', min_value=32, max_value=128, step=32), return_sequences=True),
+            LSTM(units=hp.Int('units_4', min_value=32, max_value=64, step=32), return_sequences=True),
+            Dense(input_shape[1])  # Output layer with the same number of features as input
+        ])
+
+        # Compile the model with the selected optimizer
+        model.compile(
+            optimizer=hp.Choice('optimizer', values=['adam', 'rmsprop']),
+            loss='mse'
+        )
+        return model
+
+    def bayesian_optimization_tuning(self, X_train, X_val):
+        """Perform Bayesian Optimization for the LSTM Autoencoder model."""
+        input_shape = (X_train.shape[1], X_train.shape[2])
+
+        tuner = BayesianOptimization(
+            lambda hp: self.build_model(hp, input_shape),  # The model-building function
+            objective='val_loss',  # Minimize validation loss (MSE)
+            max_trials=3,  # Number of hyperparameter settings to try
+            directory='bayesian_opt_logs',  # Directory where the logs will be saved
+            project_name='lstm_autoencoder'
+        )
+
+        # Early stopping callback to stop the search trials if no improvement after `patience` epochs
+        early_stopping = EarlyStopping(
+            monitor='val_loss',
+            patience=3,
+            restore_best_weights=True
+        )
+
+        # Search for the best hyperparameters
+        tuner.search(
+            X_train, X_train,
+            epochs=25,
+            validation_data=(X_val, X_val),
+            callbacks=[early_stopping],
+            batch_size=32
+        )
+
+        # Return the tuner; the best hyperparameters and models can be queried from it
+        return tuner
+
+    def get_callbacks(self):
+        """Generate the list of callbacks used during model training."""
+        # Early stopping callback
+        early_stopping = EarlyStopping(
+            monitor='val_loss',  # Monitor validation loss
+            patience=3,  # Number of epochs with no improvement before stopping
+            restore_best_weights=True  # Restore weights from the epoch with the best validation loss
+        )
+
+        # Model checkpoint callback
+        model_checkpoint = ModelCheckpoint(
+            filepath=self.model_trainer_config.trained_model_file_path,  # File path to save the best model
+            monitor='val_loss',  # Save the model with the best validation loss
+            save_best_only=True,  # Save only the best model
+            save_weights_only=False,  # Save the full model (architecture + weights)
+            verbose=1
+        )
+
+        return [early_stopping, model_checkpoint]
+
    def initiate_model_trainer(self, train_data, test_data):
        try:
            logging.info("Split training and test input data")

            X_train = train_data
            X_test = test_data

-            logging.info("Building the LSTM model")
+            logging.info("Performing Bayesian Optimization")

+            tuner = self.bayesian_optimization_tuning(X_train, X_test)

-            # Build the LSTM Autoencoder model
-            model = Sequential([
-                LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
-                LSTM(64, return_sequences=False),  # Output only the last time step
-                RepeatVector(X_train.shape[1]),  # Repeat the output across all time steps
-                LSTM(64, return_sequences=True),
-                LSTM(32, return_sequences=True),
-                Dense(X_train.shape[2])  # Output layer with the same number of features as input
-            ])
+            # Retrieve the best model from tuning
+            best_model = tuner.get_best_models(num_models=1)[0]

            # Compile the model
-            model.compile(loss='mse', optimizer='adam')  # Mean Squared Error loss function
+            best_model.compile(loss='mse', optimizer='adam')  # Mean Squared Error loss function
            logging.info("Model compiled successfully.")

-            # Callbacks for early stopping, reducing learning rate, and saving the best model
-            early_stopping = EarlyStopping(
-                monitor='val_loss',  # Monitor validation loss
-                patience=3,  # Number of epochs with no improvement before stopping
-                restore_best_weights=True  # Restore model weights from the epoch with the best validation loss
-            )
-
-            reduce_lr = ReduceLROnPlateau(
-                monitor='val_loss',  # Reduce learning rate when validation loss stops improving
-                factor=0.5,  # Reduce learning rate by this factor
-                patience=3,  # Number of epochs with no improvement before reducing the learning rate
-                min_lr=1e-6  # Lower bound on the learning rate
-            )
-
-            model_checkpoint = ModelCheckpoint(
-                filepath=self.model_trainer_config.trained_model_file_path,  # File path to save the best model
-                monitor='val_loss',  # Save model with the best validation loss
-                save_best_only=True,  # Save only the best model
-                save_weights_only=False,  # Save the full model (architecture + weights)
-                verbose=1
-            )

            # Train the model
            logging.info("Starting model training")
-            history = model.fit(
+            history = best_model.fit(
                X_train, X_train,  # In anomaly detection, we train the model to reconstruct the input
                epochs=50,  # Number of epochs can be adjusted as needed
                batch_size=32,
                validation_data=(X_test, X_test),  # Validation on the test set
-                callbacks = [early_stopping, reduce_lr, model_checkpoint]
+                callbacks=self.get_callbacks()
            )

            logging.info("Model training completed.")

            # Save the trained model
            # model.save(self.model_trainer_config.trained_model_file_path)
            logging.info(f"Model saved at {self.model_trainer_config.trained_model_file_path}")

            # Make predictions
            logging.info("Generating predictions for training and test data")
-            train_predictions = model.predict(X_train)
-            test_predictions = model.predict(X_test)
+            logging.info((X_train.shape, X_test.shape))
+            train_predictions = best_model.predict(X_train)
+            test_predictions = best_model.predict(X_test)

            # Calculate reconstruction errors for training and test data
            train_reconstruction_error = np.mean(np.square(X_train - train_predictions), axis=(1, 2))
@@ -106,4 +148,4 @@ def initiate_model_trainer(self, train_data, test_data):
            return self.model_trainer_config.trained_model_file_path

        except Exception as e:
-            raise CustomException(e,sys)
+            raise CustomException(e, sys)
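
For reference, the tuner returned by bayesian_optimization_tuning also exposes the winning hyperparameter values directly, which is useful for logging them alongside the saved model. A minimal sketch using the standard Keras Tuner API (the hp names follow those defined in build_model):

# Inspect the best hyperparameters found by the Bayesian search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.get('units_1'), best_hps.get('optimizer'))

# Optionally rebuild a fresh, untrained model from those hyperparameters
# instead of reusing the trained instance from get_best_models().
model = tuner.hypermodel.build(best_hps)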
