diff --git a/AI_models_pranav/guardian_monitor_models.py b/AI_models_pranav/guardian_monitor_models.py new file mode 100644 index 000000000..d6f0fe43a --- /dev/null +++ b/AI_models_pranav/guardian_monitor_models.py @@ -0,0 +1,449 @@ +# -*- coding: utf-8 -*- +"""guardian monitor.ipynb + +Automatically generated by Colab. + +Original file is located at + https://colab.research.google.com/drive/1cJmORhXpr_fDa_lXvVAtHtH6M-Z-dbXy +""" + + + +"""**Reasoning**: +To concatenate all DataFrames in `df_list` into a single DataFrame named `data`, I will use the `pd.concat()` function. + + +""" + +import pandas as pd + +data = pd.concat(df_list, ignore_index=True) +data.head() + +data = data[data[23] != 0] +print("Missing values after removing activity label 0:\n", data.isnull().sum()) + +import numpy as np +from sklearn.preprocessing import StandardScaler + +# Separate features (X) and activity labels (y) +X = data.iloc[:, :-1].values +y = data.iloc[:, -1].values + +# Normalize features using StandardScaler +scaler = StandardScaler() +X = scaler.fit_transform(X) + +print("Shape of X (features):", X.shape) +print("Shape of y (labels):", y.shape) + +from sklearn.model_selection import train_test_split + +# Split data into training and testing sets +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + +# Print the shapes of the resulting sets +print("Shape of X_train:", X_train.shape) +print("Shape of X_test:", X_test.shape) +print("Shape of y_train:", y_train.shape) +print("Shape of y_test:", y_test.shape) + +from sklearn.ensemble import RandomForestClassifier +from sklearn.svm import SVC +from sklearn.neighbors import KNeighborsClassifier +from sklearn.metrics import accuracy_score + +# 1. 
# 1. Random Forest Classifier
print("Training Random Forest Classifier...")
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Classifier Accuracy: {accuracy_rf:.4f}")

# 2. Support Vector Machine (SVC)
print("\nTraining Support Vector Machine (SVC)...")
# Note: SVC can be computationally intensive on large datasets.
svc_classifier = SVC(random_state=42)
svc_classifier.fit(X_train, y_train)
y_pred_svc = svc_classifier.predict(X_test)
accuracy_svc = accuracy_score(y_test, y_pred_svc)
print(f"Support Vector Machine (SVC) Accuracy: {accuracy_svc:.4f}")

# 3. K-Nearest Neighbors (KNN) Classifier
print("\nTraining K-Nearest Neighbors (KNN) Classifier...")
knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(X_train, y_train)
y_pred_knn = knn_classifier.predict(X_test)
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"K-Nearest Neighbors (KNN) Classifier Accuracy: {accuracy_knn:.4f}")

"""# Task
Identify the best-performing model, print its classification report, and plot
its confusion matrix (original Colab task prompt retained for provenance).
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Accuracy bar chart
model_names = ["Random Forest", "SVM", "KNN"]
model_accuracies = [accuracy_rf, accuracy_svc, accuracy_knn]

plt.figure(figsize=(8, 5))
bars = plt.bar(model_names, model_accuracies)
plt.title("Model Accuracy Comparison")
plt.xlabel("Models")
plt.ylabel("Accuracy")
plt.ylim(0.98, 1.0)

# Annotate each bar with its accuracy value.
for bar, acc in zip(bars, model_accuracies):
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        bar.get_height(),
        f"{acc:.4f}",
        ha="center",
        va="bottom"
    )

plt.show()

# Confusion matrix for best model.
# Build the name -> accuracy mapping ONCE: the original passed the same dict
# literal twice (as the iterable and again as key=), which is easy to let
# drift out of sync.
model_scores = {
    "Random Forest": accuracy_rf,
    "SVM": accuracy_svc,
    "KNN": accuracy_knn
}
best_model_name = max(model_scores, key=model_scores.get)

# Dispatch table instead of an if/elif chain.
model_predictions = {
    "Random Forest": y_pred_rf,
    "SVM": y_pred_svc,
    "KNN": y_pred_knn
}
best_predictions = model_predictions[best_model_name]

cm = confusion_matrix(y_test, best_predictions)

# Draw on the figure created above: without ax=, disp.plot() opens its own
# new figure and the 10x8 figure is left blank.
plt.figure(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap="Blues", values_format="d", ax=plt.gca())
plt.title(f"Confusion Matrix - {best_model_name}")
plt.show()

import matplotlib.pyplot as plt
import numpy as np

# Bar chart of the trained random forest's per-feature importances,
# one bar per sensor column, numbered from 1.
importances = rf_classifier.feature_importances_
feature_ids = np.arange(1, importances.size + 1)

plt.figure(figsize=(12, 6))
plt.bar(feature_ids, importances)
plt.title("Random Forest Feature Importance")
plt.xlabel("Feature Number")
plt.ylabel("Importance")
plt.xticks(feature_ids)
plt.show()

# Numeric activity label -> human-readable activity name.
activity_labels_map = {
    1: 'Standing',
    2: 'Sitting',
    3: 'Lying',
    4: 'Walking',
    5: 'Climbing Stairs',
    6: 'Waist Bends Forward',
    7: 'Frontal Elevation of Arms',
    8: 'Knees Bending (Crouching)',
    9: 'Cycling',
    10: 'Jogging',
    11: 'Running',
    12: 'Jump'
}

# Class names in ascending label order, derived from the map above so the
# two can never drift apart.
target_names = [activity_labels_map[label] for label in sorted(activity_labels_map)]

print("Activity labels map and target names created.")

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import os

# --- Start: Reconstructing 'data' from previous steps ---
# Expected data files: one whitespace-separated log per subject (1..10).
file_names = [f"mHealth_subject{subject}.log" for subject in range(1, 11)]

# !!! IMPORTANT: Ensure these files are uploaded to your Colab environment.
# Typically, files can be uploaded to the '/content/' directory.
# If your files are in a different directory (e.g., '/content/my_data/'),
# please update the `data_directory` variable below.
data_directory = "/content/"  # <--- Adjust this path if your files are elsewhere!
df_list = []
for file in file_names:
    file_path = os.path.join(data_directory, file)
    try:
        # Whitespace-separated sensor log with no header row.
        df = pd.read_csv(file_path, sep=r'\s+', header=None)
    except FileNotFoundError:
        print(f"Warning: File {file_path} not found. Skipping.")
    else:
        # Keep the try body minimal: only the read can raise FileNotFoundError.
        df_list.append(df)

# Combine all dataframes into a single one
if df_list:
    data = pd.concat(df_list, ignore_index=True)
else:
    # Raise a more informative error message
    raise ValueError(f"No data files were loaded from '{data_directory}'. "
                     "Please ensure 'mHealth_subject*.log' files are uploaded "
                     "to the correct directory or adjust `data_directory` variable.")

# Remove rows where the last column (activity label) is 0
data = data[data.iloc[:, -1] != 0]

# --- End: Reconstructing 'data' ---

# Numeric activity label -> human-readable activity name (redefined here so
# this cell is self-contained).
activity_labels_map = {
    1: 'Standing',
    2: 'Sitting',
    3: 'Lying',
    4: 'Walking',
    5: 'Climbing Stairs',
    6: 'Waist Bends Forward',
    7: 'Frontal Elevation of Arms',
    8: 'Knees Bending (Crouching)',
    9: 'Cycling',
    10: 'Jogging',
    11: 'Running',
    12: 'Jump'
}

# Class names in ascending label order, derived from the map so they stay in sync.
target_names = [activity_labels_map[label] for label in sorted(activity_labels_map)]

# Separate features (X) and activity labels (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split BEFORE scaling so the scaler only sees training statistics
# (fitting on the full dataset leaks test information).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest Classifier and make predictions
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)

# Assign to best_y_pred for consistency with the original code structure
best_y_pred = y_pred_rf

# Generate and print the classification report using target_names.
# Pass labels= explicitly so the report rows stay aligned with target_names
# even if some activity happens to be absent from the test split.
print("Classification Report for Random Forest Classifier (with activity names):\n")
print(classification_report(y_test, best_y_pred,
                            labels=sorted(activity_labels_map),
                            target_names=target_names))

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Compute the confusion matrix with an explicit label order so its rows and
# columns line up with display_labels below.
cm = confusion_matrix(y_test, y_pred_rf, labels=sorted(activity_labels_map))

# Create a ConfusionMatrixDisplay object
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)

# Initialize a figure and plot onto its axes.
plt.figure(figsize=(12, 10))
disp.plot(cmap='Blues', values_format='d', ax=plt.gca())

plt.title('Confusion Matrix - Random Forest Classifier with Activity Names')

# Rotate x-axis labels for better readability
plt.xticks(rotation=90)
plt.yticks(rotation=0)

plt.show()

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# --- Re-execute model training and prediction to ensure variables are defined ---
# X_train, X_test, y_train, y_test are assumed to be defined above.

# 1. Random Forest Classifier
print("Re-training Random Forest Classifier...")
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Classifier Accuracy: {accuracy_rf:.4f}")
Support Vector Machine (SVC) +print("\nRe-training Support Vector Machine (SVC)...") +svc_classifier = SVC(random_state=42) +svc_classifier.fit(X_train, y_train) +y_pred_svc = svc_classifier.predict(X_test) +accuracy_svc = accuracy_score(y_test, y_pred_svc) +print(f"Support Vector Machine (SVC) Accuracy: {accuracy_svc:.4f}") + +# 3. K-Nearest Neighbors (KNN) Classifier +print("\nRe-training K-Nearest Neighbors (KNN) Classifier...") +knn_classifier = KNeighborsClassifier(n_neighbors=5) +knn_classifier.fit(X_train, y_train) +y_pred_knn = knn_classifier.predict(X_test) +accuracy_knn = accuracy_score(y_test, y_pred_knn) +print(f"K-Nearest Neighbors (KNN) Classifier Accuracy: {accuracy_knn:.4f}") + +# --- End of re-execution of model training --- + +# Gather the accuracy scores (now guaranteed to be available) +print(f"\nRandom Forest Classifier Accuracy: {accuracy_rf:.4f}") +print(f"Support Vector Machine (SVC) Accuracy: {accuracy_svc:.4f}") +print(f"K-Nearest Neighbors (KNN) Classifier Accuracy: {accuracy_knn:.4f}") + +# Create lists for plotting +model_names = ["Random Forest", "SVM", "KNN"] +model_accuracies = [accuracy_rf, accuracy_svc, accuracy_knn] + +print("\nModel names for plotting:", model_names) +print("Model accuracies for plotting:", [f'{acc:.4f}' for acc in model_accuracies]) + +# Accuracy bar chart +plt.figure(figsize=(8, 5)) +bars = plt.bar(model_names, model_accuracies) +plt.title("Model Accuracy Comparison") +plt.xlabel("Models") +plt.ylabel("Accuracy") +plt.ylim(0.98, 1.0) + +for bar, acc in zip(bars, model_accuracies): + plt.text( + bar.get_x() + bar.get_width() / 2, + bar.get_height(), + f"{acc:.4f}", + ha="center", + va="bottom" + ) + +plt.show() + +# Confusion matrix for best model (retained for completeness of original cell) +best_model_name = max( + { + "Random Forest": accuracy_rf, + "SVM": accuracy_svc, + "KNN": accuracy_knn + }, + key={ + "Random Forest": accuracy_rf, + "SVM": accuracy_svc, + "KNN": accuracy_knn + }.get +) + +if 
best_model_name == "Random Forest": + best_predictions = y_pred_rf +elif best_model_name == "SVM": + best_predictions = y_pred_svc +else: + best_predictions = y_pred_knn + +cm = confusion_matrix(y_test, best_predictions) + +plt.figure(figsize=(10, 8)) +disp = ConfusionMatrixDisplay(confusion_matrix=cm) +disp.plot(cmap="Blues", values_format="d") +plt.title(f"Confusion Matrix - {best_model_name}") +plt.show() \ No newline at end of file diff --git a/AI_models_pranav/report.pdf b/AI_models_pranav/report.pdf new file mode 100644 index 000000000..004924894 Binary files /dev/null and b/AI_models_pranav/report.pdf differ