diff --git a/app.py b/app.py
index e16ff7a..ec5cea5 100644
--- a/app.py
+++ b/app.py
@@ -1,30 +1,10 @@
-
-# Importing required library
-import streamlit as st
-import pandas as pd
-
-#import upload_validate() from data validation
-
-# ==============================
-# app.py - AlgoLab Main Script
-# ----------------------------
-# - Handles UI and navigation
-# - Dataset Upload & Generation
-# - Calls interactive_model_tuning()
-# ==============================
-
import streamlit as st
import pandas as pd
-from supervised_module import interactive_model_tuning
-
+from unsupervised_algos.kmeans_clustering import run_kmeans
+from unsupervised_algos.dbscan_clustering import run_dbscan
from data_handler.upload_validate import upload_and_validate
from sklearn.datasets import make_classification
-
-
-
-# Page configuration
-=======
-# ✅ Page configuration
+from supervised_module import interactive_model_tuning
st.set_page_config(
page_title="Algo Lab",
@@ -33,10 +13,8 @@
initial_sidebar_state="expanded"
)
-# ✅ App Title
st.title("🔬 Algo Labs - Visualize and Learn")
-# ✅ Motivational Quote Box
st.markdown("""
.footer {
diff --git a/docs/dbscan.md b/docs/dbscan.md
new file mode 100644
index 0000000..533ceeb
--- /dev/null
+++ b/docs/dbscan.md
@@ -0,0 +1,53 @@
+
+# DBSCAN Clustering
+
+## 📌 What is DBSCAN?
+
+DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is an unsupervised clustering algorithm based on the **density** of points.
+
+It can find clusters of **arbitrary shapes** and identify outliers (noise).
+
+---
+
+## 🛠️ Parameters
+
+| Parameter | Description |
+|-----------|-------------|
+| `eps` | Maximum distance between two samples to be considered neighbors |
+| `min_samples` | Minimum number of samples (including the point itself) within `eps` of a point for it to count as a core point of a dense region |
+
+---
+
+## 🎯 Output
+
+- **Clustered Scatter Plot**
+- **Silhouette Score** (if valid)
+- **Clustered Data Preview**
+
+---
+
+## 🧑‍💻 How It Works in Streamlit UI
+
+1. Upload a `.csv` file with 2 numeric columns
+2. Select `eps` and `min_samples` via sliders
+3. Click **"Run DBSCAN"**
+4. Get visual clusters and evaluation score (if possible)
+
+---
+
+## ⚠️ Special Note
+
+If DBSCAN finds only one cluster or marks all points as noise, silhouette score cannot be calculated.
+
+---
+
+## ✅ Example CSV Format
+
+```csv
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8
diff --git a/docs/kmeans.md b/docs/kmeans.md
new file mode 100644
index 0000000..b8082e5
--- /dev/null
+++ b/docs/kmeans.md
@@ -0,0 +1,45 @@
+# KMeans Clustering
+
+## 📌 What is KMeans?
+
+KMeans is a centroid-based unsupervised clustering algorithm that groups data into **k clusters**. It tries to minimize the distance between data points and their respective cluster centers.
+
+---
+
+## 🛠️ Parameters
+
+| Parameter | Description |
+|-----------|-------------|
+| `n_clusters` | Number of clusters to form |
+| `random_state` | Ensures reproducibility |
+| `n_init='auto'` | Lets scikit-learn pick how many times the algorithm is run with different initial centroids |
+
+---
+
+## 🎯 Output
+
+- **Clustered Scatter Plot**
+- **Silhouette Score** to evaluate cluster separation
+- **Preview of Clustered Data**
+
+---
+
+## 🧑‍💻 How It Works in Streamlit UI
+
+1. Upload a `.csv` file with 2 numeric columns (e.g., x, y)
+2. Select the number of clusters using a slider
+3. Click **"Run KMeans Clustering"**
+4. Get visual clusters and evaluation score
+
+---
+
+## ✅ Example CSV Format
+
+```csv
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8
diff --git a/sample_data.csv b/sample_data.csv
new file mode 100644
index 0000000..c189873
--- /dev/null
+++ b/sample_data.csv
@@ -0,0 +1,7 @@
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8
diff --git a/unsupervised_algos/dbscan_clustering.py b/unsupervised_algos/dbscan_clustering.py
new file mode 100644
index 0000000..cbdec50
--- /dev/null
+++ b/unsupervised_algos/dbscan_clustering.py
@@ -0,0 +1,96 @@
+import streamlit as st
+from sklearn.cluster import DBSCAN
+from sklearn.metrics import silhouette_score
+import matplotlib.pyplot as plt
+import uuid
+import pandas as pd
+
+def run_dbscan(data: pd.DataFrame):
+    """Render the DBSCAN form, cluster ``data`` on submit and show the results.
+
+    The chosen parameters, the silhouette score (or an explanatory message),
+    the scatter plot and a preview of the labelled rows are stored in
+    ``st.session_state`` so they stay visible when Streamlit re-runs the script.
+
+    Args:
+        data: Input table. Every column is coerced to numeric and rows that
+            contain missing or non-numeric values are dropped before
+            clustering. The first two columns are used as the plot axes.
+    """
+    st.subheader("DBSCAN Clustering")
+
+    # Seed session state once so slider defaults and results survive re-runs.
+    defaults = {
+        "dbscan_eps": 0.5,
+        "dbscan_min_samples": 5,
+        "dbscan_score": None,
+        "dbscan_plot_fig": None,
+        "dbscan_clustered_data": None,
+    }
+    for key, value in defaults.items():
+        if key not in st.session_state:
+            st.session_state[key] = value
+
+    # Parameters are collected in a form so clustering only runs on submit.
+    with st.form(key="dbscan_form"):
+        eps = st.slider("Epsilon (eps)", 0.1, 5.0, st.session_state.dbscan_eps, step=0.1)
+        min_samples = st.slider("Min Samples", 1, 20, st.session_state.dbscan_min_samples)
+        submitted = st.form_submit_button("Run DBSCAN")
+
+    if submitted:
+        st.write("✅ Inside DBSCAN submit")
+
+        # Drop a "Cluster" column left over from an earlier run *before*
+        # cleaning, so stale labels neither become a feature nor cause rows
+        # to be discarded; drop() returns a new frame, so ``data`` is untouched.
+        input_data = data.drop(columns="Cluster", errors="ignore")
+        input_data = input_data.apply(pd.to_numeric, errors="coerce").dropna()
+
+        # DBSCAN raises on an empty table and the plot needs two axes.
+        if input_data.empty or input_data.shape[1] < 2:
+            st.error("DBSCAN needs at least two numeric columns and one complete row.")
+            return
+
+        model = DBSCAN(eps=eps, min_samples=min_samples)
+        labels = model.fit_predict(input_data)
+
+        # Score only the points assigned to a cluster (label -1 marks noise).
+        # silhouette_score needs 2 <= n_clusters <= n_points - 1, so noise no
+        # longer blocks the score and one-point-per-cluster results no longer
+        # raise.
+        in_cluster = labels != -1
+        n_clusters = len(set(labels[in_cluster]))
+        if 2 <= n_clusters < int(in_cluster.sum()):
+            score = float(silhouette_score(input_data.loc[in_cluster], labels[in_cluster]))
+        else:
+            score = "Not enough clusters to compute score"
+
+        input_data["Cluster"] = labels
+
+        st.session_state.dbscan_eps = eps
+        st.session_state.dbscan_min_samples = min_samples
+        st.session_state.dbscan_score = score
+        st.session_state.dbscan_clustered_data = input_data.head()
+
+        # Plot the first two columns, coloured by cluster label.
+        fig, ax = plt.subplots(figsize=(6, 4))
+        ax.scatter(input_data.iloc[:, 0], input_data.iloc[:, 1], c=labels, cmap='plasma')
+        ax.set_title("DBSCAN Clustering")
+        ax.set_xlabel("X")
+        ax.set_ylabel("Y")
+        st.session_state.dbscan_plot_fig = fig
+
+    # Results are rendered outside the submit branch so they persist on re-runs.
+    if st.session_state.dbscan_score is not None:
+        if isinstance(st.session_state.dbscan_score, str):
+            st.warning(st.session_state.dbscan_score)
+        else:
+            st.write(f"### Silhouette Score: {st.session_state.dbscan_score:.2f}")
+
+    if st.session_state.dbscan_plot_fig is not None:
+        st.pyplot(st.session_state.dbscan_plot_fig)
+        # Release the figure from pyplot's registry to avoid accumulating figures.
+        plt.close(st.session_state.dbscan_plot_fig)
+
+    if st.session_state.dbscan_clustered_data is not None:
+        st.write("### Clustered Data", st.session_state.dbscan_clustered_data)
diff --git a/unsupervised_algos/kmeans_clustering.py b/unsupervised_algos/kmeans_clustering.py
new file mode 100644
index 0000000..1e656c9
--- /dev/null
+++ b/unsupervised_algos/kmeans_clustering.py
@@ -0,0 +1,94 @@
+import streamlit as st
+from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_score
+import matplotlib.pyplot as plt
+import pandas as pd # Added for type hinting and clarity
+
+def run_kmeans(data: pd.DataFrame):
+    """Render the KMeans form, cluster ``data`` on submit and show the results.
+
+    The chosen cluster count, the silhouette score, the scatter plot and a
+    preview of the labelled rows are stored in ``st.session_state`` so they
+    stay visible when Streamlit re-runs the script.
+
+    Args:
+        data: Input table. Every column is coerced to numeric and rows that
+            contain missing or non-numeric values are dropped before
+            clustering. The first two columns are used as the plot axes.
+    """
+    st.subheader("KMeans Clustering")
+
+    # Seed session state once so the slider default and results survive re-runs.
+    defaults = {
+        "silhouette_score": None,
+        "kmeans_plot_fig": None,
+        "clustered_data_head": None,
+        "kmeans_n_clusters": 3,  # last cluster count used; 3 on first load
+    }
+    for key, value in defaults.items():
+        if key not in st.session_state:
+            st.session_state[key] = value
+
+    # A fixed form key keeps the widget identity stable across re-runs.
+    with st.form(key="kmeans_params_form"):
+        # The slider starts at the cluster count stored in session state.
+        n_clusters = st.slider("Select number of clusters", 2, 10, st.session_state.kmeans_n_clusters, key="kmeans_n_clusters_slider")
+        submitted = st.form_submit_button("Run KMeans Clustering")
+        st.write("Data Types:", data.dtypes)
+
+    if submitted:
+        st.write("✅ Inside submit block")
+
+        # Drop a "Cluster" column left over from an earlier run, then keep only
+        # complete numeric rows (KMeans rejects NaN and non-numeric values);
+        # drop() returns a new frame, so ``data`` is untouched.
+        input_data = data.drop(columns="Cluster", errors="ignore")
+        input_data = input_data.apply(pd.to_numeric, errors="coerce").dropna()
+
+        # The plot needs two axes, and silhouette_score needs more rows than
+        # clusters (2 <= n_labels <= n_samples - 1).
+        if input_data.shape[1] < 2 or len(input_data) <= n_clusters:
+            st.error(
+                f"KMeans with {n_clusters} clusters needs at least two numeric "
+                f"columns and more than {n_clusters} complete rows."
+            )
+            return
+
+        # n_init='auto' avoids scikit-learn's FutureWarning about the default.
+        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
+        labels = kmeans.fit_predict(input_data)
+
+        # Duplicate rows can collapse everything into one cluster, in which
+        # case no silhouette score exists.
+        if len(set(labels)) >= 2:
+            score = float(silhouette_score(input_data, labels))
+        else:
+            score = None
+            st.warning("Not enough clusters to compute score")
+
+        input_data["Cluster"] = labels
+
+        st.session_state.silhouette_score = score
+        st.session_state.kmeans_n_clusters = n_clusters
+
+        # Plot the first two columns, coloured by cluster label.
+        fig, ax = plt.subplots(figsize=(6, 4))
+        ax.scatter(input_data.iloc[:, 0], input_data.iloc[:, 1], c=input_data["Cluster"], cmap='viridis')
+        ax.set_xlabel("Feature 1")
+        ax.set_ylabel("Feature 2")
+        ax.set_title("KMeans Clustering")
+        st.session_state.kmeans_plot_fig = fig
+
+        # Keep only a small preview of the labelled rows.
+        st.session_state.clustered_data_head = input_data.head()
+
+    # Results are rendered outside the submit branch so they persist on re-runs.
+    if st.session_state.silhouette_score is not None:
+        st.write(f"### Silhouette Score: {st.session_state.silhouette_score:.2f}")
+
+    if st.session_state.kmeans_plot_fig is not None:
+        st.pyplot(st.session_state.kmeans_plot_fig)
+        plt.close(st.session_state.kmeans_plot_fig)  # close the figure to avoid leaking memory
+
+    if st.session_state.clustered_data_head is not None:
+        st.write("### Clustered Data", st.session_state.clustered_data_head)
\ No newline at end of file