manasvi-0 · ravicoder01 · Jul 28, 2025 · Jul 28, 2025 · Jul 29, 2025 · Aug 6, 2025
diff --git a/app.py b/app.py
@@ -1,30 +1,10 @@
-
-# Importing required library
-import streamlit as st
-import pandas as  pd
-
-#import upload_validate() from data validation
-
-# ==============================
-#  app.py - AlgoLab Main Script
-#  ----------------------------
-#  - Handles UI and navigation
-#  - Dataset Upload & Generation
-#  - Calls interactive_model_tuning()
-# ==============================
-
 import streamlit as st
 import pandas as pd
-from supervised_module import interactive_model_tuning
-
+from unsupervised_algos.kmeans_clustering import run_kmeans
+from unsupervised_algos.dbscan_clustering import run_dbscan
 from data_handler.upload_validate import upload_and_validate
 from sklearn.datasets import make_classification
-
-
-
-# Page configuration
-=======
-# ✅ Page configuration
+from supervised_module import interactive_model_tuning
 
 st.set_page_config(
     page_title="Algo Lab",
@@ -33,10 +13,8 @@
     initial_sidebar_state="expanded"
 )
 
-# ✅ App Title
 st.title("🔬 Algo Labs - Visualize and Learn")
 
-# ✅ Motivational Quote Box
 st.markdown("""
 <div style='padding: 12px; border-left: 5px solid black;
             background-color: rgba(74, 144, 226, 0.1);
@@ -46,55 +24,18 @@
 </div>
 """, unsafe_allow_html=True)
 
-# ✅ Tabs for navigation
 tab1, tab2, tab3 = st.tabs(["Home Page", "Supervised Learning", "Unsupervised Learning"])
 
-
-with tab1:
-    st.write("Veiw Dataframe")
-
-#Supervised  Learning
-with tab2:
-    st.write("Supervised  Learning")
-    options = ["KNN", "Decision Tree", "Logestic Regression","SVM"]
-    selected_option = st.selectbox("Choose an option:", options)
-
-    st.write("You have  selected:", selected_option)
-
-    # KNN Option selection
-    #if selected_option=="KNN":
-     #view = st.radio("Choose View", ["KNN Overview", "KNN Playground"])
-     #if view == "KNN Overview":
-        #from supervised_algo.KNN import knn_theory
-        #knn_theory.render()
-     #elif view == "KNN Playground":
-         #from supervised_algo.KNN import knn_visualization
-         #knn_visualization.render()
-
-#Unsupervised Learning
-with tab3:
-    from unsupervised_algorithms.unsupervised_module import unsupervised
-    # Store uploaded data in session state for unsupervised algorithms
-    if 'df' in locals() and df is not None:
-        st.session_state.uploaded_data = df
-    unsupervised()
-
-# ✅ Global variable to store dataset
 df = None
 
-# ==============================
-# 📂 Sidebar - Upload or Generate Dataset
-# ==============================
 with st.sidebar:
     st.header("📂 Dataset Options")
     options = ["Upload Dataset", "Generate Dataset"]
     selected_option = st.radio("Choose your preferred option:", options, index=0)
 
-    # ✅ Upload dataset with validation
     if selected_option == "Upload Dataset":
         df = upload_and_validate()
 
-    # ✅ Generate synthetic dataset
     elif selected_option == "Generate Dataset":
         no_of_sample = st.slider("No. of Samples", 10, 2000, 100)
         no_of_feature = st.slider("No. of Features", 2, 20, 2)
@@ -117,9 +58,6 @@
             st.success("✅ Dataset Generated Successfully!")
             st.dataframe(df.head())
 
-# ==============================
-# 🏠 Tab 1: Home Page
-# ==============================
 with tab1:
     st.write("Welcome to AlgoLab! 👋")
     if df is not None:
@@ -128,23 +66,36 @@
     else:
         st.info("Upload or generate a dataset to preview here.")
 
-# ==============================
-# 🤖 Tab 2: Supervised Learning
-# ==============================
 with tab2:
     st.write("### Supervised Learning Playground")
     if df is not None:
         interactive_model_tuning(df)
     else:
         st.info("Upload or generate a dataset first to start tuning models.")
 
-# ==============================
-# 🚧 Tab 3: Unsupervised Learning
-# ==============================
 with tab3:
-    st.write("Unsupervised module is under development.")
+    st.write("### Unsupervised Learning")
+
+    if df is not None:
+        st.write("### Data Preview", df.head())
+
+        if 'selected_algo' not in st.session_state:
+            st.session_state.selected_algo = "KMeans"
+
+        st.session_state.selected_algo = st.selectbox(
+            "Choose Clustering Algorithm",
+            ["KMeans", "DBSCAN"],
+            index=["KMeans", "DBSCAN"].index(st.session_state.selected_algo)
+        )
+
+        if st.session_state.selected_algo == "KMeans":
+            run_kmeans(df)
+        elif st.session_state.selected_algo == "DBSCAN":
+            run_dbscan(df)
+    else:
+        st.info("Please upload or generate a dataset from the sidebar to use Unsupervised Learning algorithms.")
+
 
-# Footer
 st.markdown("""
 <style>
 .footer {

diff --git a/docs/dbscan.md b/docs/dbscan.md
@@ -0,0 +1,53 @@
+
+# DBSCAN Clustering
+
+## 📌 What is DBSCAN?
+
+DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is an unsupervised clustering algorithm based on the **density** of points.
+
+It can find clusters of **arbitrary shapes** and identify outliers (noise).
+
+---
+
+## 🛠️ Parameters
+
+| Parameter | Description |
+|-----------|-------------|
+| `eps` | Maximum distance between two samples to be considered neighbors |
+| `min_samples` | Minimum number of samples (including the point itself) within `eps` for a point to count as a core point |
+
+---
+
+## 🎯 Output
+
+- **Clustered Scatter Plot**
+- **Silhouette Score** (if valid)
+- **Clustered Data Preview**
+
+---
+
+## 🧑‍💻 How It Works in Streamlit UI
+
+1. Upload a `.csv` file with at least 2 numeric columns (the first two columns are plotted)
+2. Select `eps` and `min_samples` via sliders
+3. Click **"Run DBSCAN"**
+4. Get visual clusters and evaluation score (if possible)
+
+---
+
+## ⚠️ Special Note
+
+If DBSCAN finds only one cluster or marks all points as noise, silhouette score cannot be calculated.
+
+---
+
+## ✅ Example CSV Format
+
+```csv
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8
diff --git a/docs/kmeans.md b/docs/kmeans.md
@@ -0,0 +1,45 @@
+# KMeans Clustering
+
+## 📌 What is KMeans?
+
+KMeans is a centroid-based unsupervised clustering algorithm that groups data into **k clusters**. It tries to minimize the distance between data points and their respective cluster centers.
+
+---
+
+## 🛠️ Parameters
+
+| Parameter | Description |
+|-----------|-------------|
+| `n_clusters` | Number of clusters to form |
+| `random_state` | Ensures reproducibility |
+| `n_init='auto'` | Lets scikit-learn choose how many centroid initializations to run, based on the `init` method |
+
+---
+
+## 🎯 Output
+
+- **Clustered Scatter Plot**
+- **Silhouette Score** to evaluate cluster separation
+- **Preview of Clustered Data**
+
+---
+
+## 🧑‍💻 How It Works in Streamlit UI
+
+1. Upload a `.csv` file with 2 numeric columns (e.g., x, y)
+2. Select the number of clusters using a slider
+3. Click **"Run KMeans Clustering"**
+4. Get visual clusters and evaluation score
+
+---
+
+## ✅ Example CSV Format
+
+```csv
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8
diff --git a/sample_data.csv b/sample_data.csv
@@ -0,0 +1,7 @@
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8
diff --git a/unsupervised_algos/dbscan_clustering.py b/unsupervised_algos/dbscan_clustering.py
@@ -0,0 +1,106 @@
+import streamlit as st
+from sklearn.cluster import DBSCAN
+from sklearn.metrics import silhouette_score
+import matplotlib.pyplot as plt
+import uuid
+import pandas as pd
+
+# Shown instead of a number when the silhouette score is undefined.
+_NO_SCORE_MESSAGE = "Not enough clusters to compute score"
+
+# Session-state keys owned by this module and their initial values.
+_SESSION_DEFAULTS = {
+    "dbscan_eps": 0.5,
+    "dbscan_min_samples": 5,
+    "dbscan_score": None,
+    "dbscan_plot_fig": None,
+    "dbscan_clustered_data": None,
+}
+
+
+def _prepare_numeric_features(data: pd.DataFrame) -> pd.DataFrame:
+    """Return the numeric, fully populated part of *data* for clustering."""
+    # A "Cluster" column left by an earlier run is a label, not a feature.
+    features = data.drop(columns="Cluster", errors="ignore")
+    features = features.apply(pd.to_numeric, errors="coerce")
+    # Drop all-NaN columns first: otherwise one text column would make the
+    # row-wise dropna below discard every row and DBSCAN would get no data.
+    features = features.dropna(axis=1, how="all")
+    return features.dropna(axis=0)
+
+
+def _silhouette_without_noise(features: pd.DataFrame, labels):
+    """Return the silhouette score of the non-noise points, or None.
+
+    DBSCAN labels noise as -1; noise is not a cluster, so it is excluded.
+    None is returned when the score is undefined, i.e. unless
+    2 <= n_clusters <= n_points - 1 (silhouette_score raises otherwise).
+    """
+    in_cluster = labels != -1
+    n_points = int(in_cluster.sum())
+    n_clusters = len(set(labels[in_cluster]))
+    if n_clusters < 2 or n_clusters >= n_points:
+        return None
+    return silhouette_score(features.to_numpy()[in_cluster], labels[in_cluster])
+
+
+def run_dbscan(data: pd.DataFrame):
+    """Render the DBSCAN form and the results of the most recent run.
+
+    Args:
+        data: Dataset to cluster. Non-numeric values are coerced to NaN;
+            all-NaN columns and rows containing NaN are ignored.
+
+    Results (score, figure, preview) are kept in ``st.session_state`` so they
+    are redrawn on reruns that did not submit the form.
+    """
+    st.subheader("DBSCAN Clustering")
+
+    for key, default in _SESSION_DEFAULTS.items():
+        if key not in st.session_state:
+            st.session_state[key] = default
+
+    with st.form(key="dbscan_form"):
+        eps = st.slider("Epsilon (eps)", 0.1, 5.0, st.session_state.dbscan_eps, step=0.1)
+        min_samples = st.slider("Min Samples", 1, 20, st.session_state.dbscan_min_samples)
+        submitted = st.form_submit_button("Run DBSCAN")
+
+    if submitted:
+        features = _prepare_numeric_features(data)
+        # The scatter plot needs two columns and DBSCAN needs at least one row.
+        if features.empty or features.shape[1] < 2:
+            st.error("DBSCAN needs at least 2 numeric columns and 1 complete row.")
+        else:
+            labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(features)
+            score = _silhouette_without_noise(features, labels)
+
+            clustered = features.copy()
+            clustered["Cluster"] = labels
+
+            fig, ax = plt.subplots(figsize=(6, 4))
+            ax.scatter(features.iloc[:, 0], features.iloc[:, 1], c=labels, cmap='plasma')
+            ax.set_title("DBSCAN Clustering")
+            ax.set_xlabel("X")
+            ax.set_ylabel("Y")
+
+            st.session_state.dbscan_eps = eps
+            st.session_state.dbscan_min_samples = min_samples
+            st.session_state.dbscan_clustered_data = clustered.head()
+            st.session_state.dbscan_score = _NO_SCORE_MESSAGE if score is None else score
+            st.session_state.dbscan_plot_fig = fig
+
+    # Result display (also runs on reruns without a submit)
+    score = st.session_state.dbscan_score
+    if isinstance(score, str):
+        st.warning(score)
+    elif score is not None:
+        st.write(f"### Silhouette Score: {score:.2f}")
+
+    fig = st.session_state.dbscan_plot_fig
+    if fig is not None:
+        st.pyplot(fig)
+        # Release pyplot's reference; the figure itself stays in session state.
+        plt.close(fig)
+
+    if st.session_state.dbscan_clustered_data is not None:
+        st.write("### Clustered Data", st.session_state.dbscan_clustered_data)
-Original file line number
+Diff line change
@@ -0,0 +1,7 @@
+x,y
+1,2
+2,3
+3,4
+8,7
+9,6
+10,8