Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 24 additions & 73 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,10 @@

# Importing required library
import streamlit as st
import pandas as pd

#import upload_validate() from data validation

# ==============================
# app.py - AlgoLab Main Script
# ----------------------------
# - Handles UI and navigation
# - Dataset Upload & Generation
# - Calls interactive_model_tuning()
# ==============================

import streamlit as st
import pandas as pd
from supervised_module import interactive_model_tuning

from unsupervised_algos.kmeans_clustering import run_kmeans
from unsupervised_algos.dbscan_clustering import run_dbscan
from data_handler.upload_validate import upload_and_validate
from sklearn.datasets import make_classification



# Page configuration
=======
# βœ… Page configuration
from supervised_module import interactive_model_tuning

st.set_page_config(
page_title="Algo Lab",
Expand All @@ -33,10 +13,8 @@
initial_sidebar_state="expanded"
)

# βœ… App Title
st.title("πŸ”¬ Algo Labs - Visualize and Learn")

# βœ… Motivational Quote Box
st.markdown("""
<div style='padding: 12px; border-left: 5px solid black;
background-color: rgba(74, 144, 226, 0.1);
Expand All @@ -46,55 +24,18 @@
</div>
""", unsafe_allow_html=True)

# βœ… Tabs for navigation
tab1, tab2, tab3 = st.tabs(["Home Page", "Supervised Learning", "Unsupervised Learning"])


with tab1:
st.write("Veiw Dataframe")

#Supervised Learning
with tab2:
st.write("Supervised Learning")
options = ["KNN", "Decision Tree", "Logestic Regression","SVM"]
selected_option = st.selectbox("Choose an option:", options)

st.write("You have selected:", selected_option)

# KNN Option selection
#if selected_option=="KNN":
#view = st.radio("Choose View", ["KNN Overview", "KNN Playground"])
#if view == "KNN Overview":
#from supervised_algo.KNN import knn_theory
#knn_theory.render()
#elif view == "KNN Playground":
#from supervised_algo.KNN import knn_visualization
#knn_visualization.render()

#Unsupervised Learning
with tab3:
from unsupervised_algorithms.unsupervised_module import unsupervised
# Store uploaded data in session state for unsupervised algorithms
if 'df' in locals() and df is not None:
st.session_state.uploaded_data = df
unsupervised()

# βœ… Global variable to store dataset
df = None

# ==============================
# πŸ“‚ Sidebar - Upload or Generate Dataset
# ==============================
with st.sidebar:
st.header("πŸ“‚ Dataset Options")
options = ["Upload Dataset", "Generate Dataset"]
selected_option = st.radio("Choose your preferred option:", options, index=0)

# βœ… Upload dataset with validation
if selected_option == "Upload Dataset":
df = upload_and_validate()

# βœ… Generate synthetic dataset
elif selected_option == "Generate Dataset":
no_of_sample = st.slider("No. of Samples", 10, 2000, 100)
no_of_feature = st.slider("No. of Features", 2, 20, 2)
Expand All @@ -117,9 +58,6 @@
st.success("βœ… Dataset Generated Successfully!")
st.dataframe(df.head())

# ==============================
# 🏠 Tab 1: Home Page
# ==============================
with tab1:
st.write("Welcome to AlgoLab! πŸ‘‹")
if df is not None:
Expand All @@ -128,23 +66,36 @@
else:
st.info("Upload or generate a dataset to preview here.")

# ==============================
# πŸ€– Tab 2: Supervised Learning
# ==============================
# Supervised-learning tab: hand the active dataset to the tuning playground,
# or prompt for one when nothing has been uploaded/generated yet.
with tab2:
    st.write("### Supervised Learning Playground")
    if df is None:
        st.info("Upload or generate a dataset first to start tuning models.")
    else:
        interactive_model_tuning(df)

# ==============================
# 🚧 Tab 3: Unsupervised Learning
# ==============================
# Unsupervised-learning tab: preview the active dataset and dispatch to the
# clustering playground chosen by the user.
with tab3:
    st.write("### Unsupervised Learning")

    if df is None:
        st.info("Please upload or generate a dataset from the sidebar to use Unsupervised Learning algorithms.")
    else:
        st.write("### Data Preview", df.head())

        # Single source of truth for the selectable algorithms and their runners.
        clustering_runners = {"KMeans": run_kmeans, "DBSCAN": run_dbscan}
        algo_names = list(clustering_runners)

        # Restore the previous choice across reruns; fall back to the first
        # entry if session state holds a name that is no longer offered
        # (list.index would raise ValueError otherwise).
        remembered = st.session_state.get("selected_algo", "KMeans")
        if remembered not in clustering_runners:
            remembered = algo_names[0]

        st.session_state.selected_algo = st.selectbox(
            "Choose Clustering Algorithm",
            algo_names,
            index=algo_names.index(remembered),
        )

        clustering_runners[st.session_state.selected_algo](df)


# Footer
st.markdown("""
<style>
.footer {
Expand Down
53 changes: 53 additions & 0 deletions docs/dbscan.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

# DBSCAN Clustering

## 📌 What is DBSCAN?

DBSCAN (Density-Based Spatial Clustering of Applications with Noise) is an unsupervised clustering algorithm based on the **density** of points.

It can find clusters of **arbitrary shapes** and identify outliers (noise).

---

## 🛠️ Parameters

| Parameter | Description |
|-----------|-------------|
| `eps` | Maximum distance between two samples to be considered neighbors |
| `min_samples` | Minimum number of samples (including the point itself) within `eps` of a point for it to count as a core point of a dense region |

---

## 🎯 Output

- **Clustered Scatter Plot**
- **Silhouette Score** (if valid)
- **Clustered Data Preview**

---

## 🧑‍💻 How It Works in Streamlit UI

1. Upload a `.csv` file with 2 numeric columns
2. Select `eps` and `min_samples` via sliders
3. Click **"Run DBSCAN"**
4. Get visual clusters and evaluation score (if possible)

---

## ⚠️ Special Note

If DBSCAN finds only one cluster or marks all points as noise, silhouette score cannot be calculated.

---

## ✅ Example CSV Format

```csv
x,y
1,2
2,3
3,4
8,7
9,6
10,8
```
45 changes: 45 additions & 0 deletions docs/kmeans.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# KMeans Clustering

## 📌 What is KMeans?

KMeans is a centroid-based unsupervised clustering algorithm that groups data into **k clusters**. It tries to minimize the distance between data points and their respective cluster centers.

---

## 🛠️ Parameters

| Parameter | Description |
|-----------|-------------|
| `n_clusters` | Number of clusters to form |
| `random_state` | Ensures reproducibility |
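| `n_init='auto'` | Lets scikit-learn choose how many times the algorithm is re-run with different centroid seeds, based on the initialization method |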

---

## 🎯 Output

- **Clustered Scatter Plot**
- **Silhouette Score** to evaluate cluster separation
- **Preview of Clustered Data**

---

## 🧑‍💻 How It Works in Streamlit UI

1. Upload a `.csv` file with 2 numeric columns (e.g., x, y)
2. Select the number of clusters using a slider
3. Click **"Run KMeans Clustering"**
4. Get visual clusters and evaluation score

---

## ✅ Example CSV Format

```csv
x,y
1,2
2,3
3,4
8,7
9,6
10,8
```
7 changes: 7 additions & 0 deletions sample_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
x,y
1,2
2,3
3,4
8,7
9,6
10,8
74 changes: 74 additions & 0 deletions unsupervised_algos/dbscan_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import streamlit as st
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import uuid
import pandas as pd

def run_dbscan(data: pd.DataFrame):
st.subheader("DBSCAN Clustering")

# Session state variables
if 'dbscan_eps' not in st.session_state:
st.session_state.dbscan_eps = 0.5
if 'dbscan_min_samples' not in st.session_state:
st.session_state.dbscan_min_samples = 5
if 'dbscan_score' not in st.session_state:
st.session_state.dbscan_score = None
if 'dbscan_plot_fig' not in st.session_state:
st.session_state.dbscan_plot_fig = None
if 'dbscan_clustered_data' not in st.session_state:
st.session_state.dbscan_clustered_data = None

# UI form
with st.form(key="dbscan_form"):
eps = st.slider("Epsilon (eps)", 0.1, 5.0, st.session_state.dbscan_eps, step=0.1)
min_samples = st.slider("Min Samples", 1, 20, st.session_state.dbscan_min_samples)
submitted = st.form_submit_button("Run DBSCAN")

if submitted:
st.write("βœ… Inside DBSCAN submit")

input_data = data.copy()
input_data = input_data.apply(pd.to_numeric, errors='coerce')
input_data.dropna(inplace=True)

# Drop old cluster column
if "Cluster" in input_data.columns:
input_data.drop("Cluster", axis=1, inplace=True)

model = DBSCAN(eps=eps, min_samples=min_samples)
labels = model.fit_predict(input_data)
input_data["Cluster"] = labels

st.session_state.dbscan_eps = eps
st.session_state.dbscan_min_samples = min_samples
st.session_state.dbscan_clustered_data = input_data.head()

if len(set(labels)) > 1 and -1 not in set(labels):
score = silhouette_score(input_data.iloc[:, :-1], labels)
st.session_state.dbscan_score = score
else:
st.session_state.dbscan_score = "Not enough clusters to compute score"

# Plot
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(input_data.iloc[:, 0], input_data.iloc[:, 1], c=labels, cmap='plasma')
ax.set_title("DBSCAN Clustering")
ax.set_xlabel("X")
ax.set_ylabel("Y")
st.session_state.dbscan_plot_fig = fig

# Result display (outside submit)
if st.session_state.dbscan_score is not None:
if isinstance(st.session_state.dbscan_score, str):
st.warning(st.session_state.dbscan_score)
else:
st.write(f"### Silhouette Score: {st.session_state.dbscan_score:.2f}")

if st.session_state.dbscan_plot_fig:
st.pyplot(st.session_state.dbscan_plot_fig)
plt.close(st.session_state.dbscan_plot_fig)

if st.session_state.dbscan_clustered_data is not None:
st.write("### Clustered Data", st.session_state.dbscan_clustered_data)
Loading