# admin-analytics-agent/simple_semantic_agent.py at main · ShifanaRajamohamed/admin-analytics-agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
Fully Open-Ended Semantic CSV Agent
- Works for ANY question about your CSV data
- No hardcoded intents
"""

import pandas as pd
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from sentence_transformers import SentenceTransformer, util
import torch
import uvicorn

# =============================
# LOAD CSVs
# =============================
def _row_to_text(row):
    """Render every cell of *row* as text and join the pieces with " | "."""
    return " | ".join(map(str, row))


# Read both CSV files; cells with no value are replaced by empty strings.
trainer_df = pd.read_csv("trainer_details.csv")
trainer_df = trainer_df.fillna("")
session_df = pd.read_csv("trainer_session_details.csv")
session_df = session_df.fillna("")

# Add a 'combined_text' column holding ALL of a row's values (not only the
# text columns) as one " | "-separated string, used as the searchable text.
trainer_df['combined_text'] = trainer_df.apply(_row_to_text, axis=1)
session_df['combined_text'] = session_df.apply(_row_to_text, axis=1)

# =============================
# LOAD EMBEDDING MODEL
# =============================
def _embed_rows(frame):
    """Encode the 'combined_text' column of *frame* and return the result as a tensor."""
    texts = frame['combined_text'].tolist()
    return model.encode(texts, convert_to_tensor=True)


# Sentence-embedding model shared by the row embeddings and by every query.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed each CSV row once at import time so requests only encode the question.
trainer_embeddings = _embed_rows(trainer_df)
session_embeddings = _embed_rows(session_df)

# =============================
# AGENT FUNCTION
# =============================
def _top_rows(frame, embeddings, query_embedding, top_k):
    """Return up to ``top_k`` rows of *frame* ranked by cosine similarity, as dicts."""
    # Clamp k: torch.topk rejects a negative k or one larger than the row count.
    k = max(0, min(top_k, len(frame)))
    if k == 0:
        return []
    scores = util.cos_sim(query_embedding, embeddings)[0]
    # The indices tensor lives on the same device as the embeddings; pandas
    # cannot index with a non-CPU tensor, so hand it plain Python ints.
    positions = torch.topk(scores, k=k).indices.cpu().tolist()
    return frame.iloc[positions].to_dict(orient='records')


def semantic_search(question: str, top_k: int = 5) -> dict:
    """Return the rows of both CSV tables that best match *question*.

    The question is encoded with the module-level ``model`` and compared, by
    cosine similarity, against the precomputed row embeddings of the trainer
    and session tables.

    Args:
        question: Free-text query to search for.
        top_k: Maximum number of rows to return per table. Values larger than
            a table are capped at its row count; zero or negative values
            yield an empty list for that table.

    Returns:
        A dict with keys ``"trainers"`` and ``"sessions"``, each a list of
        row dicts (column name -> value) ordered from most to least similar.
    """
    q_embedding = model.encode(question, convert_to_tensor=True)

    return {
        "trainers": _top_rows(trainer_df, trainer_embeddings, q_embedding, top_k),
        "sessions": _top_rows(session_df, session_embeddings, q_embedding, top_k),
    }

# =============================
# FASTAPI
# =============================
# Application object; the route decorators in this module register handlers on
# it, and it is what uvicorn serves.
app = FastAPI(title="Open-Ended Semantic CSV Agent")

@app.get("/")
def home():
    """Root endpoint: reply with a short JSON notice pointing clients at POST /ask."""
    body = {"message": "Semantic CSV Agent is running. Use /ask with POST method."}
    return JSONResponse(content=body)

@app.post("/ask")
def ask(payload: dict):
    """Answer a question by semantic search over the CSV data.

    Args:
        payload: JSON object from the request body; its ``"question"`` entry
            must be a non-blank string.

    Returns:
        A JSONResponse carrying the dict produced by ``semantic_search``.

    Raises:
        HTTPException: 400 when ``"question"`` is missing, is not a string,
            or contains only whitespace.
    """
    question = payload.get("question")
    # A JSON null, number, list, etc. has no .strip(); treat it as a bad
    # request instead of letting an AttributeError surface as a 500.
    if not isinstance(question, str) or not question.strip():
        raise HTTPException(status_code=400, detail="No question provided")
    result = semantic_search(question.strip())
    return JSONResponse(content=result)

@app.get("/health")
def health():
    """Health-check endpoint: always reports a fixed "healthy" status."""
    status_report = dict(status="healthy")
    return status_report

# Start the server only when this file is executed directly, not when imported.
if __name__ == "__main__":
    # Bind to all network interfaces (0.0.0.0) on port 8012.
    uvicorn.run(app, host="0.0.0.0", port=8012)