-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimple_semantic_agent.py
More file actions
77 lines (62 loc) · 2.61 KB
/
simple_semantic_agent.py
File metadata and controls
77 lines (62 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
Fully Open-Ended Semantic CSV Agent
- Works for ANY question about your CSV data
- No hardcoded intents
"""
import pandas as pd
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from sentence_transformers import SentenceTransformer, util
import torch
import uvicorn
# =============================
# LOAD CSVs
# =============================
# Read both CSV files, replacing missing cells with empty strings.
trainer_df, session_df = (
    pd.read_csv(csv_path).fillna("")
    for csv_path in ("trainer_details.csv", "trainer_session_details.csv")
)

# Flatten every row into one " | "-separated string; this text is what gets
# embedded for semantic matching.
trainer_df['combined_text'] = trainer_df.apply(
    lambda record: " | ".join(map(str, record)), axis=1
)
session_df['combined_text'] = session_df.apply(
    lambda record: " | ".join(map(str, record)), axis=1
)
# =============================
# LOAD EMBEDDING MODEL
# =============================
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the flattened row text of each table once at import time
# (trainers first, then sessions) so requests only need to embed the question.
trainer_embeddings, session_embeddings = (
    model.encode(frame['combined_text'].tolist(), convert_to_tensor=True)
    for frame in (trainer_df, session_df)
)
# =============================
# AGENT FUNCTION
# =============================
def semantic_search(question: str, top_k=5):
    """Return the rows of both tables that best match *question*.

    The question is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed trainer and session embeddings.

    Args:
        question: Free-text query to embed.
        top_k: Maximum number of rows to return per table. Values larger than
            a table's row count are capped to that count; negative values are
            treated as 0.

    Returns:
        A dict with keys ``"trainers"`` and ``"sessions"``, each a list of row
        dicts (``DataFrame.to_dict(orient='records')``) ordered from most to
        least similar. Rows include the ``combined_text`` column.
    """
    q_embedding = model.encode(question, convert_to_tensor=True)

    def _top_rows(df, embeddings):
        # Rank one table's rows against the question embedding.
        scores = util.cos_sim(q_embedding, embeddings)[0]
        k = max(0, min(top_k, len(df)))
        # Convert the index tensor to plain Python ints so ``iloc`` receives
        # ordinary positional indices no matter which device the tensor is on.
        positions = torch.topk(scores, k=k).indices.tolist()
        return df.iloc[positions].to_dict(orient='records')

    return {
        "trainers": _top_rows(trainer_df, trainer_embeddings),
        "sessions": _top_rows(session_df, session_embeddings),
    }
# =============================
# FASTAPI
# =============================
# Application object; the route decorators below register their handlers on it.
app = FastAPI(title="Open-Ended Semantic CSV Agent")
@app.get("/")
def home():
    """Root route: report that the service is up and point callers at /ask."""
    body = {"message":"Semantic CSV Agent is running. Use /ask with POST method."}
    return JSONResponse(content=body)
@app.post("/ask")
def ask(payload: dict):
    """Answer a question by returning the best-matching CSV rows.

    Args:
        payload: Request body; the query is read from its ``"question"`` key.

    Returns:
        A JSONResponse wrapping the result of ``semantic_search``.

    Raises:
        HTTPException: 400 when ``"question"`` is missing, is not a string,
            or is empty after stripping whitespace.
    """
    question = payload.get("question")
    # A non-string value (null, number, list, ...) has no .strip(); reject it
    # with the same 400 as a missing question instead of failing with a 500.
    if not isinstance(question, str) or not question.strip():
        raise HTTPException(status_code=400, detail="No question provided")
    result = semantic_search(question.strip())
    return JSONResponse(content=result)
@app.get("/health")
def health():
    """Health-check route: always returns a fixed healthy status."""
    return dict(status="healthy")
if __name__ == "__main__":
    # When run as a script, serve the app on port 8012, bound to 0.0.0.0.
    uvicorn.run(
        app,
        port=8012,
        host="0.0.0.0",
    )