Skip to content

Commit cf6af85

Browse files
authored
Merge branch 'main' into bug/inconsistent_db
2 parents da9d027 + b404bee commit cf6af85

File tree

15 files changed

+483
-25
lines changed

15 files changed

+483
-25
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""Change vector_store_id to vector_store_ids in openai_assistant table
2+
3+
Revision ID: f2589428c1d0
4+
Revises: 3389c67fdcb4
5+
Create Date: 2025-07-10 11:18:21.223114
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "f2589428c1d0"
14+
down_revision = "3389c67fdcb4"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Replace the scalar ``vector_store_id`` column with ``vector_store_ids``.

    Runs three steps against ``openai_assistant``, in order:

    1. add a nullable ``vector_store_ids`` column of type ``ARRAY(String)``;
    2. copy every non-null ``vector_store_id`` into a one-element array;
    3. drop the old ``vector_store_id`` column.
    """
    table_name = "openai_assistant"

    ids_column = sa.Column(
        "vector_store_ids", postgresql.ARRAY(sa.String()), nullable=True
    )
    op.add_column(table_name, ids_column)

    # The backfill must run while both columns exist. Rows whose
    # vector_store_id is NULL are skipped and keep a NULL array.
    backfill_sql = """
        UPDATE openai_assistant
        SET vector_store_ids = ARRAY[vector_store_id]
        WHERE vector_store_id IS NOT NULL
    """
    op.execute(backfill_sql)

    op.drop_column(table_name, "vector_store_id")
36+
37+
38+
def downgrade():
    """Restore the scalar ``vector_store_id`` column from ``vector_store_ids``.

    This is lossy: only the first element of each array is copied back, so
    any additional vector store IDs are discarded when the array column is
    dropped.
    """
    table_name = "openai_assistant"

    # NOTE(review): the column is re-created as nullable and is never
    # tightened afterwards. If the pre-upgrade schema declared it NOT NULL,
    # a follow-up ALTER is still needed -- confirm against the prior revision.
    legacy_column = sa.Column(
        "vector_store_id",
        sa.VARCHAR(length=255),
        autoincrement=False,
        nullable=True,
    )
    op.add_column(table_name, legacy_column)

    # PostgreSQL arrays are 1-based, so [1] is the first stored ID. Rows with
    # a NULL or empty array are skipped and keep a NULL vector_store_id.
    restore_sql = """
        UPDATE openai_assistant
        SET vector_store_id = vector_store_ids[1]
        WHERE vector_store_ids IS NOT NULL AND array_length(vector_store_ids, 1) > 0
    """
    op.execute(restore_sql)

    op.drop_column(table_name, "vector_store_ids")

backend/app/api/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from app.api.routes import (
44
api_keys,
5+
assistants,
56
collections,
67
documents,
78
login,
@@ -20,6 +21,7 @@
2021

2122
api_router = APIRouter()
2223
api_router.include_router(api_keys.router)
24+
api_router.include_router(assistants.router)
2325
api_router.include_router(collections.router)
2426
api_router.include_router(credentials.router)
2527
api_router.include_router(documents.router)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import Annotated
2+
3+
from fastapi import APIRouter, Depends, Path
4+
from sqlmodel import Session
5+
6+
from app.api.deps import get_db, get_current_user_org_project
7+
from app.crud import (
8+
fetch_assistant_from_openai,
9+
sync_assistant,
10+
)
11+
from app.models import UserProjectOrg
12+
from app.utils import APIResponse, get_openai_client
13+
14+
router = APIRouter(prefix="/assistant", tags=["Assistants"])
15+
16+
17+
@router.post(
    "/{assistant_id}/ingest",
    response_model=APIResponse,
    status_code=201,
)
def ingest_assistant_route(
    assistant_id: Annotated[str, Path(description="The ID of the assistant to ingest")],
    session: Session = Depends(get_db),
    current_user: UserProjectOrg = Depends(get_current_user_org_project),
):
    """
    Ingest an assistant from OpenAI and store it in the platform.
    """
    # Everything below is scoped to the caller's organization and project.
    org_id = current_user.organization_id
    proj_id = current_user.project_id

    # Build the OpenAI client for this org/project, then pull the remote
    # assistant definition by its ID.
    openai_client = get_openai_client(session, org_id, proj_id)
    remote_assistant = fetch_assistant_from_openai(assistant_id, openai_client)

    # Persist the fetched assistant locally and wrap it in the standard
    # success envelope.
    stored_assistant = sync_assistant(
        session=session,
        organization_id=org_id,
        project_id=proj_id,
        openai_assistant=remote_assistant,
    )
    return APIResponse.success_response(stored_assistant)

backend/app/api/routes/responses.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,11 @@ def process_response(
125125
"input": [{"role": "user", "content": request.question}],
126126
}
127127

128-
if assistant.vector_store_id:
128+
if assistant.vector_store_ids:
129129
params["tools"] = [
130130
{
131131
"type": "file_search",
132-
"vector_store_ids": [assistant.vector_store_id],
132+
"vector_store_ids": assistant.vector_store_ids,
133133
"max_num_results": assistant.max_num_results,
134134
}
135135
]

backend/app/crud/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
update_user,
66
)
77
from .collection import CollectionCrud
8+
89
from .document import DocumentCrud
910
from .document_collection import DocumentCollectionCrud
1011

@@ -43,4 +44,8 @@
4344

4445
from .thread_results import upsert_thread_result, get_thread_result
4546

46-
from .assistants import get_assistant_by_id
47+
from .assistants import (
48+
get_assistant_by_id,
49+
fetch_assistant_from_openai,
50+
sync_assistant,
51+
)

backend/app/crud/assistants.py

Lines changed: 98 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
1-
from typing import Optional, List, Tuple
2-
from sqlmodel import Session, select, and_
1+
import logging
2+
3+
from typing import Optional
4+
5+
import openai
6+
from fastapi import HTTPException
7+
from openai import OpenAI
8+
from openai.types.beta import Assistant as OpenAIAssistant
9+
from sqlmodel import Session, and_, select
310

4-
from app.core.util import now
511
from app.models import Assistant
12+
from app.utils import mask_string
13+
14+
logger = logging.getLogger(__name__)
615

716

817
def get_assistant_by_id(
@@ -16,3 +25,89 @@ def get_assistant_by_id(
1625
)
1726
)
1827
return session.exec(statement).first()
28+
29+
30+
def fetch_assistant_from_openai(assistant_id: str, client: OpenAI) -> OpenAIAssistant:
    """
    Fetch an assistant from OpenAI.

    Args:
        assistant_id: ID of the assistant to retrieve.
        client: OpenAI client used to perform the lookup.

    Returns:
        The OpenAI Assistant model returned by the API.

    Raises:
        HTTPException: 404 when OpenAI reports the assistant as not found,
            502 for any other OpenAI error. The original OpenAI exception is
            attached as ``__cause__``.
    """
    try:
        return client.beta.assistants.retrieve(assistant_id=assistant_id)
    # NotFoundError is handled before the broader OpenAIError so that a
    # missing assistant maps to 404 rather than the generic 502.
    except openai.NotFoundError as e:
        logger.error(
            f"[fetch_assistant_from_openai] Assistant not found: {mask_string(assistant_id)} | {e}"
        )
        # Chain the cause so the underlying OpenAI error survives in tracebacks.
        raise HTTPException(
            status_code=404, detail="Assistant not found in OpenAI."
        ) from e
    except openai.OpenAIError as e:
        logger.error(
            f"[fetch_assistant_from_openai] OpenAI API error while retrieving assistant {mask_string(assistant_id)}: {e}"
        )
        raise HTTPException(status_code=502, detail=f"OpenAI API error: {e}") from e
49+
50+
51+
def sync_assistant(
    session: Session,
    organization_id: int,
    project_id: int,
    openai_assistant: OpenAIAssistant,
) -> Assistant:
    """
    Insert an assistant into the database by converting OpenAI Assistant to local Assistant model.

    Args:
        session: Database session used for the lookup and the insert.
        organization_id: Organization the new record is stored under.
        project_id: Project the new record is stored under.
        openai_assistant: Assistant object fetched from OpenAI.

    Returns:
        The newly created and refreshed ``Assistant`` row.

    Raises:
        HTTPException: 409 if an assistant with the same ID already exists
            for the organization; 400 if the OpenAI assistant has no
            instructions.
    """
    assistant_id = openai_assistant.id

    existing_assistant = get_assistant_by_id(session, assistant_id, organization_id)
    if existing_assistant:
        logger.info(
            f"[sync_assistant] Assistant with ID {mask_string(assistant_id)} already exists in the database."
        )
        raise HTTPException(
            status_code=409,
            detail=f"Assistant with ID {assistant_id} already exists.",
        )

    if not openai_assistant.instructions:
        raise HTTPException(
            status_code=400,
            detail="Assistant has no instruction.",
        )

    # tool_resources, its file_search entry and the ID list may each be
    # missing or None; fall back to an empty list in every such case.
    file_search_resources = getattr(
        openai_assistant.tool_resources or None, "file_search", None
    )
    vector_store_ids = list(
        getattr(file_search_resources or None, "vector_store_ids", None) or []
    )

    # Only the first file_search tool is consulted. Its max_num_results may
    # be unset (None); in that case keep the default of 20 rather than
    # storing None.
    max_num_results = 20
    for tool in openai_assistant.tools or []:
        if tool.type == "file_search":
            file_search_options = getattr(tool, "file_search", None)
            configured_limit = getattr(
                file_search_options or None, "max_num_results", None
            )
            if configured_limit is not None:
                max_num_results = configured_limit
            break

    # Use an explicit None check: a temperature of 0.0 is falsy, and
    # `or 0.1` would silently replace it with the default.
    temperature = openai_assistant.temperature
    if temperature is None:
        temperature = 0.1

    db_assistant = Assistant(
        assistant_id=openai_assistant.id,
        name=openai_assistant.name or openai_assistant.id,
        instructions=openai_assistant.instructions,
        model=openai_assistant.model,
        vector_store_ids=vector_store_ids,
        temperature=temperature,
        max_num_results=max_num_results,
        project_id=project_id,
        organization_id=organization_id,
    )

    session.add(db_assistant)
    session.commit()
    session.refresh(db_assistant)

    logger.info(
        f"[sync_assistant] Successfully ingested assistant with ID {mask_string(assistant_id)}."
    )
    return db_assistant

backend/app/models/assistants.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from datetime import datetime
22
from typing import Optional, List
33
from sqlmodel import Field, Relationship, SQLModel
4-
from sqlalchemy import Text, Column
4+
from sqlalchemy import Column, String
5+
from sqlalchemy.dialects.postgresql import ARRAY
56

67
from app.core.util import now
78

@@ -11,7 +12,9 @@ class AssistantBase(SQLModel):
1112
name: str
1213
instructions: str = Field(sa_column=Column(Text, nullable=False))
1314
model: str
14-
vector_store_id: str
15+
vector_store_ids: List[str] = Field(
16+
default_factory=list, sa_column=Column(ARRAY(String))
17+
)
1518
temperature: float = 0.1
1619
max_num_results: int = 20
1720
project_id: int = Field(

backend/app/seed_data/seed_data.json

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,23 +57,42 @@
5757
{
5858
"is_active": true,
5959
"provider": "openai",
60-
"credential": "{\"openai\": {\"api_key\": \"sk-proj-YxK21qI3i5SCxN\"}}",
60+
"credential": "{\"api_key\": \"sk-proj-GlificI3i5SCxN\"}",
6161
"project_name": "Glific",
6262
"organization_name": "Project Tech4dev",
6363
"deleted_at": null
64+
},
65+
{
66+
"is_active": true,
67+
"provider": "openai",
68+
"credential": "{\"api_key\": \"sk-proj-DalgoI3i5SCxN\"}",
69+
"project_name": "Dalgo",
70+
"organization_name": "Project Tech4dev",
71+
"deleted_at": null
6472
}
6573
],
6674
"assistants": [
6775
{
68-
"assistant_id": "assistant_123",
69-
"name": "Test Assistant",
76+
"assistant_id": "assistant_glific",
77+
"name": "Test Assistant Glific",
7078
"instructions": "Test instructions",
7179
"model": "gpt-4o",
72-
"vector_store_id": "vs_123",
80+
"vector_store_ids": ["vs_glific"],
7381
"temperature": 0.1,
7482
"max_num_results": 20,
7583
"project_name": "Glific",
7684
"organization_name": "Project Tech4dev"
85+
},
86+
{
87+
"assistant_id": "assistant_dalgo",
88+
"name": "Test Assistant Dalgo",
89+
"instructions": "Test instructions",
90+
"model": "gpt-4o",
91+
"vector_store_ids": ["vs_dalgo"],
92+
"temperature": 0.1,
93+
"max_num_results": 20,
94+
"project_name": "Dalgo",
95+
"organization_name": "Project Tech4dev"
7796
}
7897
]
7998
}

backend/app/seed_data/seed_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class AssistantData(BaseModel):
5757
name: str
5858
instructions: str
5959
model: str
60-
vector_store_id: str
60+
vector_store_ids: list[str]
6161
temperature: float
6262
max_num_results: int
6363
project_name: str
@@ -261,7 +261,7 @@ def create_assistant(session: Session, assistant_data_raw: dict) -> Assistant:
261261
name=assistant_data.name,
262262
instructions=assistant_data.instructions,
263263
model=assistant_data.model,
264-
vector_store_id=assistant_data.vector_store_id,
264+
vector_store_ids=assistant_data.vector_store_ids,
265265
temperature=assistant_data.temperature,
266266
max_num_results=assistant_data.max_num_results,
267267
organization_id=organization.id,
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import pytest
2+
from fastapi.testclient import TestClient
3+
from unittest.mock import patch
4+
from app.tests.utils.openai import mock_openai_assistant
5+
6+
7+
@pytest.fixture
def normal_user_api_key_header():
    """Provide the ``X-API-KEY`` request header used by the tests in this module."""
    api_key_value = "ApiKey Px8y47B6roJHin1lWLkR88eiDrFdXSJRZmFQazzai8j9"
    return {"X-API-KEY": api_key_value}
10+
11+
12+
@patch("app.api.routes.assistants.fetch_assistant_from_openai")
def test_ingest_assistant_success(
    mock_fetch_assistant,
    client: TestClient,
    normal_user_api_key_header: dict[str, str],
):
    """Test successful assistant ingestion from OpenAI."""
    # Patch the name as imported by the route module so the endpoint never
    # reaches OpenAI and receives the canned assistant instead.
    mock_assistant = mock_openai_assistant()
    mock_fetch_assistant.return_value = mock_assistant

    response = client.post(
        f"/api/v1/assistant/{mock_assistant.id}/ingest",
        headers=normal_user_api_key_header,
    )

    assert response.status_code == 201
    response_json = response.json()
    assert response_json["success"] is True
    assert response_json["data"]["assistant_id"] == mock_assistant.id

0 commit comments

Comments
 (0)