Skip to content

Commit 7fe7b28

Browse files
authored
Kaapi v1.0: Database Comments (#476)
* added database comments
* sticking to pydantic
* updating migration
* first stab at comments
* cleanups
* updated migration
* cleanup
* coderabbit suggestions
* cleanups
* cleanups
* generated new migration with updated format
* added comment for config as well
* fixed minor comments
1 parent 6b20d12 commit 7fe7b28

21 files changed

+4287
-259
lines changed

backend/app/alembic/versions/040_add_db_comments.py

Lines changed: 3255 additions & 0 deletions
Large diffs are not rendered by default.

backend/app/models/api_key.py

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,35 @@
1-
from uuid import UUID, uuid4
2-
import secrets
3-
import base64
41
from datetime import datetime
5-
from typing import Optional, List
6-
from sqlmodel import SQLModel, Field, Relationship
2+
from uuid import UUID, uuid4
3+
4+
from sqlmodel import Field, SQLModel
75

86
from app.core.util import now
97

108

119
class APIKeyBase(SQLModel):
10+
"""Base model for API keys with foreign key fields."""
11+
12+
# Foreign keys
1213
organization_id: int = Field(
13-
foreign_key="organization.id", nullable=False, ondelete="CASCADE"
14+
foreign_key="organization.id",
15+
nullable=False,
16+
ondelete="CASCADE",
17+
sa_column_kwargs={"comment": "Reference to the organization"},
1418
)
1519
project_id: int = Field(
16-
foreign_key="project.id", nullable=False, ondelete="CASCADE"
20+
foreign_key="project.id",
21+
nullable=False,
22+
ondelete="CASCADE",
23+
sa_column_kwargs={"comment": "Reference to the project"},
24+
)
25+
user_id: int = Field(
26+
foreign_key="user.id",
27+
nullable=False,
28+
ondelete="CASCADE",
29+
sa_column_kwargs={
30+
"comment": "Reference to the user for whom the API key was created"
31+
},
1732
)
18-
user_id: int = Field(foreign_key="user.id", nullable=False, ondelete="CASCADE")
1933

2034

2135
class APIKeyPublic(APIKeyBase):
@@ -32,14 +46,44 @@ class APIKeyCreateResponse(APIKeyPublic):
3246

3347

3448
class APIKey(APIKeyBase, table=True):
35-
id: UUID = Field(default_factory=uuid4, primary_key=True)
49+
"""Database model for API keys."""
3650

51+
id: UUID = Field(
52+
default_factory=uuid4,
53+
primary_key=True,
54+
sa_column_kwargs={"comment": "Unique identifier for the API key"},
55+
)
3756
key_prefix: str = Field(
38-
unique=True, index=True, nullable=False
39-
) # Unique identifier from the key
40-
key_hash: str = Field(nullable=False) # bcrypt hash of the secret portion
41-
42-
inserted_at: datetime = Field(default_factory=now, nullable=False)
43-
updated_at: datetime = Field(default_factory=now, nullable=False)
44-
is_deleted: bool = Field(default=False, nullable=False)
45-
deleted_at: Optional[datetime] = Field(default=None, nullable=True)
57+
unique=True,
58+
index=True,
59+
nullable=False,
60+
sa_column_kwargs={
61+
"comment": "Unique prefix portion of the API key for identification"
62+
},
63+
)
64+
key_hash: str = Field(
65+
nullable=False,
66+
sa_column_kwargs={"comment": "Bcrypt hash of the secret of the API key"},
67+
)
68+
is_deleted: bool = Field(
69+
default=False,
70+
nullable=False,
71+
sa_column_kwargs={"comment": "Soft delete flag"},
72+
)
73+
74+
# Timestamps
75+
inserted_at: datetime = Field(
76+
default_factory=now,
77+
nullable=False,
78+
sa_column_kwargs={"comment": "Timestamp when the API key was created"},
79+
)
80+
updated_at: datetime = Field(
81+
default_factory=now,
82+
nullable=False,
83+
sa_column_kwargs={"comment": "Timestamp when the API key was last updated"},
84+
)
85+
deleted_at: datetime | None = Field(
86+
default=None,
87+
nullable=True,
88+
sa_column_kwargs={"comment": "Timestamp when the API key was deleted"},
89+
)

backend/app/models/assistants.py

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from datetime import datetime
2-
from typing import List, Optional
32

43
from sqlalchemy import Column, String, Text
54
from sqlalchemy.dialects.postgresql import ARRAY
65
from sqlmodel import Field, Relationship, SQLModel, UniqueConstraint
76

87
from app.core.util import now
8+
from app.models.organization import Organization
9+
from app.models.project import Project
910

1011

1112
class AssistantBase(SQLModel):
@@ -17,7 +18,7 @@ class AssistantBase(SQLModel):
1718
name: str
1819
instructions: str = Field(sa_column=Column(Text, nullable=False))
1920
model: str
20-
vector_store_ids: List[str] = Field(
21+
vector_store_ids: list[str] = Field(
2122
default_factory=list, sa_column=Column(ARRAY(String))
2223
)
2324
temperature: float = 0.1
@@ -31,17 +32,88 @@ class AssistantBase(SQLModel):
3132

3233

3334
class Assistant(AssistantBase, table=True):
35+
"""OpenAI assistant configuration and metadata."""
36+
3437
__tablename__ = "openai_assistant"
3538

36-
id: int = Field(default=None, primary_key=True)
37-
inserted_at: datetime = Field(default_factory=now, nullable=False)
38-
updated_at: datetime = Field(default_factory=now, nullable=False)
39-
is_deleted: bool = Field(default=False, nullable=False)
40-
deleted_at: Optional[datetime] = Field(default=None, nullable=True)
39+
id: int = Field(
40+
default=None,
41+
primary_key=True,
42+
sa_column_kwargs={"comment": "Unique identifier for the assistant"},
43+
)
44+
assistant_id: str = Field(
45+
index=True,
46+
sa_column_kwargs={"comment": "Unique identifier for the assistant at OpenAI"},
47+
)
48+
name: str = Field(
49+
sa_column_kwargs={"comment": "Name of the assistant"},
50+
)
51+
instructions: str = Field(
52+
sa_column=Column(
53+
Text, nullable=False, comment="System instructions for the assistant"
54+
)
55+
)
56+
model: str = Field(
57+
sa_column_kwargs={"comment": "OpenAI model used by the assistant"},
58+
)
59+
vector_store_ids: list[str] = Field(
60+
default_factory=list,
61+
sa_column=Column(
62+
ARRAY(String), comment="List of OpenAI vector store IDs attached"
63+
),
64+
)
65+
temperature: float = Field(
66+
default=0.1,
67+
sa_column_kwargs={
68+
"comment": "Parameter that controls the creativity or randomness of the text generated by model"
69+
},
70+
)
71+
max_num_results: int = Field(
72+
default=20,
73+
sa_column_kwargs={
74+
"comment": "Parameter that controls maximum number of results to return"
75+
},
76+
)
77+
is_deleted: bool = Field(
78+
default=False,
79+
nullable=False,
80+
sa_column_kwargs={"comment": "Soft delete flag"},
81+
)
82+
83+
# Foreign keys
84+
project_id: int = Field(
85+
foreign_key="project.id",
86+
nullable=False,
87+
ondelete="CASCADE",
88+
sa_column_kwargs={"comment": "Reference to the project"},
89+
)
90+
organization_id: int = Field(
91+
foreign_key="organization.id",
92+
nullable=False,
93+
ondelete="CASCADE",
94+
sa_column_kwargs={"comment": "Reference to the organization"},
95+
)
96+
97+
# Timestamps
98+
inserted_at: datetime = Field(
99+
default_factory=now,
100+
nullable=False,
101+
sa_column_kwargs={"comment": "Timestamp when the assistant was created"},
102+
)
103+
updated_at: datetime = Field(
104+
default_factory=now,
105+
nullable=False,
106+
sa_column_kwargs={"comment": "Timestamp when the assistant was last updated"},
107+
)
108+
deleted_at: datetime | None = Field(
109+
default=None,
110+
nullable=True,
111+
sa_column_kwargs={"comment": "Timestamp when the assistant was deleted"},
112+
)
41113

42114
# Relationships
43-
project: "Project" = Relationship(back_populates="assistants")
44-
organization: "Organization" = Relationship(back_populates="assistants")
115+
project: Project = Relationship(back_populates="assistants")
116+
organization: Organization = Relationship(back_populates="assistants")
45117

46118

47119
class AssistantCreate(SQLModel):

backend/app/models/batch_job.py

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,49 +13,63 @@
1313

1414

1515
class BatchJob(SQLModel, table=True):
16-
"""Batch job table for tracking async LLM batch operations."""
16+
"""Database model for BatchJob operations."""
1717

1818
__tablename__ = "batch_job"
1919
__table_args__ = (
2020
Index("idx_batch_job_status_org", "provider_status", "organization_id"),
2121
Index("idx_batch_job_status_project", "provider_status", "project_id"),
2222
)
2323

24-
id: int | None = Field(default=None, primary_key=True)
24+
id: int | None = Field(
25+
default=None,
26+
primary_key=True,
27+
sa_column_kwargs={"comment": "Unique identifier for the batch job"},
28+
)
2529

2630
# Provider and job type
2731
provider: str = Field(
2832
description="LLM provider name (e.g., 'openai', 'anthropic')",
33+
sa_column_kwargs={"comment": "LLM provider name (e.g., openai, anthropic)"},
2934
)
3035
job_type: str = Field(
3136
index=True,
3237
description=(
3338
"Type of batch job (e.g., 'evaluation', 'classification', 'embedding')"
3439
),
40+
sa_column_kwargs={
41+
"comment": "Type of batch job (e.g., evaluation, classification, embedding)"
42+
},
3543
)
3644

3745
# Batch configuration - stores all provider-specific config
3846
config: dict[str, Any] = Field(
3947
default_factory=dict,
40-
sa_column=Column(JSONB, nullable=False),
48+
sa_column=Column(
49+
JSONB,
50+
nullable=False,
51+
comment="Complete batch configuration including model, temperature, instructions, tools, etc.",
52+
),
4153
description=(
42-
"Complete batch configuration including model, temperature, "
43-
"instructions, tools, etc."
54+
"Complete batch configuration including model, temperature, instructions, tools, etc."
4455
),
4556
)
4657

4758
# Provider-specific batch tracking
4859
provider_batch_id: str | None = Field(
4960
default=None,
5061
description="Provider's batch job ID (e.g., OpenAI batch_id)",
62+
sa_column_kwargs={"comment": "Provider's batch job ID (e.g., OpenAI batch_id)"},
5163
)
5264
provider_file_id: str | None = Field(
5365
default=None,
5466
description="Provider's input file ID",
67+
sa_column_kwargs={"comment": "Provider's input file ID"},
5568
)
5669
provider_output_file_id: str | None = Field(
5770
default=None,
5871
description="Provider's output file ID",
72+
sa_column_kwargs={"comment": "Provider's output file ID"},
5973
)
6074

6175
# Provider status tracking
@@ -65,40 +79,56 @@ class BatchJob(SQLModel, table=True):
6579
"Provider-specific status (e.g., OpenAI: validating, in_progress, "
6680
"finalizing, completed, failed, expired, cancelling, cancelled)"
6781
),
82+
sa_column_kwargs={
83+
"comment": "Provider-specific status (e.g., validating, in_progress, completed, failed)"
84+
},
6885
)
6986

7087
# Raw results (before parent-specific processing)
7188
raw_output_url: str | None = Field(
7289
default=None,
7390
description="S3 URL of raw batch output file",
91+
sa_column_kwargs={"comment": "S3 URL of raw batch output file"},
7492
)
7593
total_items: int = Field(
7694
default=0,
7795
description="Total number of items in the batch",
96+
sa_column_kwargs={"comment": "Total number of items in the batch"},
7897
)
7998

8099
# Error handling
81100
error_message: str | None = Field(
82101
default=None,
83-
sa_column=Column(Text, nullable=True),
102+
sa_column=Column(Text, nullable=True, comment="Error message if batch failed"),
84103
description="Error message if batch failed",
85104
)
86105

87106
# Foreign keys
88107
organization_id: int = Field(
89-
foreign_key="organization.id", nullable=False, ondelete="CASCADE", index=True
108+
foreign_key="organization.id",
109+
nullable=False,
110+
ondelete="CASCADE",
111+
index=True,
112+
sa_column_kwargs={"comment": "Reference to the organization"},
90113
)
91114
project_id: int = Field(
92-
foreign_key="project.id", nullable=False, ondelete="CASCADE", index=True
115+
foreign_key="project.id",
116+
nullable=False,
117+
ondelete="CASCADE",
118+
index=True,
119+
sa_column_kwargs={"comment": "Reference to the project"},
93120
)
94121

95122
# Timestamps
96123
inserted_at: datetime = Field(
97-
default_factory=now, description="The timestamp when the batch job was started"
124+
default_factory=now,
125+
description="The timestamp when the batch job was started",
126+
sa_column_kwargs={"comment": "Timestamp when the batch job was started"},
98127
)
99128
updated_at: datetime = Field(
100129
default_factory=now,
101130
description="The timestamp when the batch job was last updated",
131+
sa_column_kwargs={"comment": "Timestamp when the batch job was last updated"},
102132
)
103133

104134
# Relationships

0 commit comments

Comments
 (0)