Commit 3a52931

feat(api): api update
1 parent 428e500 commit 3a52931

File tree

7 files changed: +1676 / −13 lines


.stats.yml

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 configured_endpoints: 44
-openapi_spec_hash: 67d5aeebff72f48ee4730227ca0b47c2
+openapi_spec_hash: 0f1841fad65926e7ddfb22dd7a642b46
 config_hash: 659f65b6ccf5612986f920f7f9abbcb5

src/codex/types/project_create_params.py

Lines changed: 282 additions & 3 deletions
@@ -2,10 +2,22 @@
 
 from __future__ import annotations
 
-from typing import Optional
-from typing_extensions import Required, TypedDict
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict
 
-__all__ = ["ProjectCreateParams", "Config"]
+__all__ = [
+    "ProjectCreateParams",
+    "Config",
+    "ConfigEvalConfig",
+    "ConfigEvalConfigCustomEvals",
+    "ConfigEvalConfigCustomEvalsEvals",
+    "ConfigEvalConfigDefaultEvals",
+    "ConfigEvalConfigDefaultEvalsContextSufficiency",
+    "ConfigEvalConfigDefaultEvalsQueryEase",
+    "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+    "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+    "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]
 
 
 class ProjectCreateParams(TypedDict, total=False):
@@ -18,9 +30,276 @@ class ProjectCreateParams(TypedDict, total=False):
     description: Optional[str]
 
 
+class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
+    criteria: Required[str]
+    """
+    The evaluation criteria text that describes what aspect is being evaluated and
+    how
+    """
+
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    context_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the retrieved
+    context.
+    """
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    query_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the user's query.
+    """
+
+    response_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the RAG/LLM
+    response.
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(TypedDict, total=False):
+    evals: Dict[str, ConfigEvalConfigCustomEvalsEvals]
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(TypedDict, total=False):
+    context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    query_ease: ConfigEvalConfigDefaultEvalsQueryEase
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+
+class ConfigEvalConfig(TypedDict, total=False):
+    custom_evals: ConfigEvalConfigCustomEvals
+    """Configuration for custom evaluation metrics."""
+
+    default_evals: ConfigEvalConfigDefaultEvals
+    """Configuration for default evaluation metrics."""
+
+
 class Config(TypedDict, total=False):
     clustering_use_llm_matching: bool
 
+    eval_config: ConfigEvalConfig
+    """Configuration for project-specific evaluation metrics"""
+
     llm_matching_model: str
 
     llm_matching_quality_preset: str
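
The types above only describe the request payload for project creation. As a rough illustration of how the new eval_config might be exercised, here is a minimal sketch. It assumes the generated client is importable as codex.Codex and exposes a projects.create(...) method (neither is shown in this diff); the name and organization_id arguments, all IDs, and the example eval values are hypothetical placeholders, while the config/eval_config field names come from the added TypedDicts.

# Minimal sketch, not part of this commit: passing the new eval_config params.
# Assumes `Codex` client and `projects.create(...)` exist; names/IDs are placeholders.
from codex import Codex

client = Codex()

project = client.projects.create(
    name="My RAG project",       # hypothetical fields not shown in this diff
    organization_id="org_123",   # placeholder ID
    config={
        "eval_config": {
            # Tune a built-in TrustworthyRAG metric: thresholds/priorities are
            # configurable, criteria and identifiers are system-managed.
            "default_evals": {
                "trustworthiness": {
                    "eval_key": "trustworthiness",
                    "name": "Trustworthiness",
                    "threshold": 0.7,
                    "threshold_direction": "below",  # fail when the score drops below 0.7
                    "should_escalate": True,         # failing responses can escalate to Codex/SME
                }
            },
            # Add a fully custom eval, keyed by its eval_key in the evals dict.
            "custom_evals": {
                "evals": {
                    "politeness": {
                        "eval_key": "politeness",
                        "name": "Politeness",
                        "criteria": "Is the {response} polite and professional?",
                        "response_identifier": "{response}",
                        "threshold": 0.5,
                        "threshold_direction": "below",
                    }
                }
            },
        }
    },
)

Since every Config sub-dict is total=False, only the Required keys of each eval entry (criteria, eval_key, name for custom evals; eval_key, name for default evals) need to be supplied; everything else falls back to server-side defaults.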

0 commit comments
