from __future__ import annotations

-from typing import Optional
-from typing_extensions import Required, TypedDict
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict

-__all__ = ["ProjectCreateParams", "Config"]
+__all__ = [
+    "ProjectCreateParams",
+    "Config",
+    "ConfigEvalConfig",
+    "ConfigEvalConfigCustomEvals",
+    "ConfigEvalConfigCustomEvalsEvals",
+    "ConfigEvalConfigDefaultEvals",
+    "ConfigEvalConfigDefaultEvalsContextSufficiency",
+    "ConfigEvalConfigDefaultEvalsQueryEase",
+    "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+    "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+    "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]


class ProjectCreateParams(TypedDict, total=False):
@@ -18,9 +30,276 @@ class ProjectCreateParams(TypedDict, total=False):
    description: Optional[str]


+class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
+    criteria: Required[str]
+    """
+    The evaluation criteria text that describes what aspect is being evaluated and
+    how
+    """
+
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    context_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the retrieved
+    context.
+    """
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    query_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the user's query.
+    """
+
+    response_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the RAG/LLM
+    response.
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(TypedDict, total=False):
+    evals: Dict[str, ConfigEvalConfigCustomEvalsEvals]
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(TypedDict, total=False):
+    context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    query_ease: ConfigEvalConfigDefaultEvalsQueryEase
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+
+class ConfigEvalConfig(TypedDict, total=False):
+    custom_evals: ConfigEvalConfigCustomEvals
+    """Configuration for custom evaluation metrics."""
+
+    default_evals: ConfigEvalConfigDefaultEvals
+    """Configuration for default evaluation metrics."""
+
+
class Config(TypedDict, total=False):
    clustering_use_llm_matching: bool

+    eval_config: ConfigEvalConfig
+    """Configuration for project-specific evaluation metrics"""
+
    llm_matching_model: str

    llm_matching_quality_preset: str
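For reviewers, here is a minimal usage sketch of the new `eval_config` field. The key names and nesting mirror the TypedDicts added in this diff; the concrete threshold values and the `politeness` custom eval (and its key) are illustrative assumptions, not part of the change. Since TypedDicts are plain dicts at runtime, no SDK import is needed to build the payload.

```python
# Hypothetical payload shaped to match the new Config / ConfigEvalConfig TypedDicts.
# All values below are illustrative assumptions, not defaults introduced by this PR.
config = {
    "eval_config": {
        "default_evals": {
            # Keys mirror the ConfigEvalConfigDefaultEvals fields.
            "trustworthiness": {
                "eval_key": "trustworthiness",   # Required
                "name": "Trustworthiness",       # Required
                "enabled": True,
                "threshold": 0.7,
                "threshold_direction": "below",  # fail when the score drops below 0.7
                "should_escalate": True,
                "priority": 1,
            },
        },
        "custom_evals": {
            # ConfigEvalConfigCustomEvals.evals is a Dict[str, ConfigEvalConfigCustomEvalsEvals].
            "evals": {
                "politeness": {  # hypothetical custom eval
                    "criteria": "Determine whether the {response} is polite to the user.",  # Required
                    "eval_key": "politeness",    # Required
                    "name": "Politeness",        # Required
                    "response_identifier": "{response}",
                    "threshold": 0.5,
                    "threshold_direction": "below",
                    "should_escalate": False,
                },
            },
        },
    },
}
```

This dict would then presumably be supplied as the project's `Config` value on `ProjectCreateParams` (the `config` field itself sits outside this hunk) when creating a project.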