27
27
from google .adk .evaluation .eval_result import EvalCaseResult
28
28
from google .adk .evaluation .eval_set import EvalCase
29
29
from google .adk .evaluation .eval_set import EvalSet
30
+ from google .adk .evaluation .eval_set_results_manager import EvalSetResultsManager
30
31
from google .adk .evaluation .eval_sets_manager import EvalSetsManager
31
32
from google .adk .evaluation .evaluator import EvalStatus
32
33
from google .adk .evaluation .evaluator import EvaluationResult
@@ -51,13 +52,21 @@ def dummy_agent():
51
52
52
53
53
54
@pytest .fixture
54
- def eval_service (dummy_agent , mock_eval_sets_manager ):
55
+ def mock_eval_set_results_manager ():
56
+ return mock .create_autospec (EvalSetResultsManager )
57
+
58
+
59
+ @pytest .fixture
60
+ def eval_service (
61
+ dummy_agent , mock_eval_sets_manager , mock_eval_set_results_manager
62
+ ):
55
63
DEFAULT_METRIC_EVALUATOR_REGISTRY .register_evaluator (
56
64
metric_name = "fake_metric" , evaluator = FakeEvaluator
57
65
)
58
66
return LocalEvalService (
59
67
root_agent = dummy_agent ,
60
68
eval_sets_manager = mock_eval_sets_manager ,
69
+ eval_set_results_manager = mock_eval_set_results_manager ,
61
70
)
62
71
63
72
@@ -90,7 +99,9 @@ def evaluate_invocations(
90
99
91
100
@pytest .mark .asyncio
92
101
async def test_perform_inference_success (
93
- eval_service , dummy_agent , mock_eval_sets_manager
102
+ eval_service ,
103
+ dummy_agent ,
104
+ mock_eval_sets_manager ,
94
105
):
95
106
eval_set = EvalSet (
96
107
eval_set_id = "test_eval_set" ,
@@ -127,7 +138,9 @@ async def test_perform_inference_success(
127
138
128
139
@pytest .mark .asyncio
129
140
async def test_perform_inference_with_case_ids (
130
- eval_service , dummy_agent , mock_eval_sets_manager
141
+ eval_service ,
142
+ dummy_agent ,
143
+ mock_eval_sets_manager ,
131
144
):
132
145
eval_set = EvalSet (
133
146
eval_set_id = "test_eval_set" ,
@@ -172,7 +185,8 @@ async def test_perform_inference_with_case_ids(
172
185
173
186
@pytest .mark .asyncio
174
187
async def test_perform_inference_eval_set_not_found (
175
- eval_service , mock_eval_sets_manager
188
+ eval_service ,
189
+ mock_eval_sets_manager ,
176
190
):
177
191
mock_eval_sets_manager .get_eval_set .return_value = None
178
192
@@ -188,7 +202,9 @@ async def test_perform_inference_eval_set_not_found(
188
202
189
203
190
204
@pytest .mark .asyncio
191
- async def test_evaluate_success (eval_service , mock_eval_sets_manager ):
205
+ async def test_evaluate_success (
206
+ eval_service , mock_eval_sets_manager , mock_eval_set_results_manager
207
+ ):
192
208
inference_results = [
193
209
InferenceResult (
194
210
app_name = "test_app" ,
@@ -224,11 +240,13 @@ async def test_evaluate_success(eval_service, mock_eval_sets_manager):
224
240
assert isinstance (results [0 ], EvalCaseResult )
225
241
assert isinstance (results [1 ], EvalCaseResult )
226
242
assert mock_eval_sets_manager .get_eval_case .call_count == 2
243
+ assert mock_eval_set_results_manager .save_eval_set_result .call_count == 2
227
244
228
245
229
246
@pytest .mark .asyncio
230
247
async def test_evaluate_eval_case_not_found (
231
- eval_service , mock_eval_sets_manager
248
+ eval_service ,
249
+ mock_eval_sets_manager ,
232
250
):
233
251
inference_results = [
234
252
InferenceResult (
@@ -256,7 +274,7 @@ async def test_evaluate_eval_case_not_found(
256
274
257
275
@pytest .mark .asyncio
258
276
async def test_evaluate_single_inference_result (
259
- eval_service , mock_eval_sets_manager
277
+ eval_service , mock_eval_sets_manager , mock_eval_set_results_manager
260
278
):
261
279
invocation = Invocation (
262
280
user_content = genai_types .Content (
@@ -289,7 +307,7 @@ async def test_evaluate_single_inference_result(
289
307
mock_eval_case .session_input = None
290
308
mock_eval_sets_manager .get_eval_case .return_value = mock_eval_case
291
309
292
- result = await eval_service ._evaluate_single_inference_result (
310
+ _ , result = await eval_service ._evaluate_single_inference_result (
293
311
inference_result = inference_result , evaluate_config = evaluate_config
294
312
)
295
313
0 commit comments