Commit 3f5c0b1

added testcases
1 parent f738f3a commit 3f5c0b1


backend/app/tests/crud/evaluations/test_processing.py

Lines changed: 229 additions & 1 deletion
@@ -6,6 +6,7 @@
 from sqlmodel import Session, select
 
 from app.crud.evaluations.processing import (
+    _extract_batch_error_message,
     check_and_process_evaluation,
     parse_evaluation_output,
     process_completed_embedding_batch,
@@ -653,11 +654,12 @@ async def test_check_and_process_evaluation_completed(
             db, project_id=test_dataset.project_id, use_kaapi_schema=True
         )
 
-        # Create batch job
+        # Create batch job with output file (successful completion)
         batch_job = BatchJob(
             provider="openai",
             provider_batch_id="batch_abc",
             provider_status="completed",
+            provider_output_file_id="output-file-123",
             job_type=BatchJobType.EVALUATION,
             total_items=2,
             status="submitted",
@@ -688,6 +690,12 @@ async def test_check_and_process_evaluation_completed(
         db.refresh(eval_run)
 
         mock_get_batch.return_value = batch_job
+        mock_poll.return_value = {
+            "provider_status": "completed",
+            "provider_output_file_id": "output-file-123",
+            "error_file_id": None,
+            "request_counts": {"total": 2, "completed": 2, "failed": 0},
+        }
         mock_process.return_value = eval_run
 
         mock_openai = MagicMock()
@@ -756,6 +764,111 @@ async def test_check_and_process_evaluation_failed(
         db.refresh(eval_run)
 
         mock_get_batch.return_value = batch_job
+        mock_poll.return_value = {
+            "provider_status": "failed",
+            "provider_output_file_id": None,
+            "error_file_id": None,
+            "error_message": "Provider error",
+            "request_counts": {"total": 2, "completed": 0, "failed": 2},
+        }
+
+        mock_openai = MagicMock()
+        mock_langfuse = MagicMock()
+
+        result = await check_and_process_evaluation(
+            eval_run=eval_run,
+            session=db,
+            openai_client=mock_openai,
+            langfuse=mock_langfuse,
+        )
+
+        assert result["action"] == "failed"
+        assert result["current_status"] == "failed"
+        db.refresh(eval_run)
+        assert eval_run.status == "failed"
+
+    @pytest.mark.asyncio
+    @patch("app.crud.evaluations.processing.get_batch_job")
+    @patch("app.crud.evaluations.processing.poll_batch_status")
+    @patch("app.crud.evaluations.processing.OpenAIBatchProvider")
+    async def test_check_and_process_evaluation_completed_all_requests_failed(
+        self,
+        mock_provider_cls,
+        mock_poll,
+        mock_get_batch,
+        db: Session,
+        test_dataset,
+    ):
+        """Test batch completed but all requests failed — both batch_job and eval_run get error_message."""
+        config = create_test_config(
+            db, project_id=test_dataset.project_id, use_kaapi_schema=True
+        )
+
+        # Create batch job: completed status but NO provider_output_file_id
+        batch_job = BatchJob(
+            provider="openai",
+            provider_batch_id="batch_all_fail",
+            provider_status="completed",
+            job_type=BatchJobType.EVALUATION,
+            total_items=9,
+            status="submitted",
+            organization_id=test_dataset.organization_id,
+            project_id=test_dataset.project_id,
+            inserted_at=now(),
+            updated_at=now(),
+        )
+        db.add(batch_job)
+        db.commit()
+        db.refresh(batch_job)
+
+        eval_run = create_evaluation_run(
+            session=db,
+            run_name="test_run_all_fail",
+            dataset_name=test_dataset.name,
+            dataset_id=test_dataset.id,
+            config_id=config.id,
+            config_version=1,
+            organization_id=test_dataset.organization_id,
+            project_id=test_dataset.project_id,
+        )
+        eval_run.batch_job_id = batch_job.id
+        eval_run.status = "processing"
+        db.add(eval_run)
+        db.commit()
+        db.refresh(eval_run)
+
+        mock_get_batch.return_value = batch_job
+        mock_poll.return_value = {
+            "provider_status": "completed",
+            "provider_output_file_id": None,
+            "error_file_id": "error-file-abc",
+            "request_counts": {"total": 9, "completed": 0, "failed": 9},
+        }
+
+        # Mock the provider instance returned by OpenAIBatchProvider(client=...)
+        # to return realistic error file content
+        error_lines = "\n".join(
+            [
+                json.dumps(
+                    {
+                        "id": f"batch_req_{i}",
+                        "custom_id": f"id-{i}",
+                        "response": {
+                            "status_code": 400,
+                            "body": {
+                                "error": {
+                                    "message": "Unsupported parameter: 'temperature' is not supported with this model.",
+                                }
+                            },
+                        },
+                        "error": None,
+                    }
+                )
+                for i in range(9)
+            ]
+        )
+        mock_provider_instance = mock_provider_cls.return_value
+        mock_provider_instance.download_file.return_value = error_lines
 
         mock_openai = MagicMock()
         mock_langfuse = MagicMock()
@@ -769,8 +882,123 @@ async def test_check_and_process_evaluation_failed(
 
         assert result["action"] == "failed"
         assert result["current_status"] == "failed"
+        assert "temperature" in result["error"]
+        assert "(9/9 requests)" in result["error"]
+
+        # Verify eval_run updated with error
         db.refresh(eval_run)
         assert eval_run.status == "failed"
+        assert "temperature" in eval_run.error_message
+
+        # Verify batch_job updated with error
+        db.refresh(batch_job)
+        assert "temperature" in batch_job.error_message
+        assert "(9/9 requests)" in batch_job.error_message
+
+
+class TestExtractBatchErrorMessage:
+    """Test extracting error messages from OpenAI error files."""
+
+    def test_single_unique_error(self) -> None:
+        """Test error file where all requests have the same error."""
+        error_lines = []
+        for i in range(5):
+            error_lines.append(
+                json.dumps(
+                    {
+                        "id": f"batch_req_{i}",
+                        "custom_id": f"id-{i}",
+                        "response": {
+                            "status_code": 400,
+                            "body": {
+                                "error": {
+                                    "message": "Unsupported parameter: 'temperature' is not supported with this model.",
+                                    "type": "invalid_request_error",
+                                }
+                            },
+                        },
+                        "error": None,
+                    }
+                )
+            )
+        error_content = "\n".join(error_lines)
+
+        mock_provider = MagicMock()
+        mock_provider.download_file.return_value = error_content
+
+        mock_session = MagicMock()
+        mock_batch_job = MagicMock()
+        mock_batch_job.id = 1
+
+        result = _extract_batch_error_message(
+            provider=mock_provider,
+            error_file_id="error-file-123",
+            batch_job=mock_batch_job,
+            session=mock_session,
+        )
+
+        assert "Unsupported parameter" in result
+        assert "(5/5 requests)" in result
+        mock_provider.download_file.assert_called_once_with("error-file-123")
+
+    def test_multiple_unique_errors_picks_most_common(self) -> None:
+        """Test error file with mixed errors; picks the most frequent one."""
+        error_lines = []
+        # 3 requests with temperature error
+        for i in range(3):
+            error_lines.append(
+                json.dumps(
+                    {
+                        "id": f"batch_req_{i}",
+                        "custom_id": f"id-{i}",
+                        "response": {
+                            "status_code": 400,
+                            "body": {
+                                "error": {
+                                    "message": "Unsupported parameter: 'temperature'",
+                                }
+                            },
+                        },
+                        "error": None,
+                    }
+                )
+            )
+        # 1 request with rate limit error
+        error_lines.append(
+            json.dumps(
+                {
+                    "id": "batch_req_3",
+                    "custom_id": "id-3",
+                    "response": {
+                        "status_code": 429,
+                        "body": {
+                            "error": {
+                                "message": "Rate limit exceeded",
+                            }
+                        },
+                    },
+                    "error": None,
+                }
+            )
+        )
+        error_content = "\n".join(error_lines)
+
+        mock_provider = MagicMock()
+        mock_provider.download_file.return_value = error_content
+
+        mock_session = MagicMock()
+        mock_batch_job = MagicMock()
+        mock_batch_job.id = 1
+
+        result = _extract_batch_error_message(
+            provider=mock_provider,
+            error_file_id="error-file-123",
+            batch_job=mock_batch_job,
+            session=mock_session,
+        )
+
+        assert "Unsupported parameter: 'temperature'" in result
+        assert "(3/4 requests)" in result
 
 
 class TestPollAllPendingEvaluations:
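
For orientation, a minimal sketch of the helper these tests pin down. _extract_batch_error_message lives in app/crud/evaluations/processing.py and is not part of this diff, so everything below is inferred from the assertions: the helper downloads the JSONL error file via the provider, reads each record's error text from response.body.error.message, tallies the messages, and returns the most frequent one annotated with its share of requests. The fallback message and the inline persistence to batch_job are assumptions not exercised by these tests; the unit tests only check the return value and the download_file call.

import json
from collections import Counter


def _extract_batch_error_message(provider, error_file_id, batch_job, session):
    """Summarize an OpenAI batch error file as one human-readable message."""
    content = provider.download_file(error_file_id)

    # Each line of the error file is a JSON record; the error text sits at
    # response.body.error.message (see the fixtures in the tests above).
    messages = []
    for line in content.splitlines():
        if not line.strip():
            continue
        record = json.loads(line)
        body = (record.get("response") or {}).get("body") or {}
        message = (body.get("error") or {}).get("message")
        if message:
            messages.append(message)

    if not messages:
        # Assumed fallback; not covered by these tests.
        return "Batch failed but the error file contained no messages"

    # Pick the most frequent error and report its share, e.g. "(3/4 requests)".
    top_message, count = Counter(messages).most_common(1)[0]
    summary = f"{top_message} ({count}/{len(messages)} requests)"

    # The integration test asserts that batch_job.error_message carries the
    # same summary; whether this helper or its caller persists it is not
    # pinned down, so the sketch does it inline.
    batch_job.error_message = summary
    session.add(batch_job)
    session.commit()
    return summary

Against the fixtures above, the mixed-error case would yield "Unsupported parameter: 'temperature' (3/4 requests)", matching the assertions in test_multiple_unique_errors_picks_most_common.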
