diff --git a/packages/augmentation/tests/unit/test_eicr_augmenter.py b/packages/augmentation/tests/unit/test_eicr_augmenter.py index 174f767d..140fd070 100644 --- a/packages/augmentation/tests/unit/test_eicr_augmenter.py +++ b/packages/augmentation/tests/unit/test_eicr_augmenter.py @@ -145,7 +145,7 @@ def test_get_old_document_id_preserves_assigning_authority_name_when_present(sel """Tests old document id preserves assigningAuthorityName when present.""" eicr_with_assigning_authority_name = BASIC_ECR.replace( ' assigningAuthorityName="original-document"', - ' assigningAuthorityName="original-document"', + "", ).replace( ' assigningAuthorityName="TEXT_TO_CODE"', "", diff --git a/packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py b/packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py index 7f7757c7..a8d866cd 100644 --- a/packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py +++ b/packages/text-to-code-lambda/src/text_to_code_lambda/lambda_function.py @@ -13,6 +13,10 @@ from opensearchpy import OpenSearch import lambda_handler +from shared_models import Code +from shared_models import NonstandardCodeInstance +from text_to_code.models import Candidate +from text_to_code.models import SchematronErrorDetail from text_to_code.models import query as query_models from text_to_code.services import eicr_processor from text_to_code.services import embedder @@ -140,7 +144,12 @@ def _initialize_ttc_outputs(persistence_id: str) -> tuple[dict, dict]: :return: The TTC output and TTC metadata output dictionaries. """ # TODO: Update the ttc_output to ensure it matches and uses the expected model once ticket #263 is completed - ttc_output: dict = {"persistence_id": "", "eicr_metadata": {}, "schematron_errors": {}} + ttc_output: dict = { + "persistence_id": "", + "eicr_metadata": {}, + "schematron_errors": {}, + "unmatched_schematron_errors": {}, + } ttc_metadata_output: dict = { "persistence_id": "", "eicr_metadata": {}, @@ -212,6 +221,29 @@ def _populate_eicr_metadata( ttc_metadata_output["eicr_metadata"] = eicr_metadata +def _build_nonstandard_code_instance( + schematron_error: SchematronErrorDetail, + new_translation: Code, + selected_candidate: Candidate, +) -> NonstandardCodeInstance: + """Build a NonstandardCodeInstance object for the TTC output. + + :param schematron_error: The Schematron error being processed. + :param new_translation: The new translation retrieved from OpenSearch for the error. + :param selected_candidate: The text candidate that was selected as the most relevant for the error. + :return: A NonstandardCodeInstance object populated with the relevant information. + """ + new_translation_with_text = new_translation.model_copy( + update={"original_text": selected_candidate.value} + ) + return NonstandardCodeInstance( + schematron_error=schematron_error.error_message, + schematron_error_xpath=schematron_error.error_context, + field_type=schematron_error.field, + new_translation=new_translation_with_text, + ) + + def _process_schematron_errors( original_eicr_content: str, schematron_data_fields: list, @@ -234,6 +266,8 @@ def _process_schematron_errors( if data_field not in ttc_output["schematron_errors"]: ttc_output["schematron_errors"][data_field] = [] + if data_field not in ttc_output["unmatched_schematron_errors"]: + ttc_output["unmatched_schematron_errors"][data_field] = [] if data_field not in ttc_metadata_output["schematron_errors"]: ttc_metadata_output["schematron_errors"][data_field] = [] @@ -250,13 +284,19 @@ def _process_schematron_errors( ) error.candidate = selected_candidate - ttc_output["schematron_errors"][data_field].append(error.model_dump()) logger.info( "Embedding the relevant text strings for each error in the eICR for persistence_id" ) if selected_candidate is None: + unmatched_error = error.model_dump() + unmatched_error["reason"] = "No relevant text candidate was selected" + ttc_output["unmatched_schematron_errors"][data_field].append(unmatched_error) + + metadata_error = error.model_dump() + metadata_error["reason"] = "No relevant text candidate was selected" + ttc_metadata_output["schematron_errors"][data_field].append(metadata_error) continue vector_embedding = RETRIEVER.embed(selected_candidate.value) @@ -281,9 +321,34 @@ def _process_schematron_errors( retrieved_loinc_names = [hit.source.description for hit in results_list] ranked_results = RERANKER.rerank(selected_candidate.value, retrieved_loinc_names) + if results_list: + ttc_output["schematron_errors"][data_field].append( + _build_nonstandard_code_instance( + schematron_error=error, + new_translation=Code( + code=results_list[0].source.loinc_code, + code_system="2.16.840.1.113883.6.1", + code_system_name="LOINC", + display_name=results_list[0].source.description, + ), + selected_candidate=selected_candidate, + ).model_dump() + ) + else: + # TODO: Shape of this output could change depending on needs of the Augmentation Lambda + unmatched_error = error.model_dump() + unmatched_error["reason"] = ( + "Selected candidate found, but no OpenSearch code match was returned" + ) + ttc_output["unmatched_schematron_errors"][data_field].append(unmatched_error) + metadata_error = error.model_dump() metadata_error["opensearch_retrieved_scores"] = opensearch_retrieved_scores metadata_error["reranker_processed_results"] = ranked_results + if not results_list: + metadata_error["reason"] = ( + "Selected candidate found, but no OpenSearch code match was returned" + ) ttc_metadata_output["schematron_errors"][data_field].append(metadata_error) diff --git a/packages/text-to-code-lambda/tests/test_lambda_function.py b/packages/text-to-code-lambda/tests/test_lambda_function.py index 39051c6c..cf4a2bd2 100644 --- a/packages/text-to-code-lambda/tests/test_lambda_function.py +++ b/packages/text-to-code-lambda/tests/test_lambda_function.py @@ -44,6 +44,7 @@ def test_handler_success(self, example_sqs_event, mock_aws_setup, mock_opensearc assert ttc_output is not None assert ttc_output["persistence_id"] == mock_aws_setup.persistence_id assert "schematron_errors" in ttc_output + assert "unmatched_schematron_errors" in ttc_output assert "eicr_metadata" in ttc_output assert ( len(ttc_output["schematron_errors"]["Lab Test Name Resulted"]) @@ -52,14 +53,54 @@ def test_handler_success(self, example_sqs_event, mock_aws_setup, mock_opensearc assert ( len(ttc_output["schematron_errors"]["Lab Test Name Ordered"]) == EXPECTED_ORDERED_ERRORS ) + assert ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"] == [] + assert ttc_output["unmatched_schematron_errors"]["Lab Test Name Ordered"] == [] assert ( "opensearch_retrieved_scores" not in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] ) - assert "candidate" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] - assert "error_context" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] - assert "error_id" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] - assert ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["candidate"] is not None + assert ( + "reranker_processed_results" + not in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] + ) + assert "schematron_error" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] + assert ( + "schematron_error_xpath" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] + ) + assert "field_type" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] + assert "new_translation" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0] + assert ( + ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["field_type"] + == "Lab Test Name Resulted" + ) + assert ( + ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"]["code"] + == "109224-6" + ) + assert ( + ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][ + "code_system" + ] + == "2.16.840.1.113883.6.1" + ) + assert ( + ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][ + "code_system_name" + ] + == "LOINC" + ) + assert ( + ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][ + "display_name" + ] + is not None + ) + assert ( + ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][ + "original_text" + ] + == "weed allergen mix 3" + ) # Assert that the TTC metadata output was saved to S3 with the expected content ttc_metadata_output = json.loads( @@ -247,7 +288,118 @@ def test_handler_continues_when_selected_candidate_is_none( ) assert ttc_output is not None assert ttc_output["persistence_id"] == mock_aws_setup.persistence_id - assert ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["candidate"] is None + assert ttc_output["schematron_errors"]["Lab Test Name Resulted"] == [] + assert ttc_output["schematron_errors"]["Lab Test Name Ordered"] == [] + assert ( + len(ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"]) + == EXPECTED_RESULTED_ERRORS + ) + assert ( + len(ttc_output["unmatched_schematron_errors"]["Lab Test Name Ordered"]) + == EXPECTED_ORDERED_ERRORS + ) + assert ( + ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"][0]["candidate"] + is None + ) + assert ( + ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"][0]["reason"] + == "No relevant text candidate was selected" + ) + + ttc_metadata_output = json.loads( + lambda_handler.get_file_content_from_s3( + bucket_name=S3_BUCKET, + object_key=f"{TTC_METADATA_PREFIX}{mock_aws_setup.persistence_id}", + ) + ) + assert ttc_metadata_output is not None + assert ttc_metadata_output["persistence_id"] == mock_aws_setup.persistence_id + assert ( + len(ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"]) + == EXPECTED_RESULTED_ERRORS + ) + assert ( + len(ttc_metadata_output["schematron_errors"]["Lab Test Name Ordered"]) + == EXPECTED_ORDERED_ERRORS + ) + assert ( + ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"][0]["candidate"] + is None + ) + assert ( + ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"][0]["reason"] + == "No relevant text candidate was selected" + ) + + def test_handler_adds_unmatched_error_when_selected_candidate_has_no_opensearch_hits( + self, example_sqs_event, mock_aws_setup, mock_opensearch, mocker + ): + """Test handler records unmatched errors when a selected candidate has no OpenSearch hits.""" + selected_candidate = { + "value": "weed allergen mix 3", + "confidence": 1.0, + } + + mocker.patch( + "text_to_code.services.evaluator.select_relevant_text", + return_value=type("SelectedCandidate", (), selected_candidate)(), + ) + + empty_opensearch_scores = type( + "OpenSearchScores", + (), + {"hits": type("Hits", (), {"hits": []})()}, + )() + + mocker.patch( + "text_to_code_lambda.lambda_function.lambda_handler.retrieve_opensearch_results", + return_value=empty_opensearch_scores, + ) + + reranker_mock = mocker.patch.object( + lambda_function.RERANKER, + "rerank", + return_value=[], + ) + + resp = lambda_function.handler(example_sqs_event, {}) + + assert resp == { + "statusCode": 200, + "message": "TTC processed successfully!", + "num_success_eicrs": 1, + } + + assert mock_opensearch.search.call_count == 0 + assert reranker_mock.call_count == EXPECTED_RESULTED_ERRORS + EXPECTED_ORDERED_ERRORS + + ttc_output = json.loads( + lambda_handler.get_file_content_from_s3( + bucket_name=S3_BUCKET, + object_key=f"{TTC_OUTPUT_PREFIX}{mock_aws_setup.persistence_id}", + ) + ) + assert ttc_output is not None + assert ttc_output["persistence_id"] == mock_aws_setup.persistence_id + assert ttc_output["schematron_errors"]["Lab Test Name Resulted"] == [] + assert ttc_output["schematron_errors"]["Lab Test Name Ordered"] == [] + assert ( + len(ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"]) + == EXPECTED_RESULTED_ERRORS + ) + assert ( + len(ttc_output["unmatched_schematron_errors"]["Lab Test Name Ordered"]) + == EXPECTED_ORDERED_ERRORS + ) + assert ( + ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"][0]["candidate"] + is not None + ) + assert ( + ttc_output["unmatched_schematron_errors"]["Lab Test Name Resulted"][0]["reason"] + == "Selected candidate found, but no OpenSearch code match was returned" + ) ttc_metadata_output = json.loads( lambda_handler.get_file_content_from_s3( @@ -257,5 +409,27 @@ def test_handler_continues_when_selected_candidate_is_none( ) assert ttc_metadata_output is not None assert ttc_metadata_output["persistence_id"] == mock_aws_setup.persistence_id - assert ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"] == [] - assert ttc_metadata_output["schematron_errors"]["Lab Test Name Ordered"] == [] + assert ( + len(ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"]) + == EXPECTED_RESULTED_ERRORS + ) + assert ( + len(ttc_metadata_output["schematron_errors"]["Lab Test Name Ordered"]) + == EXPECTED_ORDERED_ERRORS + ) + assert ( + ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"][0][ + "opensearch_retrieved_scores" + ] + is not None + ) + assert ( + ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"][0][ + "reranker_processed_results" + ] + == [] + ) + assert ( + ttc_metadata_output["schematron_errors"]["Lab Test Name Resulted"][0]["reason"] + == "Selected candidate found, but no OpenSearch code match was returned" + ) diff --git a/packages/text-to-code/src/text_to_code/models/__init__.py b/packages/text-to-code/src/text_to_code/models/__init__.py index d510ec70..19409134 100644 --- a/packages/text-to-code/src/text_to_code/models/__init__.py +++ b/packages/text-to-code/src/text_to_code/models/__init__.py @@ -12,6 +12,7 @@ from .schematron import LabTestNameOrderedSchematronErrors from .schematron import LabTestNameResultedSchematronErrors from .schematron import SchematronConfig +from .schematron import SchematronErrorDetail from .schematron import SchematronErrors __all__ = [ @@ -28,6 +29,7 @@ "LabTestNameResultedSchematronErrors", "LabXPaths", "SchematronConfig", + "SchematronErrorDetail", "SchematronErrors", "VectorSearchParams", ]