Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from opensearchpy import OpenSearch

import lambda_handler
from shared_models import Code
from shared_models import NonstandardCodeInstance
from text_to_code.models import Candidate
from text_to_code.models import SchematronErrorDetail
from text_to_code.models import query as query_models
from text_to_code.services import eicr_processor
from text_to_code.services import embedder
Expand Down Expand Up @@ -212,6 +216,29 @@ def _populate_eicr_metadata(
ttc_metadata_output["eicr_metadata"] = eicr_metadata


def _build_nonstandard_code_instance(
    schematron_error: SchematronErrorDetail,
    new_translation: Code,
    selected_candidate: Candidate,
) -> NonstandardCodeInstance:
    """Assemble the NonstandardCodeInstance entry written to the TTC output.

    The supplied translation is not modified; a copy carrying the candidate's
    text as ``original_text`` is attached to the result instead.

    :param schematron_error: The Schematron error being processed; its
        ``error_message``, ``error_context`` and ``field`` are copied onto the result.
    :param new_translation: The replacement code proposed for the error.
    :param selected_candidate: The text candidate chosen for the error; its
        ``value`` becomes the translation's ``original_text``.
    :return: A NonstandardCodeInstance describing the error and its new translation.
    """
    # Copy first so the caller's Code object keeps its own original_text.
    text_override = {"original_text": selected_candidate.value}
    translation_copy = new_translation.model_copy(update=text_override)

    instance_fields = {
        "schematron_error": schematron_error.error_message,
        "schematron_error_xpath": schematron_error.error_context,
        "field_type": schematron_error.field,
        "new_translation": translation_copy,
    }
    return NonstandardCodeInstance(**instance_fields)


def _process_schematron_errors(
original_eicr_content: str,
schematron_data_fields: list,
Expand Down Expand Up @@ -250,13 +277,13 @@ def _process_schematron_errors(
)

error.candidate = selected_candidate
ttc_output["schematron_errors"][data_field].append(error.model_dump())

logger.info(
"Embedding the relevant text strings for each error in the eICR for persistence_id"
)

if selected_candidate is None:
ttc_output["schematron_errors"][data_field].append(error.model_dump())
continue

vector_embedding = RETRIEVER.embed(selected_candidate.value)
Expand All @@ -281,6 +308,19 @@ def _process_schematron_errors(
retrieved_loinc_names = [hit.source.description for hit in results_list]
ranked_results = RERANKER.rerank(selected_candidate.value, retrieved_loinc_names)

if results_list:
ttc_output["schematron_errors"][data_field].append(
_build_nonstandard_code_instance(
schematron_error=error,
new_translation=Code(
code=results_list[0].source.loinc_code,
code_system="2.16.840.1.113883.6.1",
code_system_name="LOINC",
display_name=results_list[0].source.description,
),
selected_candidate=selected_candidate,
).model_dump()
)
metadata_error = error.model_dump()
metadata_error["opensearch_retrieved_scores"] = opensearch_retrieved_scores
metadata_error["reranker_processed_results"] = ranked_results
Expand Down
46 changes: 42 additions & 4 deletions packages/text-to-code-lambda/tests/test_lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,48 @@ def test_handler_success(self, example_sqs_event, mock_aws_setup, mock_opensearc
"opensearch_retrieved_scores"
not in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
)
assert "candidate" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
assert "error_context" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
assert "error_id" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
assert ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["candidate"] is not None
assert (
"reranker_processed_results"
not in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
)
assert "schematron_error" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
assert (
"schematron_error_xpath" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
)
assert "field_type" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
assert "new_translation" in ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]
assert (
ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["field_type"]
== "Lab Test Name Resulted"
)
assert (
ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"]["code"]
== "109224-6"
)
assert (
ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][
"code_system"
]
== "2.16.840.1.113883.6.1"
)
assert (
ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][
"code_system_name"
]
== "LOINC"
)
assert (
ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][
"display_name"
]
is not None
)
assert (
ttc_output["schematron_errors"]["Lab Test Name Resulted"][0]["new_translation"][
"original_text"
]
== "weed allergen mix 3"
)

# Assert that the TTC metadata output was saved to S3 with the expected content
ttc_metadata_output = json.loads(
Expand Down
2 changes: 2 additions & 0 deletions packages/text-to-code/src/text_to_code/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from .schematron import LabTestNameOrderedSchematronErrors
from .schematron import LabTestNameResultedSchematronErrors
from .schematron import SchematronConfig
from .schematron import SchematronErrorDetail
from .schematron import SchematronErrors

__all__ = [
Expand All @@ -28,6 +29,7 @@
"LabTestNameResultedSchematronErrors",
"LabXPaths",
"SchematronConfig",
"SchematronErrorDetail",
"SchematronErrors",
"VectorSearchParams",
]
Loading