diff --git a/packages/augmentation-lambda/pyproject.toml b/packages/augmentation-lambda/pyproject.toml
index 7e16095..f5cdab5 100644
--- a/packages/augmentation-lambda/pyproject.toml
+++ b/packages/augmentation-lambda/pyproject.toml
@@ -2,10 +2,13 @@
name = "augmentation-lambda"
version = "0.1.0"
readme = "README.md"
-dependencies = ["aws-lambda-typing>=2.20.0"]
+dependencies = [
+ "aws-lambda-powertools>=2.0.0",
+ "structlog>=24.0.0",
+]
[dependency-groups]
-dev = []
+dev = ["aws-lambda-typing>=2.20.0", "boto3>=1.40.60", "moto"]
[build-system]
requires = ["hatchling"]
diff --git a/packages/augmentation-lambda/src/augmentation_lambda/lambda_function.py b/packages/augmentation-lambda/src/augmentation_lambda/lambda_function.py
index b2d120f..4527bc3 100644
--- a/packages/augmentation-lambda/src/augmentation_lambda/lambda_function.py
+++ b/packages/augmentation-lambda/src/augmentation_lambda/lambda_function.py
@@ -1,20 +1,27 @@
import io
import json
import os
-from typing import TypedDict
-from aws_lambda_typing import context as lambda_context
-from aws_lambda_typing import events as lambda_events
+import structlog
+from aws_lambda_powertools.utilities.data_classes import SQSEvent
+from aws_lambda_powertools.utilities.data_classes import event_source
+from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord
+from aws_lambda_powertools.utilities.typing import LambdaContext
from botocore.client import BaseClient
import lambda_handler
from augmentation.models import TTCAugmenterConfig
from augmentation.models.application import TTCAugmenterOutput
from augmentation.services.eicr_augmenter import EICRAugmenter
+from shared_models import NonstandardCodeInstance
from shared_models import TTCAugmenterInput
+logger = structlog.get_logger()
+
# Environment variables
S3_BUCKET = os.getenv("S3_BUCKET", "dibbs-text-to-code")
+EICR_INPUT_PREFIX = os.getenv("EICR_INPUT_PREFIX", "eCRMessageV2/")
+TTC_OUTPUT_PREFIX = os.getenv("TTC_OUTPUT_PREFIX", "TTCAugmentationMetadataV2/")
AUGMENTED_EICR_PREFIX = os.getenv("AUGMENTED_EICR_PREFIX", "AugmentationEICRV2/")
AUGMENTATION_METADATA_PREFIX = os.getenv("AUGMENTATION_METADATA_PREFIX", "AugmentationMetadataV2/")
@@ -22,19 +29,16 @@
_cached_s3_client: BaseClient | None = None
-class HandlerResponse(TypedDict):
- """Response from the AWS Lambda handler."""
-
- results: list[dict[str, object]]
- batchItemFailures: list[dict[str, str]]
-
-
-def handler(event: lambda_events.SQSEvent, context: lambda_context.Context) -> HandlerResponse:
+@event_source(data_class=SQSEvent)
+def handler(event: SQSEvent, context: LambdaContext) -> dict:
"""AWS Lambda handler for augmenting eICRs with nonstandard codes.
- :param event: The SQS event containing messages with eICRs to augment.
+ Triggered by S3 events when TTC output objects are created in TTCAugmentationMetadataV2/.
+ Reads TTC output and original eICR from S3, performs augmentation, and writes results to S3.
+
+ :param event: The SQS event containing S3 event data.
:param context: The AWS Lambda context object.
- :return: A dictionary containing the results of the augmentation and any batch item failures.
+ :return: A dictionary containing processing results and any batch item failures.
"""
global _cached_s3_client # noqa: PLW0603
@@ -42,90 +46,153 @@ def handler(event: lambda_events.SQSEvent, context: lambda_context.Context) -> H
_cached_s3_client = lambda_handler.create_s3_client()
s3_client = _cached_s3_client
- results: list[dict[str, object]] = []
- batch_item_failures: list[dict[str, str]] = []
+ logger.info(f"Received event with {len(event['Records'])} record(s)")
- for record in event["Records"]:
- message_id = record["messageId"]
+ failures = []
+ successes = []
+ for record in event.records:
try:
- payload = json.loads(record["body"])
- augmenter_input = TTCAugmenterInput.model_validate(
- {
- "eicr_id": payload["eicr_id"],
- "nonstandard_codes": payload["nonstandard_codes"],
- }
- )
-
- eicr = payload["eicr"]
-
- # TODO: will need to determine config based on application code when there are multiple applications using the augmentation service. For now, since TTC is the only application, we can directly initialize the config as a TTC config.
- config = (
- TTCAugmenterConfig.model_validate(payload["config"])
- if "config" in payload
- else TTCAugmenterConfig()
- )
-
- # TODO: in the future, when there are multiple applications using the augmentation service, we will need to determine which augmenter to use based on the application code in the config. For now, since TTC is the only application, we can directly initialize the EICRAugmenter.
- augmenter = EICRAugmenter(
- document=eicr,
- nonstandard_codes=augmenter_input.nonstandard_codes,
- config=config,
- )
-
- metadata = augmenter.augment()
-
- # TODO: the output of the augmenter will likely need to be modified when there are multiple applications and augmenters, but for now we can directly create a TTC augmenter output.
- output = TTCAugmenterOutput(
- eicr_id=augmenter_input.eicr_id,
- augmented_eicr=augmenter.augmented_xml,
- metadata=metadata,
- )
-
- # Save augmented eICR and metadata to S3
- _save_augmentation_outputs(augmenter_input.eicr_id, output, s3_client)
-
- results.append(
- {
- "messageId": message_id,
- "status": "success",
- "result": output.model_dump(),
- }
- )
- except Exception as exc:
- batch_item_failures.append({"itemIdentifier": message_id})
- results.append(
- {
- "messageId": message_id,
- "status": "error",
- "error": str(exc),
- }
- )
-
- return {
- "results": results,
- "batchItemFailures": batch_item_failures,
- }
+ _process_record(record, s3_client)
+ successes.append(record.message_id)
+ except Exception as e:
+            logger.exception("Error processing record", message_id=record.message_id, error=str(e))
+ failures.append({"message_id": record.message_id, "error": str(e)})
+
+ return (
+ {
+ "statusCode": 200,
+ "message": "Augmentation processed with some failures!",
+            "failures": failures,
+            "batchItemFailures": [{"itemIdentifier": f["message_id"]} for f in failures],
+            "num_failure_eicrs": len(failures), "num_success_eicrs": len(successes),
+ }
+ if failures
+ else {
+ "statusCode": 200,
+ "message": "Augmentation processed successfully!",
+ "num_success_eicrs": len(successes),
+ }
+ )
+
+
+def _process_record(record: SQSRecord, s3_client: BaseClient) -> None:
+ """Process a single SQS record containing an S3 event.
+
+ :param record: The SQS record with an EventBridge S3 event in the body.
+ :param s3_client: The S3 client to use for reading and writing files.
+ """
+ if not record.body:
+ logger.warning("Empty SQS body", message_id=record.message_id)
+ return
+
+ s3_event = json.loads(record.body)
+
+ eventbridge_data = lambda_handler.get_eventbridge_data_from_s3_event(s3_event)
+ object_key = eventbridge_data["object_key"]
+ bucket_name = eventbridge_data.get("bucket_name") or S3_BUCKET
+ logger.info(f"Processing S3 Object: s3://{bucket_name}/{object_key}")
+
+ persistence_id = lambda_handler.get_persistence_id(object_key, TTC_OUTPUT_PREFIX)
+ logger.info(f"Extracted persistence_id: {persistence_id}")
+
+ ttc_output = _load_ttc_output(persistence_id, s3_client, bucket_name)
+ original_eicr = _load_original_eicr(persistence_id, s3_client, bucket_name)
+ nonstandard_codes = _parse_nonstandard_codes(ttc_output)
+
+ augmenter_input = TTCAugmenterInput(
+ eicr_id=persistence_id,
+ nonstandard_codes=nonstandard_codes,
+ )
+
+ config = TTCAugmenterConfig()
+ augmenter = EICRAugmenter(
+ document=original_eicr,
+ nonstandard_codes=augmenter_input.nonstandard_codes,
+ config=config,
+ )
+
+ metadata = augmenter.augment()
+
+ output = TTCAugmenterOutput(
+ eicr_id=augmenter_input.eicr_id,
+ augmented_eicr=augmenter.augmented_xml,
+ metadata=metadata,
+ )
+
+ _save_augmentation_outputs(persistence_id, output, s3_client, bucket_name)
+
+
+def _load_ttc_output(persistence_id: str, s3_client: BaseClient, bucket_name: str) -> dict:
+ """Load TTC output from S3.
+
+ :param persistence_id: The persistence ID for the S3 object key.
+ :param s3_client: The S3 client.
+ :param bucket_name: The S3 bucket name.
+ :return: The parsed TTC output dictionary.
+ """
+ object_key = f"{TTC_OUTPUT_PREFIX}{persistence_id}"
+ logger.info(f"Retrieving TTC output from s3://{bucket_name}/{object_key}")
+ content = lambda_handler.get_file_content_from_s3(
+ bucket_name=bucket_name, object_key=object_key, s3_client=s3_client
+ )
+ return json.loads(content)
+
+
+def _load_original_eicr(persistence_id: str, s3_client: BaseClient, bucket_name: str) -> str:
+ """Load original eICR XML from S3.
+
+ :param persistence_id: The persistence ID for the S3 object key.
+ :param s3_client: The S3 client.
+ :param bucket_name: The S3 bucket name.
+ :return: The raw eICR XML string.
+ """
+ object_key = f"{EICR_INPUT_PREFIX}{persistence_id}"
+ logger.info(f"Retrieving eICR from s3://{bucket_name}/{object_key}")
+ return lambda_handler.get_file_content_from_s3(
+ bucket_name=bucket_name, object_key=object_key, s3_client=s3_client
+ )
+
+
+def _parse_nonstandard_codes(ttc_output: dict) -> list[NonstandardCodeInstance]:
+ """Parse nonstandard codes from TTC output.
+
+ The TTC Lambda writes NonstandardCodeInstance model dumps to the schematron_errors
+ field of the TTC output. This function validates and reconstructs them.
+
+ :param ttc_output: The TTC output dictionary from S3.
+ :return: A list of NonstandardCodeInstance objects.
+ """
+ codes = []
+ for entries in ttc_output.get("schematron_errors", {}).values():
+ for entry in entries:
+ if "new_translation" in entry:
+ codes.append(NonstandardCodeInstance.model_validate(entry))
+ return codes
def _save_augmentation_outputs(
- eicr_id: str, output: TTCAugmenterOutput, s3_client: BaseClient
+ persistence_id: str,
+ output: TTCAugmenterOutput,
+ s3_client: BaseClient,
+ bucket_name: str,
) -> None:
"""Save augmented eICR and metadata to S3.
- :param eicr_id: The eICR identifier.
+ :param persistence_id: The persistence ID for the S3 object key.
:param output: The augmentation output containing the augmented eICR and metadata.
:param s3_client: The S3 client to use for uploading files.
+ :param bucket_name: The S3 bucket name to write to.
"""
lambda_handler.put_file(
file_obj=io.BytesIO(output.augmented_eicr.encode("utf-8")),
- bucket_name=S3_BUCKET,
- object_key=f"{AUGMENTED_EICR_PREFIX}{eicr_id}",
+ bucket_name=bucket_name,
+ object_key=f"{AUGMENTED_EICR_PREFIX}{persistence_id}",
s3_client=s3_client,
)
lambda_handler.put_file(
file_obj=io.BytesIO(output.metadata.model_dump_json().encode("utf-8")),
- bucket_name=S3_BUCKET,
- object_key=f"{AUGMENTATION_METADATA_PREFIX}{eicr_id}",
+ bucket_name=bucket_name,
+ object_key=f"{AUGMENTATION_METADATA_PREFIX}{persistence_id}",
s3_client=s3_client,
)
diff --git a/packages/augmentation-lambda/tests/conftest.py b/packages/augmentation-lambda/tests/conftest.py
new file mode 100644
index 0000000..d6770c8
--- /dev/null
+++ b/packages/augmentation-lambda/tests/conftest.py
@@ -0,0 +1,158 @@
+import json
+import os
+from pathlib import Path
+
+import boto3
+import moto
+import pytest
+
+from augmentation_lambda import lambda_function
+
+S3_BUCKET = "dibbs-text-to-code"
+EICR_INPUT_PREFIX = "eCRMessageV2/"
+TTC_OUTPUT_PREFIX = "TTCAugmentationMetadataV2/"
+AUGMENTED_EICR_PREFIX = "AugmentationEICRV2/"
+AUGMENTATION_METADATA_PREFIX = "AugmentationMetadataV2/"
+AWS_REGION = "us-east-1"
+AWS_ACCESS_KEY_ID = "test_access_key_id"
+AWS_SECRET_ACCESS_KEY = "test_secret_access_key" # noqa: S105
+TEST_PERSISTENCE_ID = "2025/09/03/1-5f84c7a5-91d7f5c6a2b7c9e08f0d1234"
+
+TEST_EICR_PATH = (
+ Path(__file__).parent.parent.parent
+ / "augmentation"
+ / "tests"
+ / "assets"
+ / "basic_test_eicr.xml"
+)
+
+TEST_TTC_OUTPUT = {
+ "persistence_id": TEST_PERSISTENCE_ID,
+ "eicr_metadata": {},
+ "schematron_errors": {
+ "Lab Test Name Resulted": [
+ {
+ "schematron_error": "Text to Code: Lab Test Name Resulted does not have a @code attribute",
+ "schematron_error_xpath": "/ClinicalDocument/component/structuredBody/component/section/entry/component/observation",
+ "field_type": "Lab Test Name Resulted",
+ "new_translation": {
+ "code": "109224-6",
+ "code_system": "2.16.840.1.113883.6.1",
+ "code_system_name": "LOINC",
+ "display_name": "Weed Allergen Mix 3 IgE Ab",
+ "value_set": None,
+ "value_set_version": None,
+ "original_text": "A custom code in original text.",
+ },
+ }
+ ]
+ },
+}
+
+
+def pytest_configure() -> None:
+ """Configure env variables for pytest."""
+ os.environ["S3_BUCKET"] = S3_BUCKET
+ os.environ["EICR_INPUT_PREFIX"] = EICR_INPUT_PREFIX
+ os.environ["TTC_OUTPUT_PREFIX"] = TTC_OUTPUT_PREFIX
+ os.environ["AUGMENTED_EICR_PREFIX"] = AUGMENTED_EICR_PREFIX
+ os.environ["AUGMENTATION_METADATA_PREFIX"] = AUGMENTATION_METADATA_PREFIX
+ os.environ["AWS_REGION"] = AWS_REGION
+ os.environ["AWS_ACCESS_KEY_ID"] = AWS_ACCESS_KEY_ID
+ os.environ["AWS_SECRET_ACCESS_KEY"] = AWS_SECRET_ACCESS_KEY
+
+
+@pytest.fixture
+def example_s3_event_payload() -> dict:
+ """EventBridge S3 event payload (what SQS body contains as JSON string)."""
+ return {
+ "version": "0",
+ "id": "12345678-1234-5678-9012-123456789012",
+ "detail-type": "Object Created",
+ "source": "aws.s3",
+ "account": "111122223333",
+ "time": "2025-09-03T12:34:56Z",
+ "region": "us-east-1",
+ "resources": [f"arn:aws:s3:::{S3_BUCKET}"],
+ "detail": {
+ "version": "0",
+ "bucket": {"name": S3_BUCKET},
+ "object": {
+ "key": f"{TTC_OUTPUT_PREFIX}{TEST_PERSISTENCE_ID}",
+ "size": 1024,
+ "etag": "0123456789abcdef0123456789abcdef",
+ "sequencer": "0055AED6DCD90281E5",
+ },
+ "request-id": "C3D13FE58DE4C810",
+ "requester": "arn:aws:iam::111122223333:user/example-user",
+ "reason": "PutObject",
+ },
+ }
+
+
+@pytest.fixture
+def example_sqs_event(example_s3_event_payload: dict) -> dict:
+ """Full SQS event that mimics real Lambda input."""
+ return {
+ "Records": [
+ {
+ "messageId": "f9ccdff5-0acb-4933-8995-bd7f0ab5f2f7",
+ "receiptHandle": "test-receipt-handle",
+ "body": json.dumps(example_s3_event_payload),
+ "attributes": {
+ "ApproximateReceiveCount": "1",
+ "SentTimestamp": "1752691260451",
+ "SenderId": "AIDAJXNJGGKNS7OSV23OI",
+ "ApproximateFirstReceiveTimestamp": "1752691260458",
+ },
+ "messageAttributes": {},
+ "md5OfBody": "dummy-md5",
+ "eventSource": "aws:sqs",
+ "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
+ "awsRegion": "us-east-1",
+ }
+ ]
+ }
+
+
+@pytest.fixture(scope="function")
+def mock_aws_setup(monkeypatch: pytest.MonkeyPatch) -> boto3.client:
+ """Setup test AWS environment with moto mock S3."""
+ with moto.mock_aws():
+ monkeypatch.setenv("AWS_REGION", AWS_REGION)
+ monkeypatch.setenv("AWS_ACCESS_KEY_ID", AWS_ACCESS_KEY_ID)
+ monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", AWS_SECRET_ACCESS_KEY)
+
+ s3 = boto3.client(
+ "s3",
+ region_name=AWS_REGION,
+ aws_access_key_id=AWS_ACCESS_KEY_ID,
+ aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
+ )
+ s3.create_bucket(Bucket=S3_BUCKET)
+
+ s3.bucket_name = S3_BUCKET
+ s3.persistence_id = TEST_PERSISTENCE_ID
+
+ # Put test eICR in mock S3
+ with TEST_EICR_PATH.open() as f:
+ eicr_content = f.read()
+ s3.put_object(
+ Bucket=S3_BUCKET,
+ Key=f"{EICR_INPUT_PREFIX}{TEST_PERSISTENCE_ID}",
+ Body=eicr_content,
+ )
+
+ # Put test TTC output in mock S3
+ s3.put_object(
+ Bucket=S3_BUCKET,
+ Key=f"{TTC_OUTPUT_PREFIX}{TEST_PERSISTENCE_ID}",
+ Body=json.dumps(TEST_TTC_OUTPUT),
+ )
+
+ # Reset cached S3 client so Lambda creates a new one inside moto context
+ lambda_function._cached_s3_client = None
+
+ yield s3
+
+ lambda_function._cached_s3_client = None
diff --git a/packages/augmentation-lambda/tests/test_augmentation_lambda_function.py b/packages/augmentation-lambda/tests/test_augmentation_lambda_function.py
index 137df4e..db920d5 100644
--- a/packages/augmentation-lambda/tests/test_augmentation_lambda_function.py
+++ b/packages/augmentation-lambda/tests/test_augmentation_lambda_function.py
@@ -1,408 +1,284 @@
import json
-from unittest.mock import MagicMock
-from unittest.mock import patch
-import pytest
-
-from augmentation.models import Metadata
+import lambda_handler
from augmentation_lambda import lambda_function
-from shared_models import TTCAugmenterInput
-
-
-class FakeAugmenter:
- def __init__(self, document: str, nonstandard_codes: list[object], config: object) -> None:
- """Fake augmenter for testing purposes.
-
- :param document: The input document to augment.
- :param nonstandard_codes: The list of nonstandard codes to resolve.
- :param config: The augmenter config to use for augmentation.
- """
- self.document = document
- self.nonstandard_codes = nonstandard_codes
- self.config = config
- self.augmented_xml = ''
-
- def augment(self) -> Metadata:
- """Fake augment method that returns a successful augmentation result.
-
- :return: A Metadata object representing the result of the augmentation.
- """
- return Metadata(
- original_eicr_id="original-doc-id",
- augmented_eicr_id="augmented-doc-id",
- nonstandard_codes=[],
- )
-
-
-@pytest.fixture(autouse=True)
-def mock_s3_client():
- """Mock the S3 client and put_file for all tests."""
- lambda_function._cached_s3_client = MagicMock()
- with patch.object(lambda_function, "lambda_handler") as mock_handler:
- mock_handler.create_s3_client.return_value = MagicMock()
- mock_handler.put_file = MagicMock()
- yield mock_handler
- lambda_function._cached_s3_client = None
-
-
-def test_handler_returns_success_result(mocker, mock_s3_client) -> None:
- """Tests that the handler returns a successful result when the augmenter runs without errors.
-
- :param mocker: The pytest-mock fixture for mocking objects.
- """
- mocker.patch.object(lambda_function, "EICRAugmenter", FakeAugmenter)
-
- model_validate_spy = mocker.spy(TTCAugmenterInput, "model_validate")
-
- event = {
- "Records": [
+from shared_models import DataField
+from shared_models import NonstandardCodeInstance
+
+S3_BUCKET = "dibbs-text-to-code"
+TTC_OUTPUT_PREFIX = "TTCAugmentationMetadataV2/"
+AUGMENTED_EICR_PREFIX = "AugmentationEICRV2/"
+AUGMENTATION_METADATA_PREFIX = "AugmentationMetadataV2/"
+TEST_PERSISTENCE_ID = "2025/09/03/1-5f84c7a5-91d7f5c6a2b7c9e08f0d1234"
+
+TEST_TTC_OUTPUT = {
+ "persistence_id": TEST_PERSISTENCE_ID,
+ "eicr_metadata": {},
+ "schematron_errors": {
+ "Lab Test Name Resulted": [
{
- "messageId": "message-1",
- "body": json.dumps(
- {
- "eicr_id": "source-eicr-id",
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- }
- ]
- }
-
- result = lambda_function.handler(event, None)
-
- assert model_validate_spy.call_count == 1
- assert model_validate_spy.call_args.args[0] == {
- "eicr_id": "source-eicr-id",
- "nonstandard_codes": [],
- }
- assert result == {
- "results": [
- {
- "messageId": "message-1",
- "status": "success",
- "result": {
- "eicr_id": "source-eicr-id",
- "augmented_eicr": '',
- "metadata": {
- "original_eicr_id": "original-doc-id",
- "augmented_eicr_id": "augmented-doc-id",
- "nonstandard_codes": [],
- "error": None,
- },
+ "schematron_error": "Text to Code: Lab Test Name Resulted does not have a @code attribute",
+ "schematron_error_xpath": "/ClinicalDocument/component/structuredBody/component/section/entry/component/observation",
+ "field_type": "Lab Test Name Resulted",
+ "new_translation": {
+ "code": "109224-6",
+ "code_system": "2.16.840.1.113883.6.1",
+ "code_system_name": "LOINC",
+ "display_name": "Weed Allergen Mix 3 IgE Ab",
+ "value_set": None,
+ "value_set_version": None,
+ "original_text": "A custom code in original text.",
},
}
- ],
- "batchItemFailures": [],
- }
-
-
-def test_handler_creates_and_caches_s3_client_when_cache_is_empty(mocker) -> None:
- """Tests that the handler creates the S3 client when the cache is empty and reuses it.
-
- :param mocker: The pytest-mock fixture for mocking objects.
- """
- lambda_function._cached_s3_client = None
-
- created_s3_client = MagicMock()
- create_s3_client_spy = mocker.patch.object(
- lambda_function.lambda_handler,
- "create_s3_client",
- return_value=created_s3_client,
- )
- mocker.patch.object(lambda_function, "EICRAugmenter", FakeAugmenter)
-
- event = {
- "Records": [
- {
- "messageId": "message-cache",
- "body": json.dumps(
- {
- "eicr_id": "cached-eicr-id",
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- }
]
- }
-
- result = lambda_function.handler(event, None)
+ },
+}
- assert create_s3_client_spy.call_count == 1
- assert lambda_function._cached_s3_client is created_s3_client
- assert result["batchItemFailures"] == []
- assert result["results"][0]["status"] == "success"
+class TestParseNonstandardCodes:
+ """Tests for the _parse_nonstandard_codes helper."""
-def test_handler_saves_outputs_to_s3(mocker, mock_s3_client) -> None:
- """Tests that the handler writes augmented eICR and metadata to S3.
+ def test_parses_valid_ttc_output(self) -> None:
+ codes = lambda_function._parse_nonstandard_codes(TEST_TTC_OUTPUT)
- :param mocker: The pytest-mock fixture for mocking objects.
- """
- mocker.patch.object(lambda_function, "EICRAugmenter", FakeAugmenter)
+ assert len(codes) == 1
+ assert isinstance(codes[0], NonstandardCodeInstance)
+ assert codes[0].field_type == DataField.LAB_TEST_NAME_RESULTED
+ assert codes[0].new_translation.code == "109224-6"
+ assert codes[0].new_translation.code_system == "2.16.840.1.113883.6.1"
+ assert codes[0].new_translation.display_name == "Weed Allergen Mix 3 IgE Ab"
- event = {
- "Records": [
- {
- "messageId": "message-s3",
- "body": json.dumps(
+ def test_skips_entries_without_new_translation(self) -> None:
+ ttc_output = {
+ "schematron_errors": {
+ "Lab Test Name Resulted": [
{
- "eicr_id": "test-eicr-id",
- "eicr": "",
- "nonstandard_codes": [],
+ "field": "Lab Test Name Resulted",
+ "error": "some error",
+ "error_context": "/some/xpath",
}
- ),
+ ]
}
- ]
- }
-
- lambda_function.handler(event, None)
-
- # Verify put_file was called once for augmented eICR and once for metadata
- expected_put_file_calls = 2
- assert mock_s3_client.put_file.call_count == expected_put_file_calls
-
- # First call: augmented eICR
- eicr_call = mock_s3_client.put_file.call_args_list[0]
- assert eicr_call.kwargs["bucket_name"] == lambda_function.S3_BUCKET
- assert eicr_call.kwargs["object_key"] == f"{lambda_function.AUGMENTED_EICR_PREFIX}test-eicr-id"
-
- # Second call: metadata
- metadata_call = mock_s3_client.put_file.call_args_list[1]
- assert metadata_call.kwargs["bucket_name"] == lambda_function.S3_BUCKET
- assert (
- metadata_call.kwargs["object_key"]
- == f"{lambda_function.AUGMENTATION_METADATA_PREFIX}test-eicr-id"
- )
-
-
-def test_handler_uses_provided_config(mocker, mock_s3_client) -> None:
- """Tests that the handler uses the provided config when creating the augmenter.
-
- :param mocker: The pytest-mock fixture for mocking objects.
- """
- augmenter_mock = mocker.patch.object(lambda_function, "EICRAugmenter", autospec=True)
- augmenter_instance = augmenter_mock.return_value
- augmenter_instance.augment.return_value = Metadata(
- original_eicr_id="original-doc-id",
- augmented_eicr_id="augmented-doc-id",
- nonstandard_codes=[],
- )
- augmenter_instance.augmented_xml = (
- ''
- )
-
- config_validate_spy = mocker.spy(lambda_function.TTCAugmenterConfig, "model_validate")
-
- event = {
- "Records": [
- {
- "messageId": "message-2",
- "body": json.dumps(
- {
- "eicr_id": "source-eicr-id",
- "eicr": "",
- "nonstandard_codes": [],
- "config": {
- "rules": {
- "document": [
- "document_id_header",
- "author_header",
- ]
- }
- },
- }
- ),
- }
- ]
- }
-
- result = lambda_function.handler(event, None)
-
- assert config_validate_spy.call_count == 1
- augmenter_mock.assert_called_once()
- assert augmenter_mock.call_args.kwargs["document"] == ""
- assert augmenter_mock.call_args.kwargs["config"] == config_validate_spy.spy_return
- assert result["batchItemFailures"] == []
- assert result["results"][0]["status"] == "success"
-
+ }
-def test_handler_returns_error_for_invalid_payload(mocker) -> None:
- """Tests that the handler returns an error result for an invalid payload.
+ codes = lambda_function._parse_nonstandard_codes(ttc_output)
- :param mocker: The pytest-mock fixture for mocking objects.
- """
- mocker.patch.object(lambda_function, "EICRAugmenter", FakeAugmenter)
+ assert len(codes) == 0
- event = {
- "Records": [
- {
- "messageId": "message-3",
- "body": json.dumps(
- {
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- }
- ]
- }
+ def test_handles_empty_schematron_errors(self) -> None:
+ codes = lambda_function._parse_nonstandard_codes({"schematron_errors": {}})
+ assert len(codes) == 0
- result = lambda_function.handler(event, None)
+ def test_handles_missing_schematron_errors(self) -> None:
+ codes = lambda_function._parse_nonstandard_codes({})
+ assert len(codes) == 0
- assert result["batchItemFailures"] == [{"itemIdentifier": "message-3"}]
- assert result["results"][0]["messageId"] == "message-3"
- assert result["results"][0]["status"] == "error"
+class TestHandler:
+ """Tests for the augmentation Lambda handler."""
-def test_handler_returns_error_when_augmenter_raises(mocker) -> None:
- """Tests that the handler returns an error result when the augmenter raises an exception.
+ def test_handler_success(self, example_sqs_event, mock_aws_setup) -> None:
+ result = lambda_function.handler(example_sqs_event, None)
- :param mocker: The pytest-mock fixture for mocking objects.
- """
+ assert result["statusCode"] == 200 # noqa: PLR2004
+ assert result["message"] == "Augmentation processed successfully!"
+ assert result["num_success_eicrs"] == 1
- class RaisingAugmenter:
- def __init__(self, document: str, nonstandard_codes: list[object], config: object):
- self.document = document
- self.nonstandard_codes = nonstandard_codes
- self.config = config
+ def test_handler_writes_outputs_to_s3(self, example_sqs_event, mock_aws_setup) -> None:
+ lambda_function.handler(example_sqs_event, None)
- def augment(self) -> Metadata:
- raise ValueError("augmentation failed")
-
- mocker.patch.object(lambda_function, "EICRAugmenter", RaisingAugmenter)
-
- event = {
- "Records": [
- {
- "messageId": "message-4",
- "body": json.dumps(
- {
- "eicr_id": "source-eicr-id",
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- }
- ]
- }
+ # Verify augmented eICR was written
+ augmented_eicr = lambda_handler.get_file_content_from_s3(
+ bucket_name=S3_BUCKET,
+ object_key=f"{AUGMENTED_EICR_PREFIX}{TEST_PERSISTENCE_ID}",
+ s3_client=mock_aws_setup,
+ )
+ assert "ClinicalDocument" in augmented_eicr
- result = lambda_function.handler(event, None)
+ # Verify metadata was written
+ metadata_raw = lambda_handler.get_file_content_from_s3(
+ bucket_name=S3_BUCKET,
+ object_key=f"{AUGMENTATION_METADATA_PREFIX}{TEST_PERSISTENCE_ID}",
+ s3_client=mock_aws_setup,
+ )
+ metadata = json.loads(metadata_raw)
+ assert "original_eicr_id" in metadata
+ assert "augmented_eicr_id" in metadata
+
+ def test_handler_source_bucket_routing(self, example_s3_event_payload, mock_aws_setup) -> None:
+ """Verify bucket name is extracted from the S3 event, not the env var."""
+ custom_bucket = "custom-bucket"
+
+ # Create the custom bucket and populate it with the same test data
+ mock_aws_setup.create_bucket(Bucket=custom_bucket)
+ # Copy eICR to custom bucket
+ eicr_obj = mock_aws_setup.get_object(
+ Bucket=S3_BUCKET,
+ Key=f"eCRMessageV2/{TEST_PERSISTENCE_ID}",
+ )
+ mock_aws_setup.put_object(
+ Bucket=custom_bucket,
+ Key=f"eCRMessageV2/{TEST_PERSISTENCE_ID}",
+ Body=eicr_obj["Body"].read(),
+ )
+ # Copy TTC output to custom bucket
+ ttc_obj = mock_aws_setup.get_object(
+ Bucket=S3_BUCKET,
+ Key=f"{TTC_OUTPUT_PREFIX}{TEST_PERSISTENCE_ID}",
+ )
+ mock_aws_setup.put_object(
+ Bucket=custom_bucket,
+ Key=f"{TTC_OUTPUT_PREFIX}{TEST_PERSISTENCE_ID}",
+ Body=ttc_obj["Body"].read(),
+ )
- assert result == {
- "results": [
- {
- "messageId": "message-4",
- "status": "error",
- "error": "augmentation failed",
- }
- ],
- "batchItemFailures": [{"itemIdentifier": "message-4"}],
- }
-
-
-def test_handler_returns_mixed_batch_results(mocker) -> None:
- """Tests that the handler returns a mixed batch of success and error results.
-
- :param mocker: The pytest-mock fixture for mocking objects.
- """
-
- class ConditionalAugmenter:
- def __init__(self, document: str, nonstandard_codes: list[object], config: object):
- self.document = document
- self.nonstandard_codes = nonstandard_codes
- self.config = config
- self.augmented_xml = (
- ''
- )
-
- def augment(self) -> Metadata:
- if self.document == "":
- raise ValueError("broken document")
- return Metadata(
- original_eicr_id="original-doc-id",
- augmented_eicr_id="augmented-doc-id",
- nonstandard_codes=[],
- )
-
- mocker.patch.object(lambda_function, "EICRAugmenter", ConditionalAugmenter)
-
- event = {
- "Records": [
- {
- "messageId": "message-5",
- "body": json.dumps(
- {
- "eicr_id": "source-eicr-id-1",
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- },
- {
- "messageId": "message-6",
- "body": json.dumps(
- {
- "eicr_id": "source-eicr-id-2",
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- },
- ]
- }
+ # Modify the event to use custom bucket
+ example_s3_event_payload["detail"]["bucket"]["name"] = custom_bucket
+ event = {
+ "Records": [
+ {
+ "messageId": "msg-routing",
+ "receiptHandle": "test-receipt-handle",
+ "body": json.dumps(example_s3_event_payload),
+ "attributes": {
+ "ApproximateReceiveCount": "1",
+ "SentTimestamp": "1752691260451",
+ "SenderId": "AIDAJXNJGGKNS7OSV23OI",
+ "ApproximateFirstReceiveTimestamp": "1752691260458",
+ },
+ "messageAttributes": {},
+ "md5OfBody": "dummy-md5",
+ "eventSource": "aws:sqs",
+ "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
+ "awsRegion": "us-east-1",
+ }
+ ]
+ }
+
+ lambda_function.handler(event, None)
+
+ # Outputs should be in the custom bucket, not the default
+ augmented_eicr = lambda_handler.get_file_content_from_s3(
+ bucket_name=custom_bucket,
+ object_key=f"{AUGMENTED_EICR_PREFIX}{TEST_PERSISTENCE_ID}",
+ s3_client=mock_aws_setup,
+ )
+ assert "ClinicalDocument" in augmented_eicr
+
+ def test_handler_error_missing_eicr(self, example_s3_event_payload, mock_aws_setup) -> None:
+ """Test error when the original eICR is not found in S3."""
+ # Remove the eICR from S3
+ mock_aws_setup.delete_object(
+ Bucket=S3_BUCKET,
+ Key=f"eCRMessageV2/{TEST_PERSISTENCE_ID}",
+ )
- result = lambda_function.handler(event, None)
+ event = {
+ "Records": [
+ {
+ "messageId": "msg-missing-eicr",
+ "receiptHandle": "test-receipt-handle",
+ "body": json.dumps(example_s3_event_payload),
+ "attributes": {
+ "ApproximateReceiveCount": "1",
+ "SentTimestamp": "1752691260451",
+ "SenderId": "AIDAJXNJGGKNS7OSV23OI",
+ "ApproximateFirstReceiveTimestamp": "1752691260458",
+ },
+ "messageAttributes": {},
+ "md5OfBody": "dummy-md5",
+ "eventSource": "aws:sqs",
+ "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
+ "awsRegion": "us-east-1",
+ }
+ ]
+ }
+
+ result = lambda_function.handler(event, None)
+
+ assert result["num_failure_eicrs"] == 1
+ assert len(result["failures"]) == 1
+
+ def test_handler_error_missing_ttc_output(
+ self, example_s3_event_payload, mock_aws_setup
+ ) -> None:
+ """Test error when the TTC output is not found in S3."""
+ mock_aws_setup.delete_object(
+ Bucket=S3_BUCKET,
+ Key=f"{TTC_OUTPUT_PREFIX}{TEST_PERSISTENCE_ID}",
+ )
- assert result == {
- "results": [
- {
- "messageId": "message-5",
- "status": "success",
- "result": {
- "eicr_id": "source-eicr-id-1",
- "augmented_eicr": '',
- "metadata": {
- "original_eicr_id": "original-doc-id",
- "augmented_eicr_id": "augmented-doc-id",
- "nonstandard_codes": [],
- "error": None,
+ event = {
+ "Records": [
+ {
+ "messageId": "msg-missing-ttc",
+ "receiptHandle": "test-receipt-handle",
+ "body": json.dumps(example_s3_event_payload),
+ "attributes": {
+ "ApproximateReceiveCount": "1",
+ "SentTimestamp": "1752691260451",
+ "SenderId": "AIDAJXNJGGKNS7OSV23OI",
+ "ApproximateFirstReceiveTimestamp": "1752691260458",
+ },
+ "messageAttributes": {},
+ "md5OfBody": "dummy-md5",
+ "eventSource": "aws:sqs",
+ "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
+ "awsRegion": "us-east-1",
+ }
+ ]
+ }
+
+ result = lambda_function.handler(event, None)
+
+ assert result["num_failure_eicrs"] == 1
+ assert len(result["failures"]) == 1
+
+ def test_handler_mixed_batch_results(self, example_s3_event_payload, mock_aws_setup) -> None:
+ """Test batch with both success and failure records."""
+ # Create a second event pointing to a non-existent persistence ID
+        # JSON round-trip below deep-copies the payload so nested dicts are independent
+ bad_event_payload = json.loads(json.dumps(example_s3_event_payload))
+ bad_event_payload["detail"]["object"]["key"] = f"{TTC_OUTPUT_PREFIX}nonexistent/id"
+
+ event = {
+ "Records": [
+ {
+ "messageId": "msg-success",
+ "receiptHandle": "test-receipt-handle",
+ "body": json.dumps(example_s3_event_payload),
+ "attributes": {
+ "ApproximateReceiveCount": "1",
+ "SentTimestamp": "1752691260451",
+ "SenderId": "AIDAJXNJGGKNS7OSV23OI",
+ "ApproximateFirstReceiveTimestamp": "1752691260458",
},
+ "messageAttributes": {},
+ "md5OfBody": "dummy-md5",
+ "eventSource": "aws:sqs",
+ "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
+ "awsRegion": "us-east-1",
},
- },
- {
- "messageId": "message-6",
- "status": "error",
- "error": "broken document",
- },
- ],
- "batchItemFailures": [{"itemIdentifier": "message-6"}],
- }
-
-
-def test_handler_returns_input_eicr_id_in_output(mocker) -> None:
- """Tests that the handler returns the input eicr_id in the success result.
-
- :param mocker: The pytest-mock fixture for mocking objects.
- """
- mocker.patch.object(lambda_function, "EICRAugmenter", FakeAugmenter)
-
- event = {
- "Records": [
- {
- "messageId": "message-7",
- "body": json.dumps(
- {
- "eicr_id": "traceable-eicr-id",
- "eicr": "",
- "nonstandard_codes": [],
- }
- ),
- }
- ]
- }
+ {
+ "messageId": "msg-fail",
+ "receiptHandle": "test-receipt-handle-2",
+ "body": json.dumps(bad_event_payload),
+ "attributes": {
+ "ApproximateReceiveCount": "1",
+ "SentTimestamp": "1752691260451",
+ "SenderId": "AIDAJXNJGGKNS7OSV23OI",
+ "ApproximateFirstReceiveTimestamp": "1752691260458",
+ },
+ "messageAttributes": {},
+ "md5OfBody": "dummy-md5",
+ "eventSource": "aws:sqs",
+ "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:queue-name",
+ "awsRegion": "us-east-1",
+ },
+ ]
+ }
- result = lambda_function.handler(event, None)
+ result = lambda_function.handler(event, None)
- assert result["results"][0]["result"]["eicr_id"] == "traceable-eicr-id"
+ assert result["num_success_eicrs"] == 1
+ assert result["num_failure_eicrs"] == 1
diff --git a/terraform/main.tf b/terraform/main.tf
index d613d5e..e76e20c 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -511,12 +511,107 @@ resource "aws_lambda_function" "augmentation_lambda" {
environment {
variables = {
S3_BUCKET = var.s3_bucket
+ EICR_INPUT_PREFIX = var.eicr_input_prefix
+ TTC_OUTPUT_PREFIX = var.ttc_output_prefix
AUGMENTED_EICR_PREFIX = var.augmented_eicr_prefix
AUGMENTATION_METADATA_PREFIX = var.augmentation_metadata_prefix
REGION = var.region
}
}
tags = { Name = var.augmentation_lambda_function_name }
}
+#############
+# Augmentation Lambda SQS Queue
+#############
+
+resource "aws_sqs_queue" "augmentation_dlq" {
+ name = "${var.augmentation_lambda_function_name}-dlq"
+ tags = local.tags
+}
+
+resource "aws_sqs_queue" "augmentation_queue" {
+ name = "${var.augmentation_lambda_function_name}-queue"
+ visibility_timeout_seconds = var.augmentation_lambda_timeout * 6
+
+ redrive_policy = jsonencode({
+ deadLetterTargetArn = aws_sqs_queue.augmentation_dlq.arn
+ maxReceiveCount = 3
+ })
+
+ tags = local.tags
+}
+
+resource "aws_sqs_queue_policy" "augmentation_queue_policy" {
+ queue_url = aws_sqs_queue.augmentation_queue.id
+
+ policy = jsonencode({
+ Version = "2012-10-17"
+ Statement = [
+ {
+ Effect = "Allow"
+ Principal = { Service = "events.amazonaws.com" }
+ Action = "sqs:SendMessage", Resource = aws_sqs_queue.augmentation_queue.arn
+ Condition = { ArnEquals = { "aws:SourceArn" = aws_cloudwatch_event_rule.augmentation_s3_trigger.arn } }
+ }
+ ]
+ })
+}
+
+#############
+# Augmentation Lambda EventBridge Rule
+#############
+
+resource "aws_cloudwatch_event_rule" "augmentation_s3_trigger" {
+ name = "${var.augmentation_lambda_function_name}-s3-trigger"
+ description = "Trigger augmentation Lambda when TTC output is created in S3"
+
+ event_pattern = jsonencode({
+ source = ["aws.s3"]
+ detail-type = ["Object Created"]
+ detail = {
+ bucket = { name = [var.s3_bucket] }
+ object = { key = [{ prefix = var.ttc_output_prefix }] }
+ }
+ })
+
+ tags = local.tags
+}
+
+resource "aws_cloudwatch_event_target" "augmentation_sqs_target" {
+ rule = aws_cloudwatch_event_rule.augmentation_s3_trigger.name
+ target_id = "${var.augmentation_lambda_function_name}-sqs"
+ arn = aws_sqs_queue.augmentation_queue.arn
+}
+
+#############
+# Augmentation Lambda Event Source Mapping
+#############
+
+resource "aws_lambda_event_source_mapping" "augmentation_sqs" {
+ event_source_arn = aws_sqs_queue.augmentation_queue.arn
+ function_name = aws_lambda_function.augmentation_lambda.arn
+ batch_size = 1
+}
+
+resource "aws_iam_role_policy" "augmentation_sqs_policy" {
+ name = "augmentation-sqs-inline-policy"
+ role = aws_iam_role.lambda_role.id
+
+ policy = jsonencode({
+ Version = "2012-10-17"
+ Statement = [
+ {
+ Effect = "Allow"
+ Action = [
+ "sqs:ReceiveMessage",
+ "sqs:DeleteMessage",
+ "sqs:GetQueueAttributes",
+ ]
+ Resource = aws_sqs_queue.augmentation_queue.arn
+ }
+ ]
+ })
+}
+
diff --git a/terraform/s3.tf b/terraform/s3.tf
index 71a6563..0c41dab 100644
--- a/terraform/s3.tf
+++ b/terraform/s3.tf
@@ -12,3 +12,8 @@ resource "aws_s3_bucket" "ttc" {
tags = local.tags
}
+
+resource "aws_s3_bucket_notification" "ttc_eventbridge" {
+ bucket = aws_s3_bucket.ttc.id
+ eventbridge = true
+}
diff --git a/uv.lock b/uv.lock
index 76bfc43..fb60cfd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -114,14 +114,29 @@ name = "augmentation-lambda"
version = "0.1.0"
source = { editable = "packages/augmentation-lambda" }
dependencies = [
+ { name = "aws-lambda-powertools" },
+ { name = "structlog" },
+]
+
+[package.dev-dependencies]
+dev = [
{ name = "aws-lambda-typing" },
+ { name = "boto3" },
+ { name = "moto" },
]
[package.metadata]
-requires-dist = [{ name = "aws-lambda-typing", specifier = ">=2.20.0" }]
+requires-dist = [
+ { name = "aws-lambda-powertools", specifier = ">=2.0.0" },
+ { name = "structlog", specifier = ">=24.0.0" },
+]
[package.metadata.requires-dev]
-dev = []
+dev = [
+ { name = "aws-lambda-typing", specifier = ">=2.20.0" },
+ { name = "boto3", specifier = ">=1.40.60" },
+ { name = "moto" },
+]
[[package]]
name = "aws-lambda-powertools"
@@ -1960,6 +1975,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/da/545b75d420bb23b5d494b0517757b351963e974e79933f01e05c929f20a6/starlette-0.49.1-py3-none-any.whl", hash = "sha256:d92ce9f07e4a3caa3ac13a79523bd18e3bc0042bb8ff2d759a8e7dd0e1859875", size = 74175, upload-time = "2025-10-28T17:34:09.13Z" },
]
+[[package]]
+name = "structlog"
+version = "25.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" },
+]
+
[[package]]
name = "sympy"
version = "1.14.0"