From 5639642d4c3924edf12a9155d7b87a75c90ec44d Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 19 Aug 2025 11:11:40 -0400 Subject: [PATCH 01/23] Applying jsonata --- cdisc_rules_engine/enums/rule_types.py | 1 + cdisc_rules_engine/rules_engine.py | 5 +++++ .../data_readers/data_reader_factory.py | 4 ++-- .../{usdm_json_reader.py => json_reader.py} | 2 +- .../utilities/jsonata_processor.py | 18 ++++++++++++++++++ pyproject.toml | 1 + requirements.txt | 1 + 7 files changed, 29 insertions(+), 3 deletions(-) rename cdisc_rules_engine/services/data_readers/{usdm_json_reader.py => json_reader.py} (85%) create mode 100644 cdisc_rules_engine/utilities/jsonata_processor.py diff --git a/cdisc_rules_engine/enums/rule_types.py b/cdisc_rules_engine/enums/rule_types.py index 1c50193af..0680659d5 100644 --- a/cdisc_rules_engine/enums/rule_types.py +++ b/cdisc_rules_engine/enums/rule_types.py @@ -11,6 +11,7 @@ class RuleTypes(BaseEnum): DEFINE_ITEM_GROUP_METADATA_CHECK = "Define Item Group Metadata Check" DEFINE_ITEM_METADATA_CHECK = "Define Item Metadata Check" DOMAIN_PRESENCE_CHECK = "Domain Presence Check" + JSONATA = "JSONata" VALUE_LEVEL_METADATA_CHECK_AGAINST_DEFINE = ( "Value Level Metadata Check against Define XML" ) diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 291bcf6c7..d3b2e628e 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -36,6 +36,7 @@ from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) +from cdisc_rules_engine.utilities.jsonata_processor import JSONataProcessor from cdisc_rules_engine.utilities.data_processor import DataProcessor from cdisc_rules_engine.utilities.dataset_preprocessor import DatasetPreprocessor from cdisc_rules_engine.utilities.rule_processor import RuleProcessor @@ -283,6 +284,10 @@ def validate_rule( return self.execute_rule( rule_copy, dataset, datasets, dataset_metadata, **kwargs ) + elif 
rule.get("rule_type") == RuleTypes.JSONATA.value: + return JSONataProcessor.execute_jsonata_rule( + rule, dataset, datasets, dataset_metadata, **kwargs + ) kwargs["ct_packages"] = list(self.ct_packages) diff --git a/cdisc_rules_engine/services/data_readers/data_reader_factory.py b/cdisc_rules_engine/services/data_readers/data_reader_factory.py index 66d177770..5fb718975 100644 --- a/cdisc_rules_engine/services/data_readers/data_reader_factory.py +++ b/cdisc_rules_engine/services/data_readers/data_reader_factory.py @@ -12,7 +12,7 @@ DatasetNDJSONReader, ) from cdisc_rules_engine.services.data_readers.parquet_reader import ParquetReader -from cdisc_rules_engine.services.data_readers.usdm_json_reader import USDMJSONReader +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader from cdisc_rules_engine.enums.dataformat_types import DataFormatTypes from cdisc_rules_engine.models.dataset import PandasDataset @@ -23,7 +23,7 @@ class DataReaderFactory(FactoryInterface): DataFormatTypes.PARQUET.value: ParquetReader, DataFormatTypes.JSON.value: DatasetJSONReader, DataFormatTypes.NDJSON.value: DatasetNDJSONReader, - DataFormatTypes.USDM.value: USDMJSONReader, + DataFormatTypes.USDM.value: JSONReader, } def __init__(self, service_name: str = None, dataset_implementation=PandasDataset): diff --git a/cdisc_rules_engine/services/data_readers/usdm_json_reader.py b/cdisc_rules_engine/services/data_readers/json_reader.py similarity index 85% rename from cdisc_rules_engine/services/data_readers/usdm_json_reader.py rename to cdisc_rules_engine/services/data_readers/json_reader.py index e4ebf4f75..9554c90ab 100644 --- a/cdisc_rules_engine/services/data_readers/usdm_json_reader.py +++ b/cdisc_rules_engine/services/data_readers/json_reader.py @@ -4,7 +4,7 @@ ) -class USDMJSONReader(DataReaderInterface): +class JSONReader(DataReaderInterface): def from_file(self, file_path): with open(file_path) as fp: json = load(fp) diff --git 
a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py new file mode 100644 index 000000000..6e1d46ed4 --- /dev/null +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -0,0 +1,18 @@ +from jsonata import Jsonata + +from cdisc_rules_engine.models.rule_conditions.condition_composite import ( + ConditionComposite, +) +from cdisc_rules_engine.models.rule_conditions.single_condition import SingleCondition + + +class JSONataProcessor: + + @staticmethod + def execute_jsonata_rule(rule, dataset, datasets, dataset_metadata, **kwargs): + conditions: ConditionComposite = rule.get("conditions") + condition: SingleCondition = conditions.get_conditions().get("all", [])[0] + check = condition.get_conditions().get("operator") + expr = Jsonata(check) + result = expr.evaluate(dataset) + return result diff --git a/pyproject.toml b/pyproject.toml index 7b122c9ca..e973e4f0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "business_rules_enhanced==1.4.8", "cdisc-library-client==0.1.6", "importlib-metadata==8.5.0", + "jsonata-python==0.6.0", "jsonpath-ng==1.6.1", "jsonschema==4.18.5", "numpy~=1.23.2", diff --git a/requirements.txt b/requirements.txt index 54a15b968..dfbecad9f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ cdisc-library-client==0.1.6 click==8.1.7 flake8==6.1.0 importlib-metadata==8.5.0 +jsonata-python==0.6.0 jsonpath-ng==1.6.1 jsonschema==4.18.5 numpy~=1.26.0 From edd1255d97839eedecaf5adf211684aeee7620e7 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 19 Aug 2025 12:17:49 -0400 Subject: [PATCH 02/23] Need the builder --- .../dataset_builders/dataset_builder_factory.py | 4 ++++ .../dataset_builders/jsonata_dataset_builder.py | 10 ++++++++++ 2 files changed, 14 insertions(+) create mode 100644 cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py diff --git a/cdisc_rules_engine/dataset_builders/dataset_builder_factory.py 
b/cdisc_rules_engine/dataset_builders/dataset_builder_factory.py index 2831d26fe..84bea6f9b 100644 --- a/cdisc_rules_engine/dataset_builders/dataset_builder_factory.py +++ b/cdisc_rules_engine/dataset_builders/dataset_builder_factory.py @@ -1,6 +1,9 @@ # flake8: noqa from typing import Type +from cdisc_rules_engine.dataset_builders.jsonata_dataset_builder import ( + JSONataDatasetBuilder, +) from cdisc_rules_engine.interfaces import FactoryInterface from cdisc_rules_engine.dataset_builders.contents_dataset_builder import ( ContentsDatasetBuilder, @@ -73,6 +76,7 @@ class DatasetBuilderFactory(FactoryInterface): RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_DEFINE_XML_AND_LIBRARY.value: VariablesMetadataWithDefineAndLibraryDatasetBuilder, RuleTypes.VALUE_CHECK_WITH_DATASET_METADATA.value: ValueCheckDatasetMetadataDatasetBuilder, RuleTypes.VALUE_CHECK_WITH_VARIABLE_METADATA.value: ValueCheckVariableMetadataDatasetBuilder, + RuleTypes.JSONATA.value: JSONataDatasetBuilder, } @classmethod diff --git a/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py b/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py new file mode 100644 index 000000000..b7eb49cd2 --- /dev/null +++ b/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py @@ -0,0 +1,10 @@ +from json import load +from cdisc_rules_engine.dataset_builders.base_dataset_builder import BaseDatasetBuilder + + +class JSONataDatasetBuilder(BaseDatasetBuilder): + + def get_dataset(self, **kwargs): + with self.data_service.read_data(self.data_service.dataset_path) as fp: + json = load(fp) + return json From 7a401b4d0a84299c88dab6b3389f873dfb3a929f Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 21 Aug 2025 10:13:29 -0400 Subject: [PATCH 03/23] reporting works --- .../utilities/jsonata_processor.py | 37 ++++++++++++++++++- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 
6e1d46ed4..42803cd6d 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -1,5 +1,12 @@ from jsonata import Jsonata +from cdisc_rules_engine.enums.execution_status import ExecutionStatus +from cdisc_rules_engine.models.validation_error_container import ( + ValidationErrorContainer, +) +from cdisc_rules_engine.models.validation_error_entity import ( + ValidationErrorEntity, +) from cdisc_rules_engine.models.rule_conditions.condition_composite import ( ConditionComposite, ) @@ -14,5 +21,31 @@ def execute_jsonata_rule(rule, dataset, datasets, dataset_metadata, **kwargs): condition: SingleCondition = conditions.get_conditions().get("all", [])[0] check = condition.get_conditions().get("operator") expr = Jsonata(check) - result = expr.evaluate(dataset) - return result + results = expr.evaluate(dataset) + errors = ( + [ + ValidationErrorEntity( + value=result, + dataset=dataset_metadata.name, + row=result.get("path"), + usubjid=result.get("id"), + sequence=result.get("iid"), + ) + for result in results + ] + if results + else [] + ) + validation_error_container = ValidationErrorContainer( + dataset=dataset_metadata.name, + domain=dataset_metadata.domain, + targets=rule.get("output_variables"), + errors=errors, + message=rule.get("message"), + status=( + ExecutionStatus.SUCCESS.value + if results + else ExecutionStatus.EXECUTION_ERROR.value + ), + ) + return [validation_error_container.to_representation()] From 3eef17c743b4b9cfa2a4f409e6ebad229140615d Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Fri, 22 Aug 2025 16:39:03 -0400 Subject: [PATCH 04/23] jsonata can now be direct value of the Check property --- cdisc_rules_engine/models/rule.py | 4 +++- .../models/rule_conditions/condition_composite_factory.py | 6 +++++- cdisc_rules_engine/utilities/jsonata_processor.py | 8 +------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cdisc_rules_engine/models/rule.py 
b/cdisc_rules_engine/models/rule.py index 7e7e3fa5d..03c4f75e1 100644 --- a/cdisc_rules_engine/models/rule.py +++ b/cdisc_rules_engine/models/rule.py @@ -98,9 +98,11 @@ def is_cdisc_rule_metadata(cls, rule_metadata: dict) -> bool: return "Core" in rule_metadata @classmethod - def parse_conditions(cls, conditions: dict) -> dict: + def parse_conditions(cls, conditions: dict | str) -> dict | str: if not conditions: raise ValueError("No check data provided") + if isinstance(conditions, str): + return conditions all_conditions = conditions.get("all") any_conditions = conditions.get("any") not_condition = conditions.get("not") diff --git a/cdisc_rules_engine/models/rule_conditions/condition_composite_factory.py b/cdisc_rules_engine/models/rule_conditions/condition_composite_factory.py index 1b9a46ae7..60880941c 100644 --- a/cdisc_rules_engine/models/rule_conditions/condition_composite_factory.py +++ b/cdisc_rules_engine/models/rule_conditions/condition_composite_factory.py @@ -14,7 +14,11 @@ class ConditionCompositeFactory: """ @classmethod - def get_condition_composite(cls, conditions: dict) -> ConditionInterface: + def get_condition_composite( + cls, conditions: dict | str + ) -> ConditionInterface | str: + if isinstance(conditions, str): + return conditions composite = ConditionComposite() for key, condition_list in conditions.items(): # validate the rule structure diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 42803cd6d..c6d0c4325 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -7,19 +7,13 @@ from cdisc_rules_engine.models.validation_error_entity import ( ValidationErrorEntity, ) -from cdisc_rules_engine.models.rule_conditions.condition_composite import ( - ConditionComposite, -) -from cdisc_rules_engine.models.rule_conditions.single_condition import SingleCondition class JSONataProcessor: @staticmethod def 
execute_jsonata_rule(rule, dataset, datasets, dataset_metadata, **kwargs): - conditions: ConditionComposite = rule.get("conditions") - condition: SingleCondition = conditions.get_conditions().get("all", [])[0] - check = condition.get_conditions().get("operator") + check = rule.get("conditions") expr = Jsonata(check) results = expr.evaluate(dataset) errors = ( From a52cf9fb629fa7669a26728a83e348f2eea710c0 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 26 Aug 2025 14:08:02 -0400 Subject: [PATCH 05/23] Ability to load custom functions --- .pre-commit-config.yaml | 2 +- README.md | 1 + cdisc_rules_engine/models/validation_args.py | 1 + cdisc_rules_engine/rules_engine.py | 3 ++- .../utilities/jsonata_processor.py | 22 +++++++++++++++++-- core.py | 12 ++++++++++ scripts/run_validation.py | 1 + 7 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d7d98fa12..fb6213a38 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: black language_version: python3 - repo: https://github.com/pycqa/flake8 - rev: 5.0.4 + rev: 6.1.0 hooks: - id: flake8 language_version: python3 diff --git a/README.md b/README.md index 020b1f2ee..77e4e33e4 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,7 @@ Run `python core.py validate --help` to see the list of validation options. progress. By default a progress bar like "[████████████████████████████--------] 78%"is printed. + -jfp, --jsonata-functions-path Path to directory containing a set of custom JSONata functions. --help Show this message and exit. 
``` diff --git a/cdisc_rules_engine/models/validation_args.py b/cdisc_rules_engine/models/validation_args.py index 4d8846c9c..09f74c5a7 100644 --- a/cdisc_rules_engine/models/validation_args.py +++ b/cdisc_rules_engine/models/validation_args.py @@ -23,5 +23,6 @@ "progress", "define_xml_path", "validate_xml", + "jsonata_functions_path", ], ) diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 6a34ac7f4..b56fc1eb9 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -93,6 +93,7 @@ def __init__( self.external_dictionaries = external_dictionaries self.define_xml_path: str = kwargs.get("define_xml_path") self.validate_xml: bool = kwargs.get("validate_xml") + self.jsonata_functions_path: str = kwargs.get("jsonata_functions_path") def get_schema(self): return export_rule_data(DatasetVariable, COREActions) @@ -286,7 +287,7 @@ def validate_rule( ) elif rule.get("rule_type") == RuleTypes.JSONATA.value: return JSONataProcessor.execute_jsonata_rule( - rule, dataset, datasets, dataset_metadata, **kwargs + rule, dataset, dataset_metadata, self.jsonata_functions_path ) kwargs["ct_packages"] = list(self.ct_packages) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index c6d0c4325..550c67549 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -1,3 +1,5 @@ +from functools import cache +from glob import glob from jsonata import Jsonata from cdisc_rules_engine.enums.execution_status import ExecutionStatus @@ -12,9 +14,11 @@ class JSONataProcessor: @staticmethod - def execute_jsonata_rule(rule, dataset, datasets, dataset_metadata, **kwargs): + def execute_jsonata_rule(rule, dataset, dataset_metadata, jsonata_functions_path): + custom_functions = JSONataProcessor.get_custom_functions(jsonata_functions_path) check = rule.get("conditions") - expr = Jsonata(check) + full_string = 
f"(\n{custom_functions}{check}\n)" + expr = Jsonata(full_string) results = expr.evaluate(dataset) errors = ( [ @@ -43,3 +47,17 @@ def execute_jsonata_rule(rule, dataset, datasets, dataset_metadata, **kwargs): ), ) return [validation_error_container.to_representation()] + + @staticmethod + @cache + def get_custom_functions(jsonata_functions_path): + if not jsonata_functions_path: + return "" + functions = [] + for filepath in glob(f"{jsonata_functions_path}/*.jsonata"): + with open(filepath, "r") as file: + function_definition = file.read() + function_definition = function_definition.replace("{", "", 1) + function_definition = "".join(function_definition.rsplit("}", 1)) + functions.append(function_definition) + return f"$utils:={{\n{',\n'.join(functions)}\n}};\n" diff --git a/core.py b/core.py index 250561c00..147b24a9e 100644 --- a/core.py +++ b/core.py @@ -204,6 +204,13 @@ def cli(): default="y", help="Enable XML validation (default 'y' to enable, otherwise disable)", ) +@click.option( + "-jfp", + "--jsonata-functions-path", + required=False, + type=click.Path(exists=True, file_okay=False, readable=True, resolve_path=True), + help="Path to directory containing a set of custom JSONata functions.", +) @click.pass_context def validate( ctx, @@ -235,6 +242,7 @@ def validate( progress: str, define_xml_path: str, validate_xml: str, + jsonata_functions_path: str, ): """ Validate data using CDISC Rules Engine @@ -321,6 +329,7 @@ def validate( progress, define_xml_path, validate_xml_bool, + jsonata_functions_path, ) ) @@ -667,6 +676,7 @@ def test_validate(): define_xml_path = None validate_xml = False json_output = os.path.join(temp_dir, "json_validation_output") + jsonata_functions_path = None run_validation( Validation_args( cache_path, @@ -689,6 +699,7 @@ def test_validate(): progress, define_xml_path, validate_xml, + jsonata_functions_path, ) ) print("JSON validation completed successfully!") @@ -715,6 +726,7 @@ def test_validate(): progress, define_xml_path, 
validate_xml, + jsonata_functions_path, ) ) print("XPT validation completed successfully!") diff --git a/scripts/run_validation.py b/scripts/run_validation.py index ee6e47477..ec43c3bef 100644 --- a/scripts/run_validation.py +++ b/scripts/run_validation.py @@ -89,6 +89,7 @@ def validate_single_rule( max_dataset_size=max_dataset_size, dataset_paths=args.dataset_paths, validate_xml=args.validate_xml, + jsonata_functions_path=args.jsonata_functions_path, ) results = engine.validate_single_rule(rule, datasets) results = list(itertools.chain(*results.values())) From 3d1439f05692279ba93c0bb7c615aafb674e9027 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 26 Aug 2025 14:48:36 -0400 Subject: [PATCH 06/23] fix unit test missing arg --- tests/unit/test_services/test_data_service/test_data_service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_services/test_data_service/test_data_service.py b/tests/unit/test_services/test_data_service/test_data_service.py index 7b8077d68..1e4c4570e 100644 --- a/tests/unit/test_services/test_data_service/test_data_service.py +++ b/tests/unit/test_services/test_data_service/test_data_service.py @@ -279,6 +279,7 @@ def test_get_dataset_class_associated_domains(): "", None, False, + None, ) ) data_service = LocalDataService( From ed8da7a100d011621b1961b2a1918676c647b4d2 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 26 Aug 2025 14:49:25 -0400 Subject: [PATCH 07/23] Fix unit test missing arg --- tests/unit/test_services/test_data_service/test_data_service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_services/test_data_service/test_data_service.py b/tests/unit/test_services/test_data_service/test_data_service.py index 1e4c4570e..fb3634eee 100644 --- a/tests/unit/test_services/test_data_service/test_data_service.py +++ b/tests/unit/test_services/test_data_service/test_data_service.py @@ -202,6 +202,7 @@ def test_get_dataset_class(dataset_metadata, data, expected_class): "", None, False, 
+ None, ) ) data_service = LocalDataService( From 20fbfe1743d0ece2615a2f5a4fb063c2778a98a2 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Sep 2025 15:43:28 -0400 Subject: [PATCH 08/23] jsonata unit test --- .../jsonata_dataset_builder.py | 11 ++- cdisc_rules_engine/models/rule.py | 12 +++ .../utilities/jsonata_processor.py | 10 ++- .../test_utilities/test_jsonata_processor.py | 78 +++++++++++++++++++ 4 files changed, 108 insertions(+), 3 deletions(-) create mode 100644 tests/unit/test_utilities/test_jsonata_processor.py diff --git a/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py b/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py index b7eb49cd2..04d9fd65f 100644 --- a/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py @@ -5,6 +5,15 @@ class JSONataDatasetBuilder(BaseDatasetBuilder): def get_dataset(self, **kwargs): - with self.data_service.read_data(self.data_service.dataset_path) as fp: + if hasattr(self.data_service, "dataset_path"): + dataset_path = self.data_service.dataset_path + elif ( + hasattr(self.data_service, "dataset_paths") + and len(self.data_service.dataset_paths) == 1 + ): + dataset_path = self.data_service.dataset_paths[0] + else: + return None + with self.data_service.read_data(dataset_path) as fp: json = load(fp) return json diff --git a/cdisc_rules_engine/models/rule.py b/cdisc_rules_engine/models/rule.py index 466898614..0140c8151 100644 --- a/cdisc_rules_engine/models/rule.py +++ b/cdisc_rules_engine/models/rule.py @@ -37,6 +37,7 @@ def __init__(self, record_params: dict): @classmethod def from_cdisc_metadata(cls, rule_metadata: dict) -> dict: if cls.is_cdisc_rule_metadata(rule_metadata): + rule_metadata = cls.spaces_to_underscores(rule_metadata) authorities = rule_metadata.get("Authorities", []) executable_rule = { "core_id": rule_metadata.get("Core", {}).get("Id"), @@ -72,6 +73,17 @@ def from_cdisc_metadata(cls, 
rule_metadata: dict) -> dict: else: return rule_metadata + @classmethod + def spaces_to_underscores(cls, obj): + if isinstance(obj, dict): + return { + key.replace(" ", "_"): cls.spaces_to_underscores(value) + for key, value in obj.items() + } + if isinstance(obj, list): + return [cls.spaces_to_underscores(item) for item in obj] + return obj + @classmethod def parse_standards(cls, authorities: List[dict]) -> List[dict]: standards = [] diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 550c67549..6a1cbe096 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -3,6 +3,7 @@ from jsonata import Jsonata from cdisc_rules_engine.enums.execution_status import ExecutionStatus +from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata from cdisc_rules_engine.models.validation_error_container import ( ValidationErrorContainer, ) @@ -14,7 +15,12 @@ class JSONataProcessor: @staticmethod - def execute_jsonata_rule(rule, dataset, dataset_metadata, jsonata_functions_path): + def execute_jsonata_rule( + rule: dict, + dataset: dict, + dataset_metadata: SDTMDatasetMetadata, + jsonata_functions_path: str, + ): custom_functions = JSONataProcessor.get_custom_functions(jsonata_functions_path) check = rule.get("conditions") full_string = f"(\n{custom_functions}{check}\n)" @@ -60,4 +66,4 @@ def get_custom_functions(jsonata_functions_path): function_definition = function_definition.replace("{", "", 1) function_definition = "".join(function_definition.rsplit("}", 1)) functions.append(function_definition) - return f"$utils:={{\n{',\n'.join(functions)}\n}};\n" + return f"$utils:={{\n{f',\n'.join(functions)}\n}};\n" diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py new file mode 100644 index 000000000..150e5dabb --- /dev/null +++ 
b/tests/unit/test_utilities/test_jsonata_processor.py @@ -0,0 +1,78 @@ +from unittest.mock import MagicMock, patch +from yaml import safe_load +from cdisc_rules_engine.models.rule import Rule +from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata +from cdisc_rules_engine.utilities.jsonata_processor import JSONataProcessor + + +@patch( + "cdisc_rules_engine.utilities.jsonata_processor.JSONataProcessor.get_custom_functions" +) +def test_jsonata_processor(mock_get_custom_functions: MagicMock): + rule = """ + Check: | + **.$filter($, $utils.equals).{"path":path, "A":A, "B":B} + Core: + Id: JSONATA Test + Status: Draft + Outcome: + Message: "A equals B" + Output Variables: + - id + - name + - path + - A + - B + Rule Type: JSONata + Scope: + Entities: + Include: + - ALL + Sensitivity: Record + """ + mock_get_custom_functions.return_value = """ + $utils:={ + "equals": function($v){ $v.A=$v.B } + }; + """ + dataset = { + "path": "", + "A": "same value 1", + "B": "same value 1", + "C": { + "path": "C", + "A": "different value 1", + "B": "different value 2", + "C": {"path": "C.C", "A": "same value 2", "B": "same value 2"}, + }, + } + expected = [ + { + "executionStatus": "success", + "dataset": "", + "domain": None, + "variables": ["A", "B", "id", "name", "path"], + "message": None, + "errors": [ + { + "value": {"path": "", "A": "same value 1", "B": "same value 1"}, + "dataset": "", + "row": "", + }, + { + "value": {"path": "C.C", "A": "same value 2", "B": "same value 2"}, + "dataset": "", + "row": "C.C", + }, + ], + } + ] + rule = Rule.from_cdisc_metadata(safe_load(rule)) + result = JSONataProcessor.execute_jsonata_rule( + rule=rule, + dataset=dataset, + dataset_metadata=SDTMDatasetMetadata(), + jsonata_functions_path="", + ) + + assert result == expected From fe20f975a98c49c1bff6d241a029a087df05f0e7 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Sep 2025 13:41:48 -0400 Subject: [PATCH 09/23] Added docs and fixed report message --- 
.../utilities/jsonata_processor.py | 7 +- resources/schema/CORE-base.json | 3 + resources/schema/Rule_Type.json | 5 +- resources/schema/Rule_Type.md | 91 +++++++++++++++++++ .../test_utilities/test_jsonata_processor.py | 2 +- 5 files changed, 104 insertions(+), 4 deletions(-) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 6a1cbe096..f887c2b0d 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -45,7 +45,9 @@ def execute_jsonata_rule( domain=dataset_metadata.domain, targets=rule.get("output_variables"), errors=errors, - message=rule.get("message"), + message=next(iter(rule.get("actions", [])), {}) + .get("params", {}) + .get("message"), status=( ExecutionStatus.SUCCESS.value if results @@ -66,4 +68,5 @@ def get_custom_functions(jsonata_functions_path): function_definition = function_definition.replace("{", "", 1) function_definition = "".join(function_definition.rsplit("}", 1)) functions.append(function_definition) - return f"$utils:={{\n{f',\n'.join(functions)}\n}};\n" + functions_str = ",\n".join(functions) + return f"$utils:={{\n{functions_str}\n}};\n" diff --git a/resources/schema/CORE-base.json b/resources/schema/CORE-base.json index 6bcbc9d53..dae4092a7 100644 --- a/resources/schema/CORE-base.json +++ b/resources/schema/CORE-base.json @@ -41,6 +41,9 @@ }, { "$ref": "Operator.json" + }, + { + "type": "string" } ] }, diff --git a/resources/schema/Rule_Type.json b/resources/schema/Rule_Type.json index e8f98a2a6..ca1270b90 100644 --- a/resources/schema/Rule_Type.json +++ b/resources/schema/Rule_Type.json @@ -34,6 +34,10 @@ "const": "Domain Presence Check", "title": "Content domain presence at study level" }, + { + "const": "JSONata", + "title": "Apply a JSONata query to a JSON file" + }, { "const": "Record Data", "title": "Content data at record level. 
Most common Rule Type" @@ -50,7 +54,6 @@ "const": "Value Level Metadata Check against Define XML", "title": "Content data at record level and define xml metadata at value level" }, - { "const": "Variable Metadata Check", "title": "Content metadata at variable level" diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index 5cc64dd10..710642b25 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -179,6 +179,97 @@ all: operator: not_exists ``` +## JSONata + +Apply a JSONata query to a JSON file. [JSONata documentation](https://docs.jsonata.org) + +### Example + +#### Rule + +```yaml +Check: | + **.$filter($, $utils.equals).{"path":path, "A":A, "B":B} +Core: + Id: JSONATA Test +Status: Draft +Outcome: + Message: "A equals B" + Output Variables: + - id + - name + - path + - A + - B +Rule Type: JSONata +Scope: + Entities: + Include: + - ALL +Sensitivity: Record +``` + +#### Custom user function contained in external file "equals.jsonata" + +\* Note that in the CLI, you can pass a directory of such files using `-jfp` or `--jsonata-functions-path` + +```yaml +{ + "equals": function($v){ $v.A=$v.B } +} +``` + +#### JSON Data + +```json +{ + "path": "", + "A": "same value 1", + "B": "same value 1", + "C": { + "path": "C", + "A": "different value 1", + "B": "different value 2", + "C": { "path": "C.C", "A": "same value 2", "B": "same value 2" } + } +} +``` + +#### Result + +```json +[ + { + "executionStatus": "success", + "dataset": "", + "domain": "", + "variables": ["A", "B", "id", "name", "path"], + "message": "A equals B", + "errors": [ + { + "value": { "path": "", "A": "same value 1", "B": "same value 1" }, + "dataset": "", + "row": "" + }, + { + "value": { "path": "C.C", "A": "same value 2", "B": "same value 2" }, + "dataset": "", + "row": "C.C" + } + ] + } +] +``` + +### Output Variables and Report column mapping + +You can use `Outcome.Output Variables` to specify which properties to display from the result JSON. 
The following result property names will map to the column names in the Excel output report. + +Result property name -> Report Issue Details Column Name: + +- `id` -> `USUBJID` +- `path` -> `Record` + ## Record Data #### Columns diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py index 150e5dabb..5955107dd 100644 --- a/tests/unit/test_utilities/test_jsonata_processor.py +++ b/tests/unit/test_utilities/test_jsonata_processor.py @@ -52,7 +52,7 @@ def test_jsonata_processor(mock_get_custom_functions: MagicMock): "dataset": "", "domain": None, "variables": ["A", "B", "id", "name", "path"], - "message": None, + "message": "A equals B", "errors": [ { "value": {"path": "", "A": "same value 1", "B": "same value 1"}, From fddb998d6af23196c2ca14f36d5d453e3adfcfaa Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 15 Sep 2025 16:33:19 -0400 Subject: [PATCH 10/23] process rule once for jsonata --- cdisc_rules_engine/rules_engine.py | 23 ++++++--- .../utilities/jsonata_processor.py | 51 +++++++++---------- .../test_utilities/test_jsonata_processor.py | 2 - 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index b56fc1eb9..adfa032eb 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -103,16 +103,23 @@ def validate_single_rule(self, rule: dict, datasets: Iterable[SDTMDatasetMetadat rule["conditions"] = ConditionCompositeFactory.get_condition_composite( rule["conditions"] ) - for dataset_metadata in datasets: - if dataset_metadata.unsplit_name in results and "domains" in rule: - include_split = rule["domains"].get("include_split_datasets", False) - if not include_split: - continue # handling split datasets - results[dataset_metadata.unsplit_name] = self.validate_single_dataset( + if rule.get("rule_type") == RuleTypes.JSONATA.value: + results["json"] = self.validate_single_dataset( 
rule, datasets, - dataset_metadata, + SDTMDatasetMetadata(name="json"), ) + else: + for dataset_metadata in datasets: + if dataset_metadata.unsplit_name in results and "domains" in rule: + include_split = rule["domains"].get("include_split_datasets", False) + if not include_split: + continue # handling split datasets + results[dataset_metadata.unsplit_name] = self.validate_single_dataset( + rule, + datasets, + dataset_metadata, + ) return results def validate_single_dataset( @@ -287,7 +294,7 @@ def validate_rule( ) elif rule.get("rule_type") == RuleTypes.JSONATA.value: return JSONataProcessor.execute_jsonata_rule( - rule, dataset, dataset_metadata, self.jsonata_functions_path + rule, dataset, self.jsonata_functions_path ) kwargs["ct_packages"] = list(self.ct_packages) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index f887c2b0d..f9cbeeee4 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -1,9 +1,9 @@ +from collections import defaultdict from functools import cache from glob import glob from jsonata import Jsonata from cdisc_rules_engine.enums.execution_status import ExecutionStatus -from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata from cdisc_rules_engine.models.validation_error_container import ( ValidationErrorContainer, ) @@ -18,7 +18,6 @@ class JSONataProcessor: def execute_jsonata_rule( rule: dict, dataset: dict, - dataset_metadata: SDTMDatasetMetadata, jsonata_functions_path: str, ): custom_functions = JSONataProcessor.get_custom_functions(jsonata_functions_path) @@ -26,35 +25,35 @@ def execute_jsonata_rule( full_string = f"(\n{custom_functions}{check}\n)" expr = Jsonata(full_string) results = expr.evaluate(dataset) - errors = ( - [ - ValidationErrorEntity( + errors = defaultdict(list) + if results: + for result in results: + error_entity = ValidationErrorEntity( value=result, - 
dataset=dataset_metadata.name, + dataset=result.get("dataset") or "", row=result.get("path"), usubjid=result.get("id"), sequence=result.get("iid"), ) - for result in results - ] - if results - else [] - ) - validation_error_container = ValidationErrorContainer( - dataset=dataset_metadata.name, - domain=dataset_metadata.domain, - targets=rule.get("output_variables"), - errors=errors, - message=next(iter(rule.get("actions", [])), {}) - .get("params", {}) - .get("message"), - status=( - ExecutionStatus.SUCCESS.value - if results - else ExecutionStatus.EXECUTION_ERROR.value - ), - ) - return [validation_error_container.to_representation()] + errors[result.get("dataset")].append(error_entity) + validation_error_container = [ + ValidationErrorContainer( + dataset=dataset, + domain=dataset, + targets=rule.get("output_variables"), + errors=error, + message=next(iter(rule.get("actions", [])), {}) + .get("params", {}) + .get("message"), + status=( + ExecutionStatus.SUCCESS.value + if results + else ExecutionStatus.EXECUTION_ERROR.value + ), + ).to_representation() + for dataset, error in errors.items() + ] + return validation_error_container @staticmethod @cache diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py index 5955107dd..73e98db77 100644 --- a/tests/unit/test_utilities/test_jsonata_processor.py +++ b/tests/unit/test_utilities/test_jsonata_processor.py @@ -1,7 +1,6 @@ from unittest.mock import MagicMock, patch from yaml import safe_load from cdisc_rules_engine.models.rule import Rule -from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata from cdisc_rules_engine.utilities.jsonata_processor import JSONataProcessor @@ -71,7 +70,6 @@ def test_jsonata_processor(mock_get_custom_functions: MagicMock): result = JSONataProcessor.execute_jsonata_rule( rule=rule, dataset=dataset, - dataset_metadata=SDTMDatasetMetadata(), jsonata_functions_path="", ) From 
5a063107a3babd9918c5ba70be7575c3667c00bc Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 16 Sep 2025 11:24:12 -0400 Subject: [PATCH 11/23] Update result property names --- .../utilities/jsonata_processor.py | 6 ++--- resources/schema/Rule_Type.md | 26 ++++++++++++------- .../test_utilities/test_jsonata_processor.py | 18 +++++++------ 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index f9cbeeee4..143adadc3 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -31,9 +31,9 @@ def execute_jsonata_rule( error_entity = ValidationErrorEntity( value=result, dataset=result.get("dataset") or "", - row=result.get("path"), - usubjid=result.get("id"), - sequence=result.get("iid"), + row=result.get("record"), + usubjid=result.get("usubjid"), + sequence=result.get("sequence"), ) errors[result.get("dataset")].append(error_entity) validation_error_container = [ diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index 710642b25..1f1f57ef8 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -189,16 +189,14 @@ Apply a JSONata query to a JSON file. 
[JSONata documentation](https://docs.jsona ```yaml Check: | - **.$filter($, $utils.equals).{"path":path, "A":A, "B":B} + **.$filter($, $utils.equals).{"record":path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft Outcome: Message: "A equals B" Output Variables: - - id - - name - - path + - record - A - B Rule Type: JSONata @@ -243,16 +241,20 @@ Sensitivity: Record "executionStatus": "success", "dataset": "", "domain": "", - "variables": ["A", "B", "id", "name", "path"], + "variables": ["A", "B", "record"], "message": "A equals B", "errors": [ { - "value": { "path": "", "A": "same value 1", "B": "same value 1" }, + "value": { "record": "", "A": "same value 1", "B": "same value 1" }, "dataset": "", "row": "" }, { - "value": { "path": "C.C", "A": "same value 2", "B": "same value 2" }, + "value": { + "record": "C.C", + "A": "same value 2", + "B": "same value 2" + }, "dataset": "", "row": "C.C" } @@ -267,8 +269,14 @@ You can use `Outcome.Output Variables` to specify which properties to display fr Result property name -> Report Issue Details Column Name: -- `id` -> `USUBJID` -- `path` -> `Record` +- `dataset` -> `Dataset` +- `usubjid` -> `USUBJID` +- `record` -> `Record` +- `sequence` -> `Sequence` + +### Scope + +`Scope` should always `Include` `ALL` to ensure the rule will be run. The rule is only run once for the entire JSON file. The `Dataset` determination must come from the rule's jsonata result property. 
## Record Data diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py index 73e98db77..90ae647ac 100644 --- a/tests/unit/test_utilities/test_jsonata_processor.py +++ b/tests/unit/test_utilities/test_jsonata_processor.py @@ -10,16 +10,14 @@ def test_jsonata_processor(mock_get_custom_functions: MagicMock): rule = """ Check: | - **.$filter($, $utils.equals).{"path":path, "A":A, "B":B} + **.$filter($, $utils.equals).{"record":path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft Outcome: Message: "A equals B" Output Variables: - - id - - name - - path + - record - A - B Rule Type: JSONata @@ -48,18 +46,22 @@ def test_jsonata_processor(mock_get_custom_functions: MagicMock): expected = [ { "executionStatus": "success", - "dataset": "", + "dataset": None, "domain": None, - "variables": ["A", "B", "id", "name", "path"], + "variables": ["A", "B", "record"], "message": "A equals B", "errors": [ { - "value": {"path": "", "A": "same value 1", "B": "same value 1"}, + "value": {"record": "", "A": "same value 1", "B": "same value 1"}, "dataset": "", "row": "", }, { - "value": {"path": "C.C", "A": "same value 2", "B": "same value 2"}, + "value": { + "record": "C.C", + "A": "same value 2", + "B": "same value 2", + }, "dataset": "", "row": "C.C", }, From 84ee8fab42b497bcc8a80e7bb735796ffd3c2cde Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 18 Sep 2025 16:50:59 -0400 Subject: [PATCH 12/23] fixed json for utf8, added error handling --- .../exceptions/custom_exceptions.py | 5 +++++ cdisc_rules_engine/rules_engine.py | 8 ++++++++ .../services/data_readers/dataset_json_reader.py | 14 ++++++-------- .../data_readers/dataset_ndjson_reader.py | 9 +++++---- .../services/data_readers/json_reader.py | 12 +++++++++--- .../services/data_services/dummy_data_service.py | 16 ++++++++-------- .../services/data_services/usdm_data_service.py | 8 ++++---- .../services/datasetjson_metadata_reader.py | 11 ++++------- 
.../services/datasetndjson_metadata_reader.py | 7 +++---- 9 files changed, 52 insertions(+), 38 deletions(-) diff --git a/cdisc_rules_engine/exceptions/custom_exceptions.py b/cdisc_rules_engine/exceptions/custom_exceptions.py index 07bc5e3a2..ccb88c236 100644 --- a/cdisc_rules_engine/exceptions/custom_exceptions.py +++ b/cdisc_rules_engine/exceptions/custom_exceptions.py @@ -57,6 +57,11 @@ class InvalidDatasetFormat(EngineError): description = "Dataset data is malformed." +class InvalidJSONFormat(EngineError): + code = 400 + description = "JSON data is malformed." + + class NumberOfAttemptsExceeded(EngineError): pass diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index adfa032eb..499baa0ab 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -10,6 +10,7 @@ from cdisc_rules_engine.exceptions.custom_exceptions import ( DatasetNotFoundError, DomainNotFoundInDefineXMLError, + InvalidJSONFormat, RuleFormatError, VariableMetadataNotFoundError, FailedSchemaValidation, @@ -425,6 +426,13 @@ def handle_validation_exceptions( # noqa message=exception.args[0], ) message = "rule execution error" + elif isinstance(exception, InvalidJSONFormat): + error_obj = FailedValidationEntity( + dataset=os.path.basename(dataset_path), + error=InvalidJSONFormat.description, + message=exception.args[0], + ) + message = "rule execution error" elif isinstance(exception, FailedSchemaValidation): if self.validate_xml: error_obj: ValidationErrorContainer = ValidationErrorContainer( diff --git a/cdisc_rules_engine/services/data_readers/dataset_json_reader.py b/cdisc_rules_engine/services/data_readers/dataset_json_reader.py index 4ea2dc695..937b7bf51 100644 --- a/cdisc_rules_engine/services/data_readers/dataset_json_reader.py +++ b/cdisc_rules_engine/services/data_readers/dataset_json_reader.py @@ -1,7 +1,6 @@ import pandas as pd import dask.dataframe as dd import os -import json import jsonschema from 
cdisc_rules_engine.interfaces import ( @@ -12,19 +11,18 @@ from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset import tempfile +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader + class DatasetJSONReader(DataReaderInterface): def get_schema(self) -> dict: - with open( + schema = JSONReader().from_file( os.path.join("resources", "schema", "dataset.schema.json") - ) as schemajson: - schema = schemajson.read() - return json.loads(schema) + ) + return schema def read_json_file(self, file_path: str) -> dict: - with open(file_path, "r") as file: - datasetjson = json.load(file) - return datasetjson + return JSONReader().from_file(file_path) def _raw_dataset_from_file(self, file_path) -> pd.DataFrame: # Load Dataset-JSON Schema diff --git a/cdisc_rules_engine/services/data_readers/dataset_ndjson_reader.py b/cdisc_rules_engine/services/data_readers/dataset_ndjson_reader.py index 04a7af66b..89f0c663b 100644 --- a/cdisc_rules_engine/services/data_readers/dataset_ndjson_reader.py +++ b/cdisc_rules_engine/services/data_readers/dataset_ndjson_reader.py @@ -12,14 +12,15 @@ from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset import tempfile +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader + class DatasetNDJSONReader(DataReaderInterface): def get_schema(self) -> dict: - with open( + schema = JSONReader().from_file( os.path.join("resources", "schema", "dataset-ndjson-schema.json") - ) as schemandjson: - schema = schemandjson.read() - return json.loads(schema) + ) + return schema def read_json_file(self, file_path: str) -> dict: with open(file_path, "r") as file: diff --git a/cdisc_rules_engine/services/data_readers/json_reader.py b/cdisc_rules_engine/services/data_readers/json_reader.py index 9554c90ab..a4e33aa9a 100644 --- a/cdisc_rules_engine/services/data_readers/json_reader.py +++ b/cdisc_rules_engine/services/data_readers/json_reader.py @@ -1,4 +1,5 @@ from json import load +from 
cdisc_rules_engine.exceptions.custom_exceptions import InvalidJSONFormat from cdisc_rules_engine.interfaces import ( DataReaderInterface, ) @@ -6,9 +7,14 @@ class JSONReader(DataReaderInterface): def from_file(self, file_path): - with open(file_path) as fp: - json = load(fp) - return json + try: + with open(file_path, "rb") as fp: + json = load(fp) + return json + except Exception as e: + raise InvalidJSONFormat( + f"\n Error reading JSON from: {file_path}\n {type(e).__name__}: {e}" + ) def read(self, data): pass diff --git a/cdisc_rules_engine/services/data_services/dummy_data_service.py b/cdisc_rules_engine/services/data_services/dummy_data_service.py index f6e0e27e3..f22247f97 100644 --- a/cdisc_rules_engine/services/data_services/dummy_data_service.py +++ b/cdisc_rules_engine/services/data_services/dummy_data_service.py @@ -1,17 +1,19 @@ from datetime import datetime from io import IOBase -from json import load from typing import List, Optional, Iterable, Sequence import os import pandas as pd from cdisc_rules_engine.dummy_models.dummy_dataset import DummyDataset -from cdisc_rules_engine.exceptions.custom_exceptions import DatasetNotFoundError +from cdisc_rules_engine.exceptions.custom_exceptions import ( + DatasetNotFoundError, +) from cdisc_rules_engine.interfaces import CacheServiceInterface, ConfigInterface from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata from cdisc_rules_engine.models.dataset_types import DatasetTypes from cdisc_rules_engine.services.data_readers import DataReaderFactory +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader from cdisc_rules_engine.services.data_services import BaseDataService from cdisc_rules_engine.models.dataset import PandasDataset @@ -161,9 +163,8 @@ def get_datasets(self) -> Iterable[SDTMDatasetMetadata]: @staticmethod def get_data(dataset_paths: Sequence[str]): - with open(dataset_paths[0]) as fp: - json = load(fp) - return [DummyDataset(data) for data in 
json.get("datasets", [])] + json = JSONReader().from_file(dataset_paths[0]) + return [DummyDataset(data) for data in json.get("datasets", [])] @staticmethod def is_valid_data(dataset_paths: Sequence[str]): @@ -172,7 +173,6 @@ def is_valid_data(dataset_paths: Sequence[str]): and len(dataset_paths) == 1 and dataset_paths[0].lower().endswith(".json") ): - with open(dataset_paths[0]) as fp: - json = load(fp) - return "datasets" in json + json = JSONReader().from_file(dataset_paths[0]) + return "datasets" in json return False diff --git a/cdisc_rules_engine/services/data_services/usdm_data_service.py b/cdisc_rules_engine/services/data_services/usdm_data_service.py index 0f02e2c5a..004fb3a6c 100644 --- a/cdisc_rules_engine/services/data_services/usdm_data_service.py +++ b/cdisc_rules_engine/services/data_services/usdm_data_service.py @@ -2,7 +2,6 @@ from io import IOBase from typing import List, Sequence, Any from dataclasses import dataclass -from json import load from jsonpath_ng import DatumInContext from jsonpath_ng.ext import parse from datetime import datetime @@ -18,9 +17,11 @@ from cdisc_rules_engine.models.variable_metadata_container import ( VariableMetadataContainer, ) + from cdisc_rules_engine.services.data_readers.data_reader_factory import ( DataReaderFactory, ) +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader from cdisc_rules_engine.utilities.utils import ( extract_file_name_from_path_string, ) @@ -475,7 +476,6 @@ def is_valid_data(dataset_paths: Sequence[str]): and len(dataset_paths) == 1 and dataset_paths[0].lower().endswith(".json") ): - with open(dataset_paths[0]) as fp: - json = load(fp) - return "study" in json and "datasetJSONVersion" not in json + json = JSONReader().from_file(dataset_paths[0]) + return "study" in json and "datasetJSONVersion" not in json return False diff --git a/cdisc_rules_engine/services/datasetjson_metadata_reader.py b/cdisc_rules_engine/services/datasetjson_metadata_reader.py index 
832f0d0b1..f77856977 100644 --- a/cdisc_rules_engine/services/datasetjson_metadata_reader.py +++ b/cdisc_rules_engine/services/datasetjson_metadata_reader.py @@ -1,11 +1,11 @@ import os -import json import jsonschema import pandas as pd from cdisc_rules_engine.services import logger from cdisc_rules_engine.services.adam_variable_reader import AdamVariableReader +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader class DatasetJSONMetadataReader: @@ -25,14 +25,11 @@ def read(self) -> dict: Extracts metadata from .json file. """ # Load Dataset-JSON Schema - with open( + schema = JSONReader().from_file( os.path.join("resources", "schema", "dataset.schema.json") - ) as schemajson: - schema = schemajson.read() - schema = json.loads(schema) + ) - with open(self._file_path, "r") as file: - datasetjson = json.load(file) + datasetjson = JSONReader().from_file(self._file_path) try: jsonschema.validate(datasetjson, schema) diff --git a/cdisc_rules_engine/services/datasetndjson_metadata_reader.py b/cdisc_rules_engine/services/datasetndjson_metadata_reader.py index 51c27644c..ded014fa5 100644 --- a/cdisc_rules_engine/services/datasetndjson_metadata_reader.py +++ b/cdisc_rules_engine/services/datasetndjson_metadata_reader.py @@ -6,6 +6,7 @@ from cdisc_rules_engine.services import logger from cdisc_rules_engine.services.adam_variable_reader import AdamVariableReader +from cdisc_rules_engine.services.data_readers.json_reader import JSONReader class DatasetNDJSONMetadataReader: @@ -25,11 +26,9 @@ def read(self) -> dict: Extracts metadata from .ndjson file. 
""" # Load Dataset-NDJSON Schema - with open( + schema = JSONReader().from_file( os.path.join("resources", "schema", "dataset-ndjson-schema.json") - ) as schemandjson: - schema = schemandjson.read() - schema = json.loads(schema) + ) with open(self._file_path, "r") as file: lines = file.readlines() From b89c812ce8470675d513bb15c758d162628ee6c9 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Sun, 21 Sep 2025 11:10:13 -0400 Subject: [PATCH 13/23] error handling for jsonata loading --- .../services/data_readers/json_reader.py | 3 +- .../utilities/jsonata_processor.py | 32 +++++++++++++++---- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/cdisc_rules_engine/services/data_readers/json_reader.py b/cdisc_rules_engine/services/data_readers/json_reader.py index a4e33aa9a..f7928ae07 100644 --- a/cdisc_rules_engine/services/data_readers/json_reader.py +++ b/cdisc_rules_engine/services/data_readers/json_reader.py @@ -13,7 +13,8 @@ def from_file(self, file_path): return json except Exception as e: raise InvalidJSONFormat( - f"\n Error reading JSON from: {file_path}\n {type(e).__name__}: {e}" + f"\n Error reading JSON from: {file_path}" + f"\n {type(e).__name__}: {e}" ) def read(self, data): diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 143adadc3..02a290633 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -4,6 +4,9 @@ from jsonata import Jsonata from cdisc_rules_engine.enums.execution_status import ExecutionStatus +from cdisc_rules_engine.exceptions.custom_exceptions import ( + RuleFormatError, +) from cdisc_rules_engine.models.validation_error_container import ( ValidationErrorContainer, ) @@ -24,9 +27,20 @@ def execute_jsonata_rule( check = rule.get("conditions") full_string = f"(\n{custom_functions}{check}\n)" expr = Jsonata(full_string) - results = expr.evaluate(dataset) + try: + results = expr.evaluate(dataset) 
+ except Exception as e: + raise RuleFormatError( + f"\n Error evaluating JSONata Rule with Core Id: {rule.get("core_id")}" + f"\n {type(e).__name__}: {e}" + ) errors = defaultdict(list) if results: + if not isinstance(results, list): + raise RuleFormatError( + f"\n Error in return type of JSONata Rule with Core Id: {rule.get('core_id')}" + f"\n Expected a list, but got: {results}" + ) for result in results: error_entity = ValidationErrorEntity( value=result, @@ -62,10 +76,16 @@ def get_custom_functions(jsonata_functions_path): return "" functions = [] for filepath in glob(f"{jsonata_functions_path}/*.jsonata"): - with open(filepath, "r") as file: - function_definition = file.read() - function_definition = function_definition.replace("{", "", 1) - function_definition = "".join(function_definition.rsplit("}", 1)) - functions.append(function_definition) + try: + with open(filepath, "r") as file: + function_definition = file.read() + function_definition = function_definition.replace("{", "", 1) + function_definition = "".join(function_definition.rsplit("}", 1)) + functions.append(function_definition) + except Exception as e: + raise RuleFormatError( + f"\n Error loading JSONata custom functions at path: {filepath}" + f"\n {type(e).__name__}: {e}" + ) functions_str = ",\n".join(functions) return f"$utils:={{\n{functions_str}\n}};\n" From dacf981abacd8de887255d9a701735592c6c68d3 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Sun, 21 Sep 2025 13:31:31 -0400 Subject: [PATCH 14/23] Schema fix --- resources/schema/CORE-base.json | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/resources/schema/CORE-base.json b/resources/schema/CORE-base.json index dae4092a7..e28dd6de3 100644 --- a/resources/schema/CORE-base.json +++ b/resources/schema/CORE-base.json @@ -41,9 +41,6 @@ }, { "$ref": "Operator.json" - }, - { - "type": "string" } ] }, @@ -363,7 +360,14 @@ "type": "array" }, "Check": { - "$ref": "#/$defs/Boolean" + "anyOf": [ + { + "$ref": 
"#/$defs/Boolean" + }, + { + "type": "string" + } + ] }, "Core": { "properties": { From c5a2de917a9a6acce8236bede4d37e7086f4ed60 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 23 Sep 2025 13:24:20 -0400 Subject: [PATCH 15/23] Added builtin jsonata functions, cli multi function paths, better error handling and tests --- README.md | 2 +- .../enums/default_file_paths.py | 6 +- cdisc_rules_engine/models/validation_args.py | 2 +- cdisc_rules_engine/rules_engine.py | 6 +- .../utilities/jsonata_processor.py | 46 ++++++-- core.py | 38 ++++--- resources/jsonata/del_reps.jsonata | 11 ++ resources/jsonata/get_idmap.jsonata | 35 ++++++ resources/jsonata/get_maxids.jsonata | 12 ++ resources/jsonata/get_path.jsonata | 45 ++++++++ resources/jsonata/get_ref_value.jsonata | 16 +++ resources/jsonata/get_rule.jsonata | 11 ++ resources/jsonata/make_dup.jsonata | 31 ++++++ resources/jsonata/parent_rel.jsonata | 14 +++ resources/jsonata/parse_refs.jsonata | 30 +++++ resources/jsonata/report.jsonata | 56 ++++++++++ resources/jsonata/sift_tree.jsonata | 34 ++++++ resources/schema/Rule_Type.md | 5 +- scripts/run_validation.py | 2 +- .../test_utilities/test_jsonata_processor.py | 104 ++++++++++++++++-- 20 files changed, 460 insertions(+), 46 deletions(-) create mode 100644 resources/jsonata/del_reps.jsonata create mode 100644 resources/jsonata/get_idmap.jsonata create mode 100644 resources/jsonata/get_maxids.jsonata create mode 100644 resources/jsonata/get_path.jsonata create mode 100644 resources/jsonata/get_ref_value.jsonata create mode 100644 resources/jsonata/get_rule.jsonata create mode 100644 resources/jsonata/make_dup.jsonata create mode 100644 resources/jsonata/parent_rel.jsonata create mode 100644 resources/jsonata/parse_refs.jsonata create mode 100644 resources/jsonata/report.jsonata create mode 100644 resources/jsonata/sift_tree.jsonata diff --git a/README.md b/README.md index a3172ef00..974160657 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ Run `python 
core.py validate --help` to see the list of validation options. progress. By default a progress bar like "[████████████████████████████--------] 78%"is printed. - -jfp, --jsonata-functions-path Path to directory containing a set of custom JSONata functions. + -jcf, --jsonata-custom-functions Pair containing a variable name and a Path to directory containing a set of custom JSONata functions. Can be specified multiple times --help Show this message and exit. ``` diff --git a/cdisc_rules_engine/enums/default_file_paths.py b/cdisc_rules_engine/enums/default_file_paths.py index ecebbf2fb..053698870 100644 --- a/cdisc_rules_engine/enums/default_file_paths.py +++ b/cdisc_rules_engine/enums/default_file_paths.py @@ -1,9 +1,11 @@ +from os.path import join from cdisc_rules_engine.enums.base_enum import BaseEnum class DefaultFilePaths(BaseEnum): - CACHE = "resources/cache" - EXCEL_TEMPLATE_FILE = "resources/templates/report-template.xlsx" + CACHE = join("resources", "cache") + EXCEL_TEMPLATE_FILE = join("resources", "templates", "report-template.xlsx") + JSONATA_UTILS = join("resources", "jsonata") RULES_CACHE_FILE = "rules.pkl" RULES_DICTIONARY = "rules_dictionary.pkl" STANDARD_DETAILS_CACHE_FILE = "standards_details.pkl" diff --git a/cdisc_rules_engine/models/validation_args.py b/cdisc_rules_engine/models/validation_args.py index 09f74c5a7..08d915747 100644 --- a/cdisc_rules_engine/models/validation_args.py +++ b/cdisc_rules_engine/models/validation_args.py @@ -23,6 +23,6 @@ "progress", "define_xml_path", "validate_xml", - "jsonata_functions_path", + "jsonata_custom_functions", ], ) diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 499baa0ab..9f40e95f6 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -94,7 +94,9 @@ def __init__( self.external_dictionaries = external_dictionaries self.define_xml_path: str = kwargs.get("define_xml_path") self.validate_xml: bool = kwargs.get("validate_xml") - 
self.jsonata_functions_path: str = kwargs.get("jsonata_functions_path") + self.jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...] = ( + kwargs.get("jsonata_custom_functions", ()) + ) def get_schema(self): return export_rule_data(DatasetVariable, COREActions) @@ -295,7 +297,7 @@ def validate_rule( ) elif rule.get("rule_type") == RuleTypes.JSONATA.value: return JSONataProcessor.execute_jsonata_rule( - rule, dataset, self.jsonata_functions_path + rule, dataset, self.jsonata_custom_functions ) kwargs["ct_packages"] = list(self.ct_packages) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 02a290633..44b5dfab2 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -3,8 +3,11 @@ from glob import glob from jsonata import Jsonata +from cdisc_rules_engine.enums.default_file_paths import DefaultFilePaths from cdisc_rules_engine.enums.execution_status import ExecutionStatus from cdisc_rules_engine.exceptions.custom_exceptions import ( + MissingDataError, + RuleExecutionError, RuleFormatError, ) from cdisc_rules_engine.models.validation_error_container import ( @@ -21,16 +24,24 @@ class JSONataProcessor: def execute_jsonata_rule( rule: dict, dataset: dict, - jsonata_functions_path: str, + jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...], ): - custom_functions = JSONataProcessor.get_custom_functions(jsonata_functions_path) + custom_functions = JSONataProcessor.get_all_custom_functions( + jsonata_custom_functions + ) check = rule.get("conditions") full_string = f"(\n{custom_functions}{check}\n)" - expr = Jsonata(full_string) try: - results = expr.evaluate(dataset) + expr = Jsonata(full_string) except Exception as e: raise RuleFormatError( + f"\n Error parsing JSONata Rule for Core Id: {rule.get("core_id")}" + f"\n {type(e).__name__}: {e}" + ) + try: + results = expr.evaluate(dataset) + except Exception as e: + 
raise RuleExecutionError( f"\n Error evaluating JSONata Rule with Core Id: {rule.get("core_id")}" f"\n {type(e).__name__}: {e}" ) @@ -71,11 +82,28 @@ def execute_jsonata_rule( @staticmethod @cache - def get_custom_functions(jsonata_functions_path): - if not jsonata_functions_path: - return "" + def get_all_custom_functions( + jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...] + ): + builtins_and_customs = [ + ("utils", DefaultFilePaths.JSONATA_UTILS.value), + *jsonata_custom_functions, + ] + functions = [ + JSONataProcessor.get_custom_functions(name, path) + for name, path in builtins_and_customs + ] + return "\n".join(functions) + + @staticmethod + def get_custom_functions(jsonata_functions_name: str, jsonata_functions_path: str): functions = [] - for filepath in glob(f"{jsonata_functions_path}/*.jsonata"): + filepaths = glob(f"{jsonata_functions_path}/*.jsonata") + if not filepaths: + raise MissingDataError( + f"\n No JSONata custom functions found at path: {jsonata_functions_path}" + ) + for filepath in filepaths: try: with open(filepath, "r") as file: function_definition = file.read() @@ -88,4 +116,4 @@ def get_custom_functions(jsonata_functions_path): f"\n {type(e).__name__}: {e}" ) functions_str = ",\n".join(functions) - return f"$utils:={{\n{functions_str}\n}};\n" + return f"${jsonata_functions_name}:={{\n{functions_str}\n}};\n" diff --git a/core.py b/core.py index 147b24a9e..36305df12 100644 --- a/core.py +++ b/core.py @@ -6,7 +6,6 @@ import tempfile from datetime import datetime from multiprocessing import freeze_support -from typing import Tuple import click from pathlib import Path @@ -32,7 +31,7 @@ from version import __version__ -def valid_data_file(data_path: list) -> Tuple[list, set]: +def valid_data_file(data_path: list) -> tuple[list, set]: allowed_formats = [format.value for format in DataFormatTypes] found_formats = set() file_list = [] @@ -205,11 +204,16 @@ def cli(): help="Enable XML validation (default 'y' to enable, otherwise 
disable)", ) @click.option( - "-jfp", - "--jsonata-functions-path", + "-jcf", + "--jsonata-custom-functions", + default=[], + multiple=True, required=False, - type=click.Path(exists=True, file_okay=False, readable=True, resolve_path=True), - help="Path to directory containing a set of custom JSONata functions.", + type=( + str, + click.Path(exists=True, file_okay=False, readable=True, resolve_path=True), + ), + help="Variable Name and Path to directory containing a set of custom JSONata functions.", ) @click.pass_context def validate( @@ -217,15 +221,15 @@ def validate( cache: str, pool_size: int, data: str, - dataset_path: Tuple[str], + dataset_path: tuple[str], log_level: str, report_template: str, standard: str, version: str, substandard: str, - controlled_terminology_package: Tuple[str], + controlled_terminology_package: tuple[str], output: str, - output_format: Tuple[str], + output_format: tuple[str], raw_report: bool, define_version: str, whodrug: str, @@ -236,13 +240,13 @@ def validate( snomed_version: str, snomed_edition: str, snomed_url: str, - rules: Tuple[str], + rules: tuple[str], local_rules: str, custom_standard: bool, progress: str, define_xml_path: str, validate_xml: str, - jsonata_functions_path: str, + jsonata_custom_functions: tuple[()] | tuple[tuple[str, str], ...], ): """ Validate data using CDISC Rules Engine @@ -329,7 +333,7 @@ def validate( progress, define_xml_path, validate_xml_bool, - jsonata_functions_path, + jsonata_custom_functions, ) ) @@ -568,7 +572,7 @@ def list_rule_sets(ctx: click.Context, cache_path: str, custom: bool): multiple=True, ) @click.pass_context -def list_dataset_metadata(ctx: click.Context, dataset_path: Tuple[str]): +def list_dataset_metadata(ctx: click.Context, dataset_path: tuple[str]): """ Command that lists metadata of given datasets. 
@@ -617,7 +621,7 @@ def version(): required=False, multiple=True, ) -def list_ct(cache_path: str, subsets: Tuple[str]): +def list_ct(cache_path: str, subsets: tuple[str]): """ Command to list the ct packages available in the cache. """ @@ -676,7 +680,7 @@ def test_validate(): define_xml_path = None validate_xml = False json_output = os.path.join(temp_dir, "json_validation_output") - jsonata_functions_path = None + jsonata_custom_functions = () run_validation( Validation_args( cache_path, @@ -699,7 +703,7 @@ def test_validate(): progress, define_xml_path, validate_xml, - jsonata_functions_path, + jsonata_custom_functions, ) ) print("JSON validation completed successfully!") @@ -726,7 +730,7 @@ def test_validate(): progress, define_xml_path, validate_xml, - jsonata_functions_path, + jsonata_custom_functions, ) ) print("XPT validation completed successfully!") diff --git a/resources/jsonata/del_reps.jsonata b/resources/jsonata/del_reps.jsonata new file mode 100644 index 000000000..58ac0cb4b --- /dev/null +++ b/resources/jsonata/del_reps.jsonata @@ -0,0 +1,11 @@ +{ + /* + Delete repeated sequential elements from an array. + - Input: [1, 1, 2, 2, 2, 1, 3, 3] + - Output: [1, 2, 1, 3] + */ + "del_reps": function($lst) + { + $lst ~> $filter(function($v,$i,$a){$i = 0 or ($i > 0 and $v != $a[$i-1])}) + } +} \ No newline at end of file diff --git a/resources/jsonata/get_idmap.jsonata b/resources/jsonata/get_idmap.jsonata new file mode 100644 index 000000000..4e0c3342f --- /dev/null +++ b/resources/jsonata/get_idmap.jsonata @@ -0,0 +1,35 @@ +{ + "get_idmap" : function($obj,$mxids) + { + $reduce( + ($obj.**[id]). + { + "id": id, + "instanceType": instanceType + }, + function($idx,$v) + { + ( + $nxt := $v.instanceType in $keys($idx) ? 
$lookup($idx,$v.instanceType).next + 1 : 1; + $idx := $idx ~> |$| + { + $v.instanceType: + { + "max": $lookup($idx,$v.instanceType).max, + "next": $nxt, + "ids": $append( + $lookup($idx,$v.instanceType).ids, + { + "old": $v.id, + "new": $join([$v.instanceType,$string($nxt)],"_") + } + ) + } + }| ; + + ) + }, + $mxids + ).**.${old: new} + } +} \ No newline at end of file diff --git a/resources/jsonata/get_maxids.jsonata b/resources/jsonata/get_maxids.jsonata new file mode 100644 index 000000000..e4bee26c1 --- /dev/null +++ b/resources/jsonata/get_maxids.jsonata @@ -0,0 +1,12 @@ +{ + "get_maxids" : function($wrapper) + { + $wrapper.(**[id]).$ + { + instanceType: $.["max","next"]@$k + { + $k: $max($.$number($substringAfter($string(id),"_"))) + } + } + } +} \ No newline at end of file diff --git a/resources/jsonata/get_path.jsonata b/resources/jsonata/get_path.jsonata new file mode 100644 index 000000000..1bf2e753a --- /dev/null +++ b/resources/jsonata/get_path.jsonata @@ -0,0 +1,45 @@ +{ + "get_path": function($tgtobj,$within) + { + ( + $pref := function($this, $parent) + { + ( + $pa := $filter($keys($parent), + function($k) + { + ( + $v := $lookup($parent,$k); + $type($v) = "array" ? $this in $v : $v = $this + ) + } + ); + $pav := $lookup($parent,$pa); + $type($pav) = "array" + ? $map( + $pav, + function($av,$ai) + { + $av=$this ? $ak := $pa & "[" & $ai & "]" + } + ) + : $pa + ) + }; + $iter := function($t,$acc,$cnt) + { + ( + $t = $within or $cnt > 20 + ? 
$acc + : + ( + $p := $within.**.*[$=$t].%; + $pa := $pref($t,$p); + $iter($p,$append([$pa],$acc),$cnt + 1) + ) + ) + }; + $iter($tgtobj,[],1) ~> $join(".") + ) + } +} \ No newline at end of file diff --git a/resources/jsonata/get_ref_value.jsonata b/resources/jsonata/get_ref_value.jsonata new file mode 100644 index 000000000..4c8f7d495 --- /dev/null +++ b/resources/jsonata/get_ref_value.jsonata @@ -0,0 +1,16 @@ +{ + "get_ref_value": function($usdm_ref,$within,$not_found_value) + { + ( + $found_obj := $within.** + [ + id=$usdm_ref.id and + instanceType=$usdm_ref.klass and + $usdm_ref.attribute in $keys($) + ]; + $found_obj + ? $found_obj.$lookup($,$usdm_ref.attribute) + : $not_found_value + ) + } +} \ No newline at end of file diff --git a/resources/jsonata/get_rule.jsonata b/resources/jsonata/get_rule.jsonata new file mode 100644 index 000000000..af2a69d6c --- /dev/null +++ b/resources/jsonata/get_rule.jsonata @@ -0,0 +1,11 @@ +{ + "get_rule": function($def) + { + { + "message": $def.rule.Outcome.Message, + "check_func": $exists($def.rule.Check.all.operator) ? $def.rule.Check.all.operator : $def.rule.Check, + "variables": $def.rule.Outcome.Output_Variables, + "scope": $def.rule.Scope.Entities + } + } +} \ No newline at end of file diff --git a/resources/jsonata/make_dup.jsonata b/resources/jsonata/make_dup.jsonata new file mode 100644 index 000000000..bd965d833 --- /dev/null +++ b/resources/jsonata/make_dup.jsonata @@ -0,0 +1,31 @@ +{ + "make_dup" : function($obj,$imap) + { + $obj ~> |**[id]|( + $merge( + $each( + $sift( + function($v,$k) + { + $contains($k,/id(s)?$/i) and $type($v) != "null" + } + ), + function($v,$k) + { + ( + $get_id := function($id){($id in $keys($imap)) ? $lookup($imap, $id) : $id}; + { + + $k: $contains($k,/Ids$/) + ? 
[$map($v,$get_id)] + : $get_id($v) + + + } + ) + } + ) + ) + )| + } +} \ No newline at end of file diff --git a/resources/jsonata/parent_rel.jsonata b/resources/jsonata/parent_rel.jsonata new file mode 100644 index 000000000..f6809a454 --- /dev/null +++ b/resources/jsonata/parent_rel.jsonata @@ -0,0 +1,14 @@ +{ + "parent_rel" : function($this, $parent) + { + $filter($keys($parent), + function($k) + { + ( + $v := $lookup($parent,$k); + $type($v) = "array" ? $this in $v : $v = $this + ) + } + ) + } +} \ No newline at end of file diff --git a/resources/jsonata/parse_refs.jsonata b/resources/jsonata/parse_refs.jsonata new file mode 100644 index 000000000..8ccc2f9c8 --- /dev/null +++ b/resources/jsonata/parse_refs.jsonata @@ -0,0 +1,30 @@ +{ + "parse_refs": function($from,$to,$within) + { + ( + $iter := function($dict) + { + $count($dict.**[$from in $keys($)]) > 0 + ? ( + $d := $dict ~> |**[$from in $keys($)]| + { + $to: + ( + $refs:=$lookup($,$from); + $map($refs,function($v) + { + $v in $dict[id=$v].**.$lookup($,$from) + ? {"circularReference": $v} + : $dict[id=$v] + } + ) + ) + },[$from]|; + $iter($d) + ) + : $dict + }; + $iter($within) + ) + } +} \ No newline at end of file diff --git a/resources/jsonata/report.jsonata b/resources/jsonata/report.jsonata new file mode 100644 index 000000000..041a9f809 --- /dev/null +++ b/resources/jsonata/report.jsonata @@ -0,0 +1,56 @@ +{ + "report": function($rule,$data) + { + ( + $errors := $append(null,$type($rule.check_func) = "string" ? $data.$eval($rule.check_func) : $rule.check_func($data)); + $rule_applies := function($instyp,$rs) + { + ( + $incent := [$rs.Include]; + $excent := [$rs.Exclude]; + $is_inc := $instyp in $incent or "ALL" in $incent; + $is_exc := $instyp in $excent or "ALL" in $excent; + $is_inc and $not($is_exc) + ) + }; + $data.**.instanceType@$it.($errors) + { + $it: + { + "executionStatus": "success", + "class": $it[0] + } + } + ~> |*| + ( + $class := $.class; + $rule_applies($class,$rule.scope) + ? 
+ $class in $errors.dataset + ? + { + "message": $rule.message, + "errors" : $map($errors[dataset = $class], + function($obj) + { + $merge($map($rule.variables, function($var) + { + { + $var: $var in $keys($obj) ? $lookup($obj,$var) : "Not in output" + } + } + ) + ) + } + )[] + } + : {} + : { + "executionStatus": "skipped", + "message": "Rule skipped - doesn't apply to entity for rule id=, dataset=" & $class + } + )| + ~> $spread() ~> $sort(function($l,$r){$keys($l)[0]>$keys($r)[0]}) ~> $merge() + ) + } +} \ No newline at end of file diff --git a/resources/jsonata/sift_tree.jsonata b/resources/jsonata/sift_tree.jsonata new file mode 100644 index 000000000..e0faeeefa --- /dev/null +++ b/resources/jsonata/sift_tree.jsonata @@ -0,0 +1,34 @@ +{ + "sift_tree" : function($tree,$find_val,$include,$prefix) + { + ( + $iter := function($t) + { + $type($t) = "array" + ? [ + $map($t,function($v) + { + $v.**.[$find_val in $] + ? $iter($v) + } + ) + ] + : $type($t) = "object" + ? $t ~> + $sift(function($v,$k) + { + $k in $include or $v.**.[$find_val in $] + } + ) ~> + $each(function($v, $k) + { + $k in $include + ? {$join([$prefix ? $t.instanceType,$k],"."): $v} + : {$join([$prefix ? $t.instanceType,$k],"."): $iter($v)} + }) ~> $merge + : $t + }; + $iter($tree) + ) + } +} \ No newline at end of file diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index 1f1f57ef8..5535c038f 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -189,7 +189,7 @@ Apply a JSONata query to a JSON file. 
[JSONata documentation](https://docs.jsona ```yaml Check: | - **.$filter($, $utils.equals).{"record":path, "A":A, "B":B} + **.$filter($, $myutils.equals).{"record":path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft @@ -209,7 +209,8 @@ Sensitivity: Record #### Custom user function contained in external file "equals.jsonata" -\* Note that in the CLI, you can pass a directory of such files using `-jfp` or `--jsonata-functions-path` +\* Note that in the CLI, you can pass a variable name and a directory of such files using `-jcf` or `--jsonata-custom-functions`. The engine's built-in JSONata functions are accessible from the `$utils` variable. For example, to load two additional directories of functions into `$myutils` and `$yourutils`, add the options: +`-jcf myutils path/to/myutils -jcf yourutils path/to/yourutils` ```yaml { diff --git a/scripts/run_validation.py b/scripts/run_validation.py index ec43c3bef..72d15ee06 100644 --- a/scripts/run_validation.py +++ b/scripts/run_validation.py @@ -89,7 +89,7 @@ def validate_single_rule( max_dataset_size=max_dataset_size, dataset_paths=args.dataset_paths, validate_xml=args.validate_xml, - jsonata_functions_path=args.jsonata_functions_path, + jsonata_custom_functions=args.jsonata_custom_functions, ) results = engine.validate_single_rule(rule, datasets) results = list(itertools.chain(*results.values())) diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py index 90ae647ac..037ac8560 100644 --- a/tests/unit/test_utilities/test_jsonata_processor.py +++ b/tests/unit/test_utilities/test_jsonata_processor.py @@ -1,13 +1,17 @@ +from unittest import TestCase from unittest.mock import MagicMock, patch from yaml import safe_load +from cdisc_rules_engine.exceptions.custom_exceptions import ( + MissingDataError, + RuleExecutionError, + RuleFormatError, +) from cdisc_rules_engine.models.rule import Rule from cdisc_rules_engine.utilities.jsonata_processor import 
JSONataProcessor -@patch( - "cdisc_rules_engine.utilities.jsonata_processor.JSONataProcessor.get_custom_functions" -) -def test_jsonata_processor(mock_get_custom_functions: MagicMock): +class TestJSONataProcessor(TestCase): + rule = """ Check: | **.$filter($, $utils.equals).{"record":path, "A":A, "B":B} @@ -27,7 +31,7 @@ def test_jsonata_processor(mock_get_custom_functions: MagicMock): - ALL Sensitivity: Record """ - mock_get_custom_functions.return_value = """ + get_custom_functions = """ $utils:={ "equals": function($v){ $v.A=$v.B } }; @@ -68,11 +72,89 @@ def test_jsonata_processor(mock_get_custom_functions: MagicMock): ], } ] - rule = Rule.from_cdisc_metadata(safe_load(rule)) - result = JSONataProcessor.execute_jsonata_rule( - rule=rule, - dataset=dataset, - jsonata_functions_path="", + + @patch( + "cdisc_rules_engine.utilities.jsonata_processor.JSONataProcessor.get_custom_functions" + ) + def test_jsonata_processor(self, mock_get_custom_functions: MagicMock): + mock_get_custom_functions.return_value = self.get_custom_functions + rule = Rule.from_cdisc_metadata(safe_load(self.rule)) + result = JSONataProcessor.execute_jsonata_rule( + rule=rule, + dataset=self.dataset, + jsonata_custom_functions=(), + ) + assert result == self.expected + + @patch( + "cdisc_rules_engine.utilities.jsonata_processor.JSONataProcessor.get_custom_functions" + ) + def test_jsonata_rule_parsing_error(self, mock_get_custom_functions: MagicMock): + rule = """ + Check: | + Bad jsonata rule + Core: + Id: JSONATA Test + Status: Draft + Outcome: + Message: "A equals B" + Output Variables: + - record + - A + - B + Rule Type: JSONata + Scope: + Entities: + Include: + - ALL + Sensitivity: Record + """ + mock_get_custom_functions.return_value = self.get_custom_functions + rule = Rule.from_cdisc_metadata(safe_load(rule)) + with self.assertRaises(RuleFormatError): + JSONataProcessor.execute_jsonata_rule( + rule=rule, + dataset=self.dataset, + jsonata_custom_functions=(), + ) + + @patch( + 
"cdisc_rules_engine.utilities.jsonata_processor.JSONataProcessor.get_custom_functions" ) + def test_jsonata_rule_execution_error(self, mock_get_custom_functions: MagicMock): + rule = """ + Check: | + **.$filter($, $missing_utils.equals).{"record":path, "A":A, "B":B} + Core: + Id: JSONATA Test + Status: Draft + Outcome: + Message: "A equals B" + Output Variables: + - record + - A + - B + Rule Type: JSONata + Scope: + Entities: + Include: + - ALL + Sensitivity: Record + """ + mock_get_custom_functions.return_value = self.get_custom_functions + rule = Rule.from_cdisc_metadata(safe_load(rule)) + with self.assertRaises(RuleExecutionError): + JSONataProcessor.execute_jsonata_rule( + rule=rule, + dataset=self.dataset, + jsonata_custom_functions=(), + ) - assert result == expected + def test_jsonata_rule_custom_load_error(self): + rule = Rule.from_cdisc_metadata(safe_load(self.rule)) + with self.assertRaises(MissingDataError): + JSONataProcessor.execute_jsonata_rule( + rule=rule, + dataset=self.dataset, + jsonata_custom_functions=(("utils_name", "bad_path"),), + ) From 3ed3311415b8467a3a85e18d385b5ddb48958377 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 29 Sep 2025 17:16:25 -0400 Subject: [PATCH 16/23] removed unneeded jsonata files. 
fixed md indentation --- resources/jsonata/get_idmap.jsonata | 35 ----------------- resources/jsonata/get_maxids.jsonata | 12 ------ resources/jsonata/get_rule.jsonata | 11 ------ resources/jsonata/make_dup.jsonata | 31 --------------- resources/jsonata/report.jsonata | 56 ---------------------------- resources/schema/Rule_Type.md | 2 +- 6 files changed, 1 insertion(+), 146 deletions(-) delete mode 100644 resources/jsonata/get_idmap.jsonata delete mode 100644 resources/jsonata/get_maxids.jsonata delete mode 100644 resources/jsonata/get_rule.jsonata delete mode 100644 resources/jsonata/make_dup.jsonata delete mode 100644 resources/jsonata/report.jsonata diff --git a/resources/jsonata/get_idmap.jsonata b/resources/jsonata/get_idmap.jsonata deleted file mode 100644 index 4e0c3342f..000000000 --- a/resources/jsonata/get_idmap.jsonata +++ /dev/null @@ -1,35 +0,0 @@ -{ - "get_idmap" : function($obj,$mxids) - { - $reduce( - ($obj.**[id]). - { - "id": id, - "instanceType": instanceType - }, - function($idx,$v) - { - ( - $nxt := $v.instanceType in $keys($idx) ? 
$lookup($idx,$v.instanceType).next + 1 : 1; - $idx := $idx ~> |$| - { - $v.instanceType: - { - "max": $lookup($idx,$v.instanceType).max, - "next": $nxt, - "ids": $append( - $lookup($idx,$v.instanceType).ids, - { - "old": $v.id, - "new": $join([$v.instanceType,$string($nxt)],"_") - } - ) - } - }| ; - - ) - }, - $mxids - ).**.${old: new} - } -} \ No newline at end of file diff --git a/resources/jsonata/get_maxids.jsonata b/resources/jsonata/get_maxids.jsonata deleted file mode 100644 index e4bee26c1..000000000 --- a/resources/jsonata/get_maxids.jsonata +++ /dev/null @@ -1,12 +0,0 @@ -{ - "get_maxids" : function($wrapper) - { - $wrapper.(**[id]).$ - { - instanceType: $.["max","next"]@$k - { - $k: $max($.$number($substringAfter($string(id),"_"))) - } - } - } -} \ No newline at end of file diff --git a/resources/jsonata/get_rule.jsonata b/resources/jsonata/get_rule.jsonata deleted file mode 100644 index af2a69d6c..000000000 --- a/resources/jsonata/get_rule.jsonata +++ /dev/null @@ -1,11 +0,0 @@ -{ - "get_rule": function($def) - { - { - "message": $def.rule.Outcome.Message, - "check_func": $exists($def.rule.Check.all.operator) ? $def.rule.Check.all.operator : $def.rule.Check, - "variables": $def.rule.Outcome.Output_Variables, - "scope": $def.rule.Scope.Entities - } - } -} \ No newline at end of file diff --git a/resources/jsonata/make_dup.jsonata b/resources/jsonata/make_dup.jsonata deleted file mode 100644 index bd965d833..000000000 --- a/resources/jsonata/make_dup.jsonata +++ /dev/null @@ -1,31 +0,0 @@ -{ - "make_dup" : function($obj,$imap) - { - $obj ~> |**[id]|( - $merge( - $each( - $sift( - function($v,$k) - { - $contains($k,/id(s)?$/i) and $type($v) != "null" - } - ), - function($v,$k) - { - ( - $get_id := function($id){($id in $keys($imap)) ? $lookup($imap, $id) : $id}; - { - - $k: $contains($k,/Ids$/) - ? 
[$map($v,$get_id)] - : $get_id($v) - - - } - ) - } - ) - ) - )| - } -} \ No newline at end of file diff --git a/resources/jsonata/report.jsonata b/resources/jsonata/report.jsonata deleted file mode 100644 index 041a9f809..000000000 --- a/resources/jsonata/report.jsonata +++ /dev/null @@ -1,56 +0,0 @@ -{ - "report": function($rule,$data) - { - ( - $errors := $append(null,$type($rule.check_func) = "string" ? $data.$eval($rule.check_func) : $rule.check_func($data)); - $rule_applies := function($instyp,$rs) - { - ( - $incent := [$rs.Include]; - $excent := [$rs.Exclude]; - $is_inc := $instyp in $incent or "ALL" in $incent; - $is_exc := $instyp in $excent or "ALL" in $excent; - $is_inc and $not($is_exc) - ) - }; - $data.**.instanceType@$it.($errors) - { - $it: - { - "executionStatus": "success", - "class": $it[0] - } - } - ~> |*| - ( - $class := $.class; - $rule_applies($class,$rule.scope) - ? - $class in $errors.dataset - ? - { - "message": $rule.message, - "errors" : $map($errors[dataset = $class], - function($obj) - { - $merge($map($rule.variables, function($var) - { - { - $var: $var in $keys($obj) ? 
$lookup($obj,$var) : "Not in output" - } - } - ) - ) - } - )[] - } - : {} - : { - "executionStatus": "skipped", - "message": "Rule skipped - doesn't apply to entity for rule id=, dataset=" & $class - } - )| - ~> $spread() ~> $sort(function($l,$r){$keys($l)[0]>$keys($r)[0]}) ~> $merge() - ) - } -} \ No newline at end of file diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index 5535c038f..7836cfa4c 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -192,7 +192,7 @@ Check: | **.$filter($, $myutils.equals).{"record":path, "A":A, "B":B} Core: Id: JSONATA Test -Status: Draft + Status: Draft Outcome: Message: "A equals B" Output Variables: From 3de61a496d9a340db3c3b9cf5d9dd2bac2b500cc Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 30 Sep 2025 11:43:51 -0400 Subject: [PATCH 17/23] jsonata ignore scope --- .../utilities/rule_processor.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 789b2988e..d98959db6 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -1,5 +1,6 @@ import re from typing import Iterable, List, Optional, Set, Union, Tuple +from cdisc_rules_engine.enums.rule_types import RuleTypes from cdisc_rules_engine.interfaces.cache_service_interface import ( CacheServiceInterface, ) @@ -587,6 +588,14 @@ def duplicate_conditions_for_all_targets( new_conditions_dict[key] = new_conditions_list return new_conditions_dict + @staticmethod + def log_suitable_for_validation(rule_id: str, dataset_name: str): + logger.info( + f"is_suitable_for_validation. 
rule id={rule_id}, " + f"dataset={dataset_name}, result=True" + ) + return True, "" + def is_suitable_for_validation( self, rule: dict, @@ -602,6 +611,11 @@ def is_suitable_for_validation( reason = f"Rule skipped - invalid rule structure for rule id={rule_id}" logger.info(f"is_suitable_for_validation. {reason}, result=False") return False, reason + if ( + rule.get("rule_type") == RuleTypes.JSONATA.value + and dataset_metadata.name == "json" + ): + return self.log_suitable_for_validation(rule_id, dataset_name) if not self.rule_applies_to_use_case( dataset_metadata, rule, standard, standard_substandard ): @@ -632,11 +646,7 @@ def is_suitable_for_validation( ) logger.info(f"is_suitable_for_validation. {reason}, result=False") return False, reason - logger.info( - f"is_suitable_for_validation. rule id={rule_id}, " - f"dataset={dataset_name}, result=True" - ) - return True, "" + return self.log_suitable_for_validation(rule_id, dataset_name) @staticmethod def extract_target_names_from_rule( From 69bb7a2e05291b0f7188c3628d1293977f58bb97 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 30 Sep 2025 15:19:53 -0400 Subject: [PATCH 18/23] tweak output variables mapping --- .../utilities/jsonata_processor.py | 6 ++--- resources/schema/Rule_Type.md | 22 ++++++++++--------- .../test_utilities/test_jsonata_processor.py | 16 +++++++------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cdisc_rules_engine/utilities/jsonata_processor.py b/cdisc_rules_engine/utilities/jsonata_processor.py index 44b5dfab2..6ae73ebf1 100644 --- a/cdisc_rules_engine/utilities/jsonata_processor.py +++ b/cdisc_rules_engine/utilities/jsonata_processor.py @@ -56,9 +56,9 @@ def execute_jsonata_rule( error_entity = ValidationErrorEntity( value=result, dataset=result.get("dataset") or "", - row=result.get("record"), - usubjid=result.get("usubjid"), - sequence=result.get("sequence"), + row=result.get("row"), + usubjid=result.get("USUBJID"), + sequence=result.get("SEQ"), ) 
errors[result.get("dataset")].append(error_entity) validation_error_container = [ diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index 7836cfa4c..aa31c8764 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -189,14 +189,14 @@ Apply a JSONata query to a JSON file. [JSONata documentation](https://docs.jsona ```yaml Check: | - **.$filter($, $myutils.equals).{"record":path, "A":A, "B":B} + **.$filter($, $myutils.equals).{"row":path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft Outcome: Message: "A equals B" Output Variables: - - record + - row - A - B Rule Type: JSONata @@ -242,17 +242,17 @@ Sensitivity: Record "executionStatus": "success", "dataset": "", "domain": "", - "variables": ["A", "B", "record"], + "variables": ["A", "B", "row"], "message": "A equals B", "errors": [ { - "value": { "record": "", "A": "same value 1", "B": "same value 1" }, + "value": { "row": "", "A": "same value 1", "B": "same value 1" }, "dataset": "", "row": "" }, { "value": { - "record": "C.C", + "row": "C.C", "A": "same value 2", "B": "same value 2" }, @@ -268,12 +268,14 @@ Sensitivity: Record You can use `Outcome.Output Variables` to specify which properties to display from the result JSON. The following result property names will map to the column names in the Excel output report. 
-Result property name -> Report Issue Details Column Name: +Mapping of Result property names to Report Issue Details Column Names: -- `dataset` -> `Dataset` -- `usubjid` -> `USUBJID` -- `record` -> `Record` -- `sequence` -> `Sequence` +| JSONata Result Name | JSON report property | Excel Column | +| ------------------- | -------------------- | ------------ | +| dataset | dataset | Dataset | +| row | row | Record | +| SEQ | SEQ | Sequence | +| USUBJID | USUBJID | USUBJID | ### Scope diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py index 037ac8560..b978de3ec 100644 --- a/tests/unit/test_utilities/test_jsonata_processor.py +++ b/tests/unit/test_utilities/test_jsonata_processor.py @@ -14,14 +14,14 @@ class TestJSONataProcessor(TestCase): rule = """ Check: | - **.$filter($, $utils.equals).{"record":path, "A":A, "B":B} + **.$filter($, $utils.equals).{"row":path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft Outcome: Message: "A equals B" Output Variables: - - record + - row - A - B Rule Type: JSONata @@ -52,17 +52,17 @@ class TestJSONataProcessor(TestCase): "executionStatus": "success", "dataset": None, "domain": None, - "variables": ["A", "B", "record"], + "variables": ["A", "B", "row"], "message": "A equals B", "errors": [ { - "value": {"record": "", "A": "same value 1", "B": "same value 1"}, + "value": {"row": "", "A": "same value 1", "B": "same value 1"}, "dataset": "", "row": "", }, { "value": { - "record": "C.C", + "row": "C.C", "A": "same value 2", "B": "same value 2", }, @@ -99,7 +99,7 @@ def test_jsonata_rule_parsing_error(self, mock_get_custom_functions: MagicMock): Outcome: Message: "A equals B" Output Variables: - - record + - row - A - B Rule Type: JSONata @@ -124,14 +124,14 @@ def test_jsonata_rule_parsing_error(self, mock_get_custom_functions: MagicMock): def test_jsonata_rule_execution_error(self, mock_get_custom_functions: MagicMock): rule = """ Check: | - **.$filter($, 
$missing_utils.equals).{"record":path, "A":A, "B":B} + **.$filter($, $missing_utils.equals).{"row":path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft Outcome: Message: "A equals B" Output Variables: - - record + - row - A - B Rule Type: JSONata From d755646df24049ac2e419c4f4b51e00f3a18f825 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 30 Sep 2025 15:30:01 -0400 Subject: [PATCH 19/23] jsonata scope readme update --- resources/schema/Rule_Type.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index aa31c8764..e657e6b5a 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -279,7 +279,7 @@ Mapping of Result property names to Report Issue Details Column Names: ### Scope -`Scope` should always `Include` `ALL` to ensure the rule will be run. The rule is only run once for the entire JSON file. The `Dataset` determination must come from the rule's jsonata result property. +A JSONata rule will always run once for the entire JSON file, regardless of the Scope. The `Dataset` determination must come from the rule's JSONata result property. 
## Record Data From 17295c5c301b5f17b32ae4da93a1a45c7deb794d Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Fri, 10 Oct 2025 15:17:39 -0400 Subject: [PATCH 20/23] Add _path preprocessing --- .../jsonata_dataset_builder.py | 16 +++++++ resources/jsonata/get_path.jsonata | 45 ------------------- resources/jsonata/parent_rel.jsonata | 14 ------ resources/schema/Rule_Type.md | 14 +++--- .../test_utilities/test_jsonata_processor.py | 14 +++--- 5 files changed, 32 insertions(+), 71 deletions(-) delete mode 100644 resources/jsonata/get_path.jsonata delete mode 100644 resources/jsonata/parent_rel.jsonata diff --git a/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py b/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py index 04d9fd65f..9802b59fc 100644 --- a/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/jsonata_dataset_builder.py @@ -2,6 +2,21 @@ from cdisc_rules_engine.dataset_builders.base_dataset_builder import BaseDatasetBuilder +def add_json_pointer_paths(node, path=""): + """ + Recursively adds a '_path' attribute to each dict node in the JSON structure, + using JSON Pointer syntax. 
+ """ + if isinstance(node, dict): + node["_path"] = path + for key, value in node.items(): + if key != "_path": + add_json_pointer_paths(value, f"{path}/{key}") + elif isinstance(node, list): + for idx, item in enumerate(node): + add_json_pointer_paths(item, f"{path}/{idx}") + + class JSONataDatasetBuilder(BaseDatasetBuilder): def get_dataset(self, **kwargs): @@ -16,4 +31,5 @@ def get_dataset(self, **kwargs): return None with self.data_service.read_data(dataset_path) as fp: json = load(fp) + add_json_pointer_paths(json) return json diff --git a/resources/jsonata/get_path.jsonata b/resources/jsonata/get_path.jsonata deleted file mode 100644 index 1bf2e753a..000000000 --- a/resources/jsonata/get_path.jsonata +++ /dev/null @@ -1,45 +0,0 @@ -{ - "get_path": function($tgtobj,$within) - { - ( - $pref := function($this, $parent) - { - ( - $pa := $filter($keys($parent), - function($k) - { - ( - $v := $lookup($parent,$k); - $type($v) = "array" ? $this in $v : $v = $this - ) - } - ); - $pav := $lookup($parent,$pa); - $type($pav) = "array" - ? $map( - $pav, - function($av,$ai) - { - $av=$this ? $ak := $pa & "[" & $ai & "]" - } - ) - : $pa - ) - }; - $iter := function($t,$acc,$cnt) - { - ( - $t = $within or $cnt > 20 - ? $acc - : - ( - $p := $within.**.*[$=$t].%; - $pa := $pref($t,$p); - $iter($p,$append([$pa],$acc),$cnt + 1) - ) - ) - }; - $iter($tgtobj,[],1) ~> $join(".") - ) - } -} \ No newline at end of file diff --git a/resources/jsonata/parent_rel.jsonata b/resources/jsonata/parent_rel.jsonata deleted file mode 100644 index f6809a454..000000000 --- a/resources/jsonata/parent_rel.jsonata +++ /dev/null @@ -1,14 +0,0 @@ -{ - "parent_rel" : function($this, $parent) - { - $filter($keys($parent), - function($k) - { - ( - $v := $lookup($parent,$k); - $type($v) = "array" ? 
$this in $v : $v = $this - ) - } - ) - } -} \ No newline at end of file diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index e657e6b5a..c40ad9a84 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -189,7 +189,7 @@ Apply a JSONata query to a JSON file. [JSONata documentation](https://docs.jsona ```yaml Check: | - **.$filter($, $myutils.equals).{"row":path, "A":A, "B":B} + **.$filter($, $myutils.equals).{"row":_path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft @@ -222,14 +222,12 @@ Sensitivity: Record ```json { - "path": "", "A": "same value 1", "B": "same value 1", "C": { - "path": "C", "A": "different value 1", "B": "different value 2", - "C": { "path": "C.C", "A": "same value 2", "B": "same value 2" } + "C": { "A": "same value 2", "B": "same value 2" } } } ``` @@ -252,18 +250,22 @@ Sensitivity: Record }, { "value": { - "row": "C.C", + "row": "/C/C", "A": "same value 2", "B": "same value 2" }, "dataset": "", - "row": "C.C" + "row": "/C/C" } ] } ] ``` +### Preprocessing + +When the JSONata Rule Type is used, the input JSON file is preprocessed to add a `_path` attribute to each object node in the JSON tree (arrays are traversed but, like scalar values, do not receive one). Each `_path` value is the node's location written in [JSON Pointer](https://datatracker.ietf.org/doc/html/rfc6901) syntax; the root object's `_path` is the empty string. This `_path` attribute can be referenced throughout the JSONata query. + ### Output Variables and Report column mapping You can use `Outcome.Output Variables` to specify which properties to display from the result JSON. The following result property names will map to the column names in the Excel output report. 
diff --git a/tests/unit/test_utilities/test_jsonata_processor.py b/tests/unit/test_utilities/test_jsonata_processor.py index b978de3ec..dbeaf1f20 100644 --- a/tests/unit/test_utilities/test_jsonata_processor.py +++ b/tests/unit/test_utilities/test_jsonata_processor.py @@ -1,6 +1,9 @@ from unittest import TestCase from unittest.mock import MagicMock, patch from yaml import safe_load +from cdisc_rules_engine.dataset_builders.jsonata_dataset_builder import ( + add_json_pointer_paths, +) from cdisc_rules_engine.exceptions.custom_exceptions import ( MissingDataError, RuleExecutionError, @@ -14,7 +17,7 @@ class TestJSONataProcessor(TestCase): rule = """ Check: | - **.$filter($, $utils.equals).{"row":path, "A":A, "B":B} + **.$filter($, $utils.equals).{"row":_path, "A":A, "B":B} Core: Id: JSONATA Test Status: Draft @@ -37,14 +40,12 @@ class TestJSONataProcessor(TestCase): }; """ dataset = { - "path": "", "A": "same value 1", "B": "same value 1", "C": { - "path": "C", "A": "different value 1", "B": "different value 2", - "C": {"path": "C.C", "A": "same value 2", "B": "same value 2"}, + "C": {"A": "same value 2", "B": "same value 2"}, }, } expected = [ @@ -62,12 +63,12 @@ class TestJSONataProcessor(TestCase): }, { "value": { - "row": "C.C", + "row": "/C/C", "A": "same value 2", "B": "same value 2", }, "dataset": "", - "row": "C.C", + "row": "/C/C", }, ], } @@ -79,6 +80,7 @@ class TestJSONataProcessor(TestCase): def test_jsonata_processor(self, mock_get_custom_functions: MagicMock): mock_get_custom_functions.return_value = self.get_custom_functions rule = Rule.from_cdisc_metadata(safe_load(self.rule)) + add_json_pointer_paths(self.dataset) result = JSONataProcessor.execute_jsonata_rule( rule=rule, dataset=self.dataset, From 07fa834417e3703bb07486dc36dc4e58b769f468 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 13 Oct 2025 14:10:07 -0400 Subject: [PATCH 21/23] format fix --- cdisc_rules_engine/rules_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) 
diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 6f096a697..c2998d3d6 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -116,7 +116,10 @@ def validate_single_rule(self, rule: dict, datasets: Iterable[SDTMDatasetMetadat else: total_errors = 0 for dataset_metadata in datasets: - if self.max_errors_per_rule and total_errors >= self.max_errors_per_rule: + if ( + self.max_errors_per_rule + and total_errors >= self.max_errors_per_rule + ): logger.info( f"Rule {rule.get('core_id')}: Error limit ({self.max_errors_per_rule}) reached. " f"Skipping remaining datasets." From 6ab501500e3b545db311bfd00dcf1d81af897d9c Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 13 Oct 2025 14:33:33 -0400 Subject: [PATCH 22/23] fix merge bug --- cdisc_rules_engine/rules_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index c2998d3d6..ee2723a97 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -132,7 +132,7 @@ def validate_single_rule(self, rule: dict, datasets: Iterable[SDTMDatasetMetadat dataset_results = self.validate_single_dataset( rule, datasets, - SDTMDatasetMetadata(name="json"), + dataset_metadata, ) results[dataset_metadata.unsplit_name] = dataset_results for result in dataset_results: From 770f8fa12e933158ab1e1276bbf388d6f50426da Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 13 Oct 2025 14:37:40 -0400 Subject: [PATCH 23/23] missing test new args --- tests/unit/test_services/test_data_service/test_data_service.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/test_services/test_data_service/test_data_service.py b/tests/unit/test_services/test_data_service/test_data_service.py index f129057a7..00b4cb208 100644 --- a/tests/unit/test_services/test_data_service/test_data_service.py +++ 
b/tests/unit/test_services/test_data_service/test_data_service.py @@ -205,6 +205,7 @@ def test_get_dataset_class(dataset_metadata, data, expected_class): False, None, None, + None, ) ) data_service = LocalDataService( @@ -285,6 +286,7 @@ def test_get_dataset_class_associated_domains(): False, None, None, + None, ) ) data_service = LocalDataService(