From 98c8208280fee8593a728300fd47af20bd2d397b Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 16 Jul 2025 14:46:14 +0000
Subject: [PATCH 1/2] Initial plan

From 2bdd75427e2d1d03deafb19aba0dc8533eb8ec87 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 16 Jul 2025 15:03:42 +0000
Subject: [PATCH 2/2] Fix missing metadata fields in label_referenced_variable_metadata operation

Co-authored-by: gerrycampion <85252124+gerrycampion@users.noreply.github.com>
---
Review notes (text between "---" and the first "diff --git" line is not part
of the commit message):
- _execute_operation: the fill loop writes the "" defaults into the dicts
  returned by _get_variables_metadata_from_standard() in place. Whether those
  dicts are shared with other consumers is not visible in this patch; confirm,
  or build copies before filling.
- The `if field not in metadata: metadata[field] = ""` guard is equivalent to
  `metadata.setdefault(field, "")`.
- New test: the role column is only checked for its length (2), not for the
  "" fill value this change introduces.
- New test: the comment "# save model metadata to cache" sits above a line
  that only obtains the cache instance; this test stores nothing in the cache.

 .../label_referenced_variable_metadata.py     |   8 +
 ...test_label_referenced_variable_metadata.py | 145 ++++++++++++++++++
 2 files changed, 153 insertions(+)

diff --git a/cdisc_rules_engine/operations/label_referenced_variable_metadata.py b/cdisc_rules_engine/operations/label_referenced_variable_metadata.py
index 5d8952e74..76deb3f16 100644
--- a/cdisc_rules_engine/operations/label_referenced_variable_metadata.py
+++ b/cdisc_rules_engine/operations/label_referenced_variable_metadata.py
@@ -9,6 +9,14 @@ def _execute_operation(self):
         found in the column provided in self.params.target.
         """
         variables_metadata = self._get_variables_metadata_from_standard()
+
+        # Ensure all expected metadata fields are present
+        expected_fields = ["name", "role", "ordinal", "label"]
+        for metadata in variables_metadata:
+            for field in expected_fields:
+                if field not in metadata:
+                    metadata[field] = ""
+
         df = self.evaluation_dataset.__class__.from_records(variables_metadata)
         df.data = df.data.add_prefix(f"{self.params.operation_id}_")
         target_columns = df.columns
diff --git a/tests/unit/test_operations/test_label_referenced_variable_metadata.py b/tests/unit/test_operations/test_label_referenced_variable_metadata.py
index 62ec54006..7feeebd6f 100644
--- a/tests/unit/test_operations/test_label_referenced_variable_metadata.py
+++ b/tests/unit/test_operations/test_label_referenced_variable_metadata.py
@@ -221,3 +221,148 @@ def mock_cached_method(*args, **kwargs):
         ].values
         == "AENEW"
     )
+
+
+@pytest.mark.parametrize("dataset_type", [(PandasDataset)])
+def test_get_label_referenced_variable_metadata_missing_role_field(
+    operation_params: OperationParams, dataset_type
+):
+    """Test that the operation works correctly when no variables have a role field."""
+    model_metadata = {
+        "datasets": [
+            {
+                "name": "FA",
+                "datasetVariables": [
+                    {
+                        "name": "FATERM",
+                        "ordinal": 4,
+                    },  # No role field
+                ],
+            }
+        ],
+        "classes": [
+            {
+                "name": "FINDINGS ABOUT",
+                "classVariables": [
+                    {"name": "--TERM", "ordinal": 1},  # No role field
+                ],
+            },
+            {
+                "name": "FINDINGS",
+                "classVariables": [
+                    {"name": "--TEST", "ordinal": 1},  # No role field
+                    {"name": "--TESTCD", "ordinal": 2},  # No role field
+                ],
+            },
+            {
+                "name": GENERAL_OBSERVATIONS_CLASS,
+                "classVariables": [
+                    {
+                        "name": "DOMAIN",
+                        "ordinal": 2,
+                    },  # No role field
+                    {
+                        "name": "STUDYID",
+                        "ordinal": 1,
+                    },  # No role field
+                ],
+            },
+        ],
+    }
+    standard_metadata = {
+        "_links": {"model": {"href": "/mdr/sdtm/1-5"}},
+        "domains": {"FA"},
+        "classes": [
+            {
+                "name": "FINDINGS ABOUT",
+                "datasets": [
+                    {
+                        "name": "FA",
+                        "label": "Findings About",
+                        "datasetVariables": [
+                            {"name": "FATESTCD", "ordinal": 1, "label": "Test Code"},
+                            {"name": "FATEST", "ordinal": 2, "label": "Test Name"},
+                        ],
+                    }
+                ],
+            }
+        ],
+    }
+    operation_params.dataframe = dataset_type.from_dict(
+        {
+            "STUDYID": ["TEST_STUDY", "TEST_STUDY"],
+            "DOMAIN": ["FA", "FA"],
+            "FATERM": ["Test Code", "Test Name"],
+        }
+    )
+    operation_params.domain = "FA"
+    operation_params.standard = "sdtmig"
+    operation_params.standard_version = "3-4"
+    operation_params.target = "FATERM"
+    operation_params.operation_id = "$label_referenced_variable"
+
+    # save model metadata to cache
+    cache = InMemoryCacheService.get_instance()
+
+    library_metadata = LibraryMetadataContainer(
+        standard_metadata=standard_metadata, model_metadata=model_metadata
+    )
+    mock_dataset_class = Mock()
+    mock_dataset_class.name = "Findings About"
+    # execute operation
+    data_service = LocalDataService.get_instance(
+        cache_service=cache, config=ConfigService()
+    )
+    data_service.get_dataset_class = Mock(return_value=mock_dataset_class)
+    operation = LabelReferencedVariableMetadata(
+        operation_params,
+        operation_params.dataframe,
+        cache,
+        data_service,
+        library_metadata,
+    )
+
+    def mock_cached_method(*args, **kwargs):
+        return operation_params.dataframe
+
+    with patch(
+        "cdisc_rules_engine.services.data_services.LocalDataService.get_raw_dataset_metadata",
+        side_effect=mock_cached_method,
+    ):
+        result: pd.DataFrame = operation.execute()
+
+    # Test that all expected columns are present, even when no variables have a role field
+    expected_columns = [
+        "STUDYID",
+        "DOMAIN",
+        "FATERM",
+        "$label_referenced_variable_name",
+        "$label_referenced_variable_role",
+        "$label_referenced_variable_ordinal",
+        "$label_referenced_variable_label",
+    ]
+
+    actual_columns = result.columns.to_list()
+
+    # Check that all expected columns are present (order doesn't matter)
+    for col in expected_columns:
+        assert col in actual_columns, f"Expected column {col} not found in result"
+
+    # Test that the role column exists and can be accessed (even if empty)
+    role_values = result.data["$label_referenced_variable_role"].tolist()
+    assert isinstance(role_values, list)
+    assert len(role_values) == 2  # Should have 2 entries for the 2 test rows
+
+    # Test that the operation correctly matched the labels to variable names
+    assert (
+        result.data[result["$label_referenced_variable_label"] == "Test Code"][
+            "$label_referenced_variable_name"
+        ].values
+        == "FATESTCD"
+    )
+    assert (
+        result.data[result["$label_referenced_variable_label"] == "Test Name"][
+            "$label_referenced_variable_name"
+        ].values
+        == "FATEST"
+    )