From 3d0a1ba61faef66e2136aae130a908228135ceaa Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 29 Jan 2025 19:43:40 +0000 Subject: [PATCH] Add validators for bboxes annotation files (#32) * Add json schemas * Add validators for VIA and COCO files * Update MANIFEST * Add tests for supported validators * Combine validators tests * Simplify JSON check * Delete placeholder * Rename schemas * Small edits caps * Update docstrings * Incorporate Niko's comments * Remove spell check suggestion from error message * Combine get_default_schema into 1 * Clarify init=False * Missing verb * log_message --> error_message * A simplified version of _extract_properties_keys based on Niko's --- MANIFEST.in | 4 + ethology/annotations/json_schemas/__init__.py | 0 .../json_schemas/schemas/COCO_schema.json | 78 ++++ .../json_schemas/schemas/README.md | 32 ++ .../json_schemas/schemas/VIA_schema.json | 88 ++++ ethology/annotations/json_schemas/utils.py | 130 ++++++ ethology/annotations/validators.py | 193 +++++++++ .../test_annotations/test_placeholder.py | 2 - .../test_annotations/test_validators.py | 408 ++++++++++++++++++ 9 files changed, 933 insertions(+), 2 deletions(-) create mode 100644 ethology/annotations/json_schemas/__init__.py create mode 100644 ethology/annotations/json_schemas/schemas/COCO_schema.json create mode 100644 ethology/annotations/json_schemas/schemas/README.md create mode 100644 ethology/annotations/json_schemas/schemas/VIA_schema.json create mode 100644 ethology/annotations/json_schemas/utils.py create mode 100644 ethology/annotations/validators.py delete mode 100644 tests/test_unit/test_annotations/test_placeholder.py create mode 100644 tests/test_unit/test_annotations/test_validators.py diff --git a/MANIFEST.in b/MANIFEST.in index e16ea33..63adff3 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,7 @@ recursive-exclude * __pycache__ recursive-exclude * *.py[co] recursive-exclude docs * recursive-exclude tests * + +# 
Include json schemas +recursive-include ethology/annotations/json_schemas/schemas *.json +recursive-include ethology/annotations/json_schemas/schemas *.md diff --git a/ethology/annotations/json_schemas/__init__.py b/ethology/annotations/json_schemas/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ethology/annotations/json_schemas/schemas/COCO_schema.json b/ethology/annotations/json_schemas/schemas/COCO_schema.json new file mode 100644 index 0000000..3793027 --- /dev/null +++ b/ethology/annotations/json_schemas/schemas/COCO_schema.json @@ -0,0 +1,78 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "info": { + "type": "object" + }, + "licenses": { + "type": "array" + }, + "images": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_name": { + "type": "string" + }, + "id": { + "type": "integer" + }, + "width": { + "type": "integer" + }, + "height": { + "type": "integer" + } + } + } + }, + "annotations": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "image_id": { + "type": "integer" + }, + "bbox": { + "type": "array", + "items": { + "type": "integer" + } + }, + "category_id": { + "type": "integer" + }, + "area": { + "type": "number" + }, + "iscrowd": { + "type": "integer" + } + } + } + }, + "categories": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": "string" + }, + "supercategory": { + "type": "string" + } + } + } + } + } +} diff --git a/ethology/annotations/json_schemas/schemas/README.md b/ethology/annotations/json_schemas/schemas/README.md new file mode 100644 index 0000000..5976fb2 --- /dev/null +++ b/ethology/annotations/json_schemas/schemas/README.md @@ -0,0 +1,32 @@ +## JSON schemas for manual annotations files. + +We use JSON schemas to validate the types of a supported annotation file. 
+ +Note that the schema validation only checks the type of a key if that key is present. It does not check for the presence of the keys. + +If the meta-schema (under $schema) is not provided, the jsonschema validator uses the latest released draft of the JSON schema specification. + +## VIA schema + +The VIA schema corresponds to the format exported by VGG Image Annotator 2.x.y (VIA) for object detection annotations. + +Each image under `_via_img_metadata` is indexed using a unique key: FILENAME-FILESIZE. We use "additionalProperties" to allow for any key name, see https://stackoverflow.com/a/69811612/24834957. + +The section `_via_image_id_list` contains an ordered list of image keys using a unique key: `FILENAME-FILESIZE`; the position in the list defines the image ID. + +The section `_via_attributes` contains region attributes and file attributes, to display in VIA's UI and to classify the data. + +The section `_via_data_format_version` contains the version of the VIA tool used. + + +## COCO schema +The COCO schema follows the COCO dataset format for object detection, see https://cocodataset.org/#format-data. + +Box coordinates are measured from the top left corner of the image, and are 0-indexed. 
+### References +---------- +- https://github.com/python-jsonschema/jsonschema +- https://json-schema.org/understanding-json-schema/ +- https://cocodataset.org/#format-data +- https://gitlab.com/vgg/via/-/blob/master/via-2.x.y/CodeDoc.md?ref_type=heads#description-of-via-project-json-file +- https://python-jsonschema.readthedocs.io/en/stable/api/#jsonschema.validate diff --git a/ethology/annotations/json_schemas/schemas/VIA_schema.json b/ethology/annotations/json_schemas/schemas/VIA_schema.json new file mode 100644 index 0000000..8017a90 --- /dev/null +++ b/ethology/annotations/json_schemas/schemas/VIA_schema.json @@ -0,0 +1,88 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "_via_settings": { + "type": "object", + "properties": { + "ui": { + "type": "object" + }, + "core": { + "type": "object" + }, + "project": { + "type": "object" + } + } + }, + "_via_img_metadata": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "filename": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "regions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "shape_attributes": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "x": { + "type": "integer" + }, + "y": { + "type": "integer" + }, + "width": { + "type": "integer" + }, + "height": { + "type": "integer" + } + } + }, + "region_attributes": { + "type": "object" + } + } + } + }, + "file_attributes": { + "type": "object" + } + } + } + }, + "_via_image_id_list": { + "type": "array", + "items": { + "type": "string" + } + }, + "_via_attributes": { + "type": "object", + "properties": { + "region": { + "type": "object" + }, + "file": { + "type": "object" + } + } + }, + "_via_data_format_version": { + "type": "string" + } + } +} diff --git a/ethology/annotations/json_schemas/utils.py b/ethology/annotations/json_schemas/utils.py new file mode 100644 index 0000000..84ca7ce 
--- /dev/null +++ b/ethology/annotations/json_schemas/utils.py @@ -0,0 +1,130 @@ +"""Utility functions for JSON schema files.""" + +import json +from pathlib import Path + +import jsonschema + + +def _get_default_schema(schema_name: str) -> dict: + """Get the default VIA or COCO schema as a dictionary.""" + schema_path = ( + Path(__file__).parent / "schemas" / f"{schema_name}_schema.json" + ) + with open(schema_path) as file: + schema_dict = json.load(file) + return schema_dict + + +def _check_file_is_json(filepath: Path): + """Check the input file can be read as a JSON.""" + try: + with open(filepath) as file: + json.load(file) + except json.JSONDecodeError as decode_error: + # We override the error message for clarity + raise ValueError( + f"Error decoding JSON data from file: {filepath}. " + "The data being deserialized is not a valid JSON. " + ) from decode_error + + +def _check_file_matches_schema(filepath: Path, schema: dict | None): + """Check the input JSON file matches the given schema. + + The schema validation only checks the type for each specified + key if the key exists. It does not check that the keys in the + schema are present in the JSON file. + """ + # Read json file + with open(filepath) as file: + data = json.load(file) + + # Check against schema if provided + if schema: + jsonschema.validate(instance=data, schema=schema) + + +def _check_required_properties_keys( + required_properties_keys: list, schema: dict +): + """Check the input schema includes the required "properties" keys.""" + # Get keys of "properties" dictionaries in schema + properties_keys_in_schema = _extract_properties_keys(schema) + + # Get list of "properties" keys that are required but not in schema + missing_keys = set(required_properties_keys) - set( + properties_keys_in_schema + ) + + # Raise error if there are missing keys in the schema + if missing_keys: + raise ValueError( + f"Required key(s) {sorted(missing_keys)} not found " "in schema." 
+ ) + + +def _check_required_keys_in_dict( + list_required_keys: list[str], + data: dict, + additional_message: str = "", +): + """Check if the required keys are present in the input dictionary.""" + missing_keys = set(list_required_keys) - set(data.keys()) + if missing_keys: + raise ValueError( + f"Required key(s) {sorted(missing_keys)} not " + f"found{additional_message}." + ) + + +def _extract_properties_keys(input_schema: dict, prefix: str = "") -> list: + """Extract keys from all "properties" subdictionaries in a JSON schema. + + Recursively extract the keys of all subdictionaries in the input + dictionary that are values to a "properties" key. The input dictionary + represents a JSON schema dictionary + (see https://json-schema.org/understanding-json-schema/about). The output + is a sorted list of strings with full paths (e.g. 'parent/child'). + + The "properties" key always appears as part of a set of dictionary keys + with at least another key being "type" or "item". We use this to find the + relevant subdictionaries. 
+ + """ + result: list[str] = [] + + # Skip if "type" key is missing in the schema + if "type" not in input_schema: + return result + + # If the input dictionary has a "properties" key: extract keys + # and recurse into nested dictionaries + if "properties" in input_schema: + for key, value in input_schema["properties"].items(): + full_key = f"{prefix}/{key}" if prefix else key + result.append(full_key) + # Recurse into nested dictionaries to look for more "properties" + # dicts + result.extend(_extract_properties_keys(value, full_key)) + + # If dictionary has "additionalProperties" key: recurse into it + if "additionalProperties" in input_schema: + result.extend( + _extract_properties_keys( + input_schema["additionalProperties"], + prefix, + ) + ) + + # If dictionary has "items" key: recurse into it + if "items" in input_schema: + result.extend( + _extract_properties_keys( + input_schema["items"], + prefix, + ) + ) + + # Return sorted list of keys with full paths + return sorted(result) diff --git a/ethology/annotations/validators.py b/ethology/annotations/validators.py new file mode 100644 index 0000000..e830ce8 --- /dev/null +++ b/ethology/annotations/validators.py @@ -0,0 +1,193 @@ +"""Validators for supported annotation files.""" + +import json +from pathlib import Path + +from attrs import define, field + +from ethology.annotations.json_schemas.utils import ( + _check_file_is_json, + _check_file_matches_schema, + _check_required_keys_in_dict, + _get_default_schema, +) + + +@define +class ValidVIA: + """Class for valid VIA JSON files. + + It checks the input file is a valid JSON file, matches + the VIA schema and contains the required keys. + + + Attributes + ---------- + path : pathlib.Path + Path to the VIA JSON file, passed as an input. + schema : dict + The JSON schema is set to the default VIA schema. + required_keys : dict + The required keys for the VIA JSON file. + + Raises + ------ + ValueError + If the JSON file cannot be decoded. 
+ jsonschema.exceptions.ValidationError + If the type of any of the keys in the JSON file + does not match the type specified in the schema. + jsonschema.exceptions.SchemaError + If the schema is invalid. + ValueError + If the VIA JSON file is missing any of the required keys. + + """ + + path: Path = field() + schema: dict = field( + default=_get_default_schema("VIA"), + init=False, + ) + required_keys: dict = field( + default={ + "main": ["_via_img_metadata", "_via_image_id_list"], + "images": ["filename", "regions"], + "regions": ["shape_attributes", "region_attributes"], + "shape_attributes": ["x", "y", "width", "height"], + }, + init=False, + # with init=False the attribute is always initialized + # with the default value + ) + + # Note: the validators are applied in order + @path.validator + def _file_is_json(self, attribute, value): + _check_file_is_json(value) + + @path.validator + def _file_matches_JSON_schema(self, attribute, value): + _check_file_matches_schema(value, self.schema) + + @path.validator + def _file_contains_required_keys(self, attribute, value): + """Ensure that the VIA JSON file contains the required keys.""" + # Read data as dict + with open(value) as file: + data = json.load(file) + + # Check first level keys + _check_required_keys_in_dict(self.required_keys["main"], data) + + # Check keys in nested dicts + for img_str, img_dict in data["_via_img_metadata"].items(): + # Check keys for each image dictionary + _check_required_keys_in_dict( + self.required_keys["images"], + img_dict, + additional_message=f" for {img_str}", + ) + + # Check keys for each region in an image + for i, region in enumerate(img_dict["regions"]): + # Check keys under first level per region + _check_required_keys_in_dict( + self.required_keys["regions"], + region, + additional_message=f" for region {i} under {img_str}", + ) + + # Check keys under "shape_attributes" per region + _check_required_keys_in_dict( + self.required_keys["shape_attributes"], + 
region["shape_attributes"], + additional_message=f" for region {i} under {img_str}", + ) + + +@define +class ValidCOCO: + """Class for valid COCO JSON files. + + It checks the input file is a valid JSON file, matches + the COCO schema and contains the required keys. + + Attributes + ---------- + path : pathlib.Path + Path to the COCO JSON file, passed as an input. + schema : dict + The JSON schema is set to the default COCO schema. + required_keys : dict + The required keys for the COCO JSON file. + + Raises + ------ + ValueError + If the JSON file cannot be decoded. + jsonschema.exceptions.ValidationError + If the type of any of the keys in the JSON file + does not match the type specified in the schema. + jsonschema.exceptions.SchemaError + If the schema is invalid. + ValueError + If the COCO JSON file is missing any of the required keys. + + """ + + path: Path = field() + schema: dict = field( + default=_get_default_schema("COCO"), + init=False, + # with init=False the attribute is always initialized + # with the default value + ) + + # The keys of "required_keys" match the 1st level keys in a COCO JSON file + required_keys: dict = field( + default={ + "main": ["images", "annotations", "categories"], + "images": ["id", "file_name"], + "annotations": ["id", "image_id", "bbox", "category_id"], + "categories": ["id", "name", "supercategory"], + }, + init=False, + ) + + # Note: the validators are applied in order + @path.validator + def _file_is_json(self, attribute, value): + _check_file_is_json(value) + + @path.validator + def _file_matches_JSON_schema(self, attribute, value): + _check_file_matches_schema(value, self.schema) + + @path.validator + def _file_contains_required_keys(self, attribute, value): + """Ensure that the COCO JSON file contains the required keys.""" + + # Helper function to singularise the input key for the + # error message + def _singularise_err_msg(key): + return key[:-1] if key != "categories" else key[:-3] + "y" + + # Read file as dict + 
with open(value) as file: + data = json.load(file) + + # Check first level keys + _check_required_keys_in_dict(self.required_keys["main"], data) + + # Check keys in every dict listed under the "images", "annotations" + # and "categories" keys + for ky in list(self.required_keys.keys())[1:]: + for instance_dict in data[ky]: + _check_required_keys_in_dict( + self.required_keys[ky], + instance_dict, + additional_message=( + f" for {_singularise_err_msg(ky)} {instance_dict}" + ), + ) diff --git a/tests/test_unit/test_annotations/test_placeholder.py b/tests/test_unit/test_annotations/test_placeholder.py deleted file mode 100644 index 3ada1ee..0000000 --- a/tests/test_unit/test_annotations/test_placeholder.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_placeholder(): - assert True diff --git a/tests/test_unit/test_annotations/test_validators.py b/tests/test_unit/test_annotations/test_validators.py new file mode 100644 index 0000000..07ac436 --- /dev/null +++ b/tests/test_unit/test_annotations/test_validators.py @@ -0,0 +1,408 @@ +import json +from contextlib import nullcontext as does_not_raise +from pathlib import Path + +import jsonschema +import pytest + +from ethology.annotations.json_schemas.utils import ( + _check_required_keys_in_dict, + _check_required_properties_keys, + _extract_properties_keys, +) +from ethology.annotations.validators import ValidCOCO, ValidVIA + + +@pytest.fixture() +def json_file_decode_error(tmp_path: Path) -> Path: + """Return path to a JSON file with a decoding error.""" + json_file = tmp_path / "JSON_decode_error.json" + with open(json_file, "w") as f: + f.write("just-a-string") + return json_file + + +@pytest.fixture() +def json_file_not_found_error(tmp_path: Path) -> Path: + """Return path to a JSON file that does not exist.""" + return tmp_path / "JSON_file_not_found.json" + + +@pytest.fixture() +def VIA_file_schema_mismatch( + annotations_test_data: dict, + tmp_path: Path, +) -> Path: + """Return path to a VIA JSON file that does not 
match its schema. + + Specifically, we modify the type of the "width" of the first bounding box + in the first image, from "int" to "str" + """ + # Read valid JSON file + valid_VIA_file_sample_1 = annotations_test_data["VIA_JSON_sample_1.json"] + with open(valid_VIA_file_sample_1) as f: + data = json.load(f) + + # Modify file so that it doesn't match the corresponding schema + # (make width a string) + _, img_dict = list(data["_via_img_metadata"].items())[0] + img_dict["regions"][0]["shape_attributes"]["width"] = "49" + + # Save the modified JSON to a new file + out_json = tmp_path / f"{valid_VIA_file_sample_1.stem}_schema_error.json" + with open(out_json, "w") as f: + json.dump(data, f) + return out_json + + +@pytest.fixture() +def COCO_file_schema_mismatch( + annotations_test_data: dict, + tmp_path: Path, +) -> Path: + """Return path to a COCO JSON file that doesn't match its schema. + + Specifically, we modify the type of the object under the "annotations" + key from "list of dicts" to "list" + """ + # Read valid JSON file + valid_COCO_file_sample_1 = annotations_test_data["COCO_JSON_sample_1.json"] + with open(valid_COCO_file_sample_1) as f: + data = json.load(f) + + # Modify file so that it doesn't match the corresponding schema + data["annotations"] = [1, 2, 3] # [d] for d in data["annotations"]] + + # save the modified json to a new file + out_json = tmp_path / f"{valid_COCO_file_sample_1.stem}_schema_error.json" + with open(out_json, "w") as f: + json.dump(data, f) + return out_json + + +@pytest.fixture() +def small_schema() -> dict: + """Small schema with properties keys: + ["a", "b", "b/b1", "c", "c/c1", "c/c2"]. 
+ """ + return { + "type": "object", + "properties": { + "a": { + "type": "array", + "items": {"type": "string"}, + }, + "b": { + "type": "object", + "properties": {"b1": {"type": "string"}}, + }, + "c": { + "type": "object", + "properties": { + "c1": {"type": "string"}, + "c2": {"type": "string"}, + }, + }, + }, + } + + +@pytest.fixture() +def default_VIA_schema() -> dict: + """Get default VIA schema.""" + from ethology.annotations.json_schemas.utils import _get_default_schema + + return _get_default_schema("VIA") + + +@pytest.fixture() +def default_COCO_schema() -> dict: + """Get default COCO schema.""" + from ethology.annotations.json_schemas.utils import ( + _get_default_schema, + ) + + return _get_default_schema("COCO") + + +@pytest.mark.parametrize( + "input_file, validator", + [ + ("VIA_JSON_sample_1.json", ValidVIA), + ("VIA_JSON_sample_2.json", ValidVIA), + ("COCO_JSON_sample_1.json", ValidCOCO), + ("COCO_JSON_sample_2.json", ValidCOCO), + ], +) +def test_validators_valid_input_files( + input_file: str, + validator: type[ValidVIA | ValidCOCO], + annotations_test_data: dict, +): + """Test the file validator with valid inputs.""" + filepath = annotations_test_data[input_file] + with does_not_raise(): + validator(path=filepath) + + +@pytest.mark.parametrize( + "invalid_input_file, validator, expected_exception, error_message", + [ + ( + "json_file_decode_error", + ValidVIA, + pytest.raises(ValueError), + "Error decoding JSON data from file", + ), + ( + "json_file_not_found_error", + ValidVIA, + pytest.raises(FileNotFoundError), + "No such file or directory: ", + ), + ( + "json_file_decode_error", + ValidCOCO, + pytest.raises(ValueError), + "Error decoding JSON data from file", + ), + ( + "json_file_not_found_error", + ValidCOCO, + pytest.raises(FileNotFoundError), + "No such file or directory: ", + ), + ( + "VIA_file_schema_mismatch", + ValidVIA, + pytest.raises(jsonschema.exceptions.ValidationError), + "'49' is not of type 'integer'", + ), + ( + 
"COCO_file_schema_mismatch", + ValidCOCO, + pytest.raises(jsonschema.exceptions.ValidationError), + "3 is not of type 'object'", + ), + ], +) +def test_validators_invalid_input_files( + invalid_input_file: str, + validator: type[ValidVIA | ValidCOCO], + expected_exception: pytest.raises, + error_message: str, + request: pytest.FixtureRequest, +): + """Test the validators throw the expected errors when passed invalid + inputs. + """ + invalid_json_file = request.getfixturevalue(invalid_input_file) + + with expected_exception as excinfo: + validator(path=invalid_json_file) + + # Check that the error message contains expected string + assert error_message in str(excinfo.value) + + # Check the error message contains file path + if not isinstance(excinfo.value, jsonschema.exceptions.ValidationError): + assert invalid_json_file.name in str(excinfo.value) + + +@pytest.mark.parametrize( + "schema, expected_properties_keys", + [ + ("small_schema", ["a", "b", "b/b1", "c", "c/c1", "c/c2"]), + ( + "default_VIA_schema", + [ + "_via_attributes", + "_via_attributes/file", + "_via_attributes/region", + "_via_data_format_version", + "_via_image_id_list", + "_via_img_metadata", + "_via_img_metadata/file_attributes", + "_via_img_metadata/filename", + "_via_img_metadata/regions", + "_via_img_metadata/regions/region_attributes", + "_via_img_metadata/regions/shape_attributes", + "_via_img_metadata/regions/shape_attributes/height", + "_via_img_metadata/regions/shape_attributes/name", + "_via_img_metadata/regions/shape_attributes/width", + "_via_img_metadata/regions/shape_attributes/x", + "_via_img_metadata/regions/shape_attributes/y", + "_via_img_metadata/size", + "_via_settings", + "_via_settings/core", + "_via_settings/project", + "_via_settings/ui", + ], + ), + ( + "default_COCO_schema", + [ + "annotations", + "annotations/area", + "annotations/bbox", + "annotations/category_id", + "annotations/id", + "annotations/image_id", + "annotations/iscrowd", + "categories", + "categories/id", 
+ "categories/name", + "categories/supercategory", + "images", + "images/file_name", + "images/height", + "images/id", + "images/width", + "info", + "licenses", + ], + ), + ], +) +def test_extract_properties_keys( + schema: dict, + expected_properties_keys: list, + request: pytest.FixtureRequest, +): + """Test the _extract_properties_keys helper function.""" + schema = request.getfixturevalue(schema) + assert _extract_properties_keys(schema) == sorted(expected_properties_keys) + + +@pytest.mark.parametrize( + "list_required_keys, data_dict, additional_message, expected_exception", + [ + ( + ["images", "annotations", "categories"], + {"images": "", "annotations": "", "categories": ""}, + "", + does_not_raise(), + ), # zero missing keys + ( + ["images", "annotations", "categories"], + {"annotations": "", "categories": ""}, + "", + pytest.raises(ValueError), + ), # one missing key + ( + ["images", "annotations", "categories"], + {"annotations": ""}, + "", + pytest.raises(ValueError), + ), # two missing keys + ( + ["images", "annotations", "categories"], + {"annotations": "", "categories": ""}, + "FOO", + pytest.raises(ValueError), + ), # one missing key with additional message + ], +) +def test_check_required_keys_in_dict( + list_required_keys: list, + data_dict: dict, + additional_message: str, + expected_exception: pytest.raises, +): + """Test the _check_required_keys_in_dict helper function.""" + with expected_exception as excinfo: + _check_required_keys_in_dict( + list_required_keys, data_dict, additional_message + ) + + if excinfo: + missing_keys = set(list_required_keys) - data_dict.keys() + assert str(excinfo.value) == ( + f"Required key(s) {sorted(missing_keys)} " + f"not found{additional_message}." + ) + + +def test_check_required_properties_keys(small_schema: dict): + """Test the _check_required_keys helper function.""" + # Define a sample schema from "small_schema" + # with a "properties" key missing (e.g. 
"c/c2") + small_schema["properties"]["c"]["properties"].pop("c2") + + # Define required "properties" keys + required_keys = ["a", "b", "c/c2"] + + # Run check + with pytest.raises(ValueError) as excinfo: + _check_required_properties_keys(required_keys, small_schema) + + # Check error message + assert "Required key(s) ['c/c2'] not found in schema" in str(excinfo.value) + + +@pytest.mark.parametrize( + "input_file,", + [ + "VIA_JSON_sample_1.json", + "VIA_JSON_sample_2.json", + ], +) +def test_required_keys_in_VIA_schema( + input_file: str, default_VIA_schema: dict, annotations_test_data: dict +): + """Check the provided VIA schema contains the ValidVIA required keys.""" + # Get required keys from a VIA valid file + filepath = annotations_test_data[input_file] + valid_VIA = ValidVIA(path=filepath) + required_VIA_keys = valid_VIA.required_keys + + # Map required keys to "properties" keys in schema + map_required_to_properties_keys = { + "main": "", + "images": "_via_img_metadata", + "regions": "_via_img_metadata/regions", + "shape_attributes": "_via_img_metadata/regions/shape_attributes", + } + + # Express required keys as required "properties" keys + required_property_keys = [ + val if ky == "main" else f"{map_required_to_properties_keys[ky]}/{val}" + for ky, values in required_VIA_keys.items() + for val in values + ] + + # Run check + _check_required_properties_keys( + required_property_keys, + default_VIA_schema, + ) + + +@pytest.mark.parametrize( + "input_file,", + [ + "COCO_JSON_sample_1.json", + "COCO_JSON_sample_2.json", + ], +) +def test_required_keys_in_COCO_schema( + input_file: str, default_COCO_schema: dict, annotations_test_data: dict +): + """Check the provided COCO schema contains the ValidCOCO required keys.""" + # Get required keys from a COCO valid file + filepath = annotations_test_data[input_file] + valid_COCO = ValidCOCO(path=filepath) + required_COCO_keys = valid_COCO.required_keys + + # Prepare list of required "properties" keys with full paths 
+ required_properties_keys = [ + f"{level}/{ky}" if level != "main" else ky + for level, required_keys in required_COCO_keys.items() + for ky in required_keys + ] + + # Run check + _check_required_properties_keys( + required_properties_keys, + default_COCO_schema, + )