Skip to content

Commit

Permalink
Add validators for VIA and COCO files
Browse files Browse the repository at this point in the history
  • Loading branch information
sfmig committed Jan 21, 2025
1 parent 9bdcffd commit 289e6d1
Showing 1 changed file with 183 additions and 0 deletions.
183 changes: 183 additions & 0 deletions ethology/annotations/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Validators for supported annotation files."""

import json
from pathlib import Path

from attrs import define, field

from ethology.annotations.json_schemas.utils import (
_check_file_is_json,
_check_file_matches_schema,
_check_required_keys_in_dict,
_get_default_COCO_schema,
_get_default_VIA_schema,
)


@define
class ValidVIA:
"""Class for valid VIA JSON files.
It checks the input file is a `ValidJSON` and additionally checks the
file contains the required keys.
Attributes
----------
path : pathlib.Path
Path to the VIA JSON file.
schema : dict
The JSON schema is set to VIA_SCHEMA.
Raises
------
FileNotFoundError
If the file does not exist.
ValueError
If the JSON file cannot be decoded.
jsonschema.exceptions.ValidationError
If the type of any of the keys in the JSON file
does not match the type specified in the schema.
ValueError
If the VIA JSON file misses any of the required keys.
"""

path: Path = field()
schema: dict = field(
default=_get_default_VIA_schema(),
init=False,
)
required_keys: dict = field(
default={
"main": ["_via_img_metadata", "_via_image_id_list"],
"images": ["filename", "regions"],
"regions": ["shape_attributes", "region_attributes"],
"shape_attributes": ["x", "y", "width", "height"],
},
init=False,
)

@path.validator
def _file_is_json(self, attribute, value):
_check_file_is_json(value)

@path.validator
def _file_matches_JSON_schema(self, attribute, value):
_check_file_matches_schema(value, self.schema)

@path.validator
def _file_contains_required_keys(self, attribute, value):
"""Ensure that the VIA JSON file contains the required keys."""
# Read data as dict
with open(value) as file:
data = json.load(file)

# Check first level keys
_check_required_keys_in_dict(self.required_keys["main"], data)

# Check keys in nested dicts
for img_str, img_dict in data["_via_img_metadata"].items():
# Check keys for each image dictionary
_check_required_keys_in_dict(
self.required_keys["images"],
img_dict,
additional_message=f" for {img_str}",
)

# Check keys for each region in an image
for i, region in enumerate(img_dict["regions"]):
# Check keys under first level per region
_check_required_keys_in_dict(
self.required_keys["regions"],
region,
additional_message=f" for region {i} under {img_str}",
)

# Check keys under "shape_attributes" per region
_check_required_keys_in_dict(
self.required_keys["shape_attributes"],
region["shape_attributes"],
additional_message=f" for region {i} under {img_str}",
)


@define
class ValidCOCO:
"""Class valid COCO JSON files for untracked data.
It checks the input COCO JSON file contains the required keys.
Attributes
----------
path : pathlib.Path
Path to the COCO JSON file.
Raises
------
FileNotFoundError
If the file does not exist.
ValueError
If the JSON file cannot be decoded.
jsonschema.exceptions.ValidationError
If the type of any of the keys in the JSON file
does not match the type specified in the schema.
ValueError
If the COCO JSON file misses any of the required keys.
"""

path: Path = field()
schema: dict = field(
default=_get_default_COCO_schema(),
init=False,
# init=False makes the attribute to be unconditionally initialized
# with the specified default
)

# The keys of "required_keys" match the 1st level keys in a COCO JSON file
required_keys: dict = field(
default={
"main": ["images", "annotations", "categories"],
"images": ["id", "file_name"],
"annotations": ["id", "image_id", "bbox", "category_id"],
"categories": ["id", "name", "supercategory"],
},
init=False,
)

@path.validator
def _file_is_json(self, attribute, value):
_check_file_is_json(value)

@path.validator
def _file_matches_JSON_schema(self, attribute, value):
_check_file_matches_schema(value, self.schema)

@path.validator
def _file_contains_required_keys(self, attribute, value):
"""Ensure that the COCO JSON file contains the required keys."""

# Helper function to singularise the input key for the
# error message
def _singularise_err_msg(key):
return key[:-1] if key != "categories" else key[:-3] + "y"

# Read file as dict
with open(value) as file:
data = json.load(file)

# Check first level keys
_check_required_keys_in_dict(self.required_keys["main"], data)

# Check keys in every dict listed under the "images", "annotations"
# and "categories" keys
for ky in list(self.required_keys.keys())[1:]:
for instance_dict in data[ky]:
_check_required_keys_in_dict(
self.required_keys[ky],
instance_dict,
additional_message=(
f" for {_singularise_err_msg(ky)} {instance_dict}"
),
)

0 comments on commit 289e6d1

Please sign in to comment.