-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add validators for VIA and COCO files
- Loading branch information
Showing
1 changed file
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
"""Validators for supported annotation files.""" | ||
|
||
import json | ||
from pathlib import Path | ||
|
||
from attrs import define, field | ||
|
||
from ethology.annotations.json_schemas.utils import ( | ||
_check_file_is_json, | ||
_check_file_matches_schema, | ||
_check_required_keys_in_dict, | ||
_get_default_COCO_schema, | ||
_get_default_VIA_schema, | ||
) | ||
|
||
|
||
@define | ||
class ValidVIA: | ||
"""Class for valid VIA JSON files. | ||
It checks the input file is a `ValidJSON` and additionally checks the | ||
file contains the required keys. | ||
Attributes | ||
---------- | ||
path : pathlib.Path | ||
Path to the VIA JSON file. | ||
schema : dict | ||
The JSON schema is set to VIA_SCHEMA. | ||
Raises | ||
------ | ||
FileNotFoundError | ||
If the file does not exist. | ||
ValueError | ||
If the JSON file cannot be decoded. | ||
jsonschema.exceptions.ValidationError | ||
If the type of any of the keys in the JSON file | ||
does not match the type specified in the schema. | ||
ValueError | ||
If the VIA JSON file misses any of the required keys. | ||
""" | ||
|
||
path: Path = field() | ||
schema: dict = field( | ||
default=_get_default_VIA_schema(), | ||
init=False, | ||
) | ||
required_keys: dict = field( | ||
default={ | ||
"main": ["_via_img_metadata", "_via_image_id_list"], | ||
"images": ["filename", "regions"], | ||
"regions": ["shape_attributes", "region_attributes"], | ||
"shape_attributes": ["x", "y", "width", "height"], | ||
}, | ||
init=False, | ||
) | ||
|
||
@path.validator | ||
def _file_is_json(self, attribute, value): | ||
_check_file_is_json(value) | ||
|
||
@path.validator | ||
def _file_matches_JSON_schema(self, attribute, value): | ||
_check_file_matches_schema(value, self.schema) | ||
|
||
@path.validator | ||
def _file_contains_required_keys(self, attribute, value): | ||
"""Ensure that the VIA JSON file contains the required keys.""" | ||
# Read data as dict | ||
with open(value) as file: | ||
data = json.load(file) | ||
|
||
# Check first level keys | ||
_check_required_keys_in_dict(self.required_keys["main"], data) | ||
|
||
# Check keys in nested dicts | ||
for img_str, img_dict in data["_via_img_metadata"].items(): | ||
# Check keys for each image dictionary | ||
_check_required_keys_in_dict( | ||
self.required_keys["images"], | ||
img_dict, | ||
additional_message=f" for {img_str}", | ||
) | ||
|
||
# Check keys for each region in an image | ||
for i, region in enumerate(img_dict["regions"]): | ||
# Check keys under first level per region | ||
_check_required_keys_in_dict( | ||
self.required_keys["regions"], | ||
region, | ||
additional_message=f" for region {i} under {img_str}", | ||
) | ||
|
||
# Check keys under "shape_attributes" per region | ||
_check_required_keys_in_dict( | ||
self.required_keys["shape_attributes"], | ||
region["shape_attributes"], | ||
additional_message=f" for region {i} under {img_str}", | ||
) | ||
|
||
|
||
@define | ||
class ValidCOCO: | ||
"""Class valid COCO JSON files for untracked data. | ||
It checks the input COCO JSON file contains the required keys. | ||
Attributes | ||
---------- | ||
path : pathlib.Path | ||
Path to the COCO JSON file. | ||
Raises | ||
------ | ||
FileNotFoundError | ||
If the file does not exist. | ||
ValueError | ||
If the JSON file cannot be decoded. | ||
jsonschema.exceptions.ValidationError | ||
If the type of any of the keys in the JSON file | ||
does not match the type specified in the schema. | ||
ValueError | ||
If the COCO JSON file misses any of the required keys. | ||
""" | ||
|
||
path: Path = field() | ||
schema: dict = field( | ||
default=_get_default_COCO_schema(), | ||
init=False, | ||
# init=False makes the attribute to be unconditionally initialized | ||
# with the specified default | ||
) | ||
|
||
# The keys of "required_keys" match the 1st level keys in a COCO JSON file | ||
required_keys: dict = field( | ||
default={ | ||
"main": ["images", "annotations", "categories"], | ||
"images": ["id", "file_name"], | ||
"annotations": ["id", "image_id", "bbox", "category_id"], | ||
"categories": ["id", "name", "supercategory"], | ||
}, | ||
init=False, | ||
) | ||
|
||
@path.validator | ||
def _file_is_json(self, attribute, value): | ||
_check_file_is_json(value) | ||
|
||
@path.validator | ||
def _file_matches_JSON_schema(self, attribute, value): | ||
_check_file_matches_schema(value, self.schema) | ||
|
||
@path.validator | ||
def _file_contains_required_keys(self, attribute, value): | ||
"""Ensure that the COCO JSON file contains the required keys.""" | ||
|
||
# Helper function to singularise the input key for the | ||
# error message | ||
def _singularise_err_msg(key): | ||
return key[:-1] if key != "categories" else key[:-3] + "y" | ||
|
||
# Read file as dict | ||
with open(value) as file: | ||
data = json.load(file) | ||
|
||
# Check first level keys | ||
_check_required_keys_in_dict(self.required_keys["main"], data) | ||
|
||
# Check keys in every dict listed under the "images", "annotations" | ||
# and "categories" keys | ||
for ky in list(self.required_keys.keys())[1:]: | ||
for instance_dict in data[ky]: | ||
_check_required_keys_in_dict( | ||
self.required_keys[ky], | ||
instance_dict, | ||
additional_message=( | ||
f" for {_singularise_err_msg(ky)} {instance_dict}" | ||
), | ||
) |