-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
361 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
## JSON schemas for manual annotations files. | ||
|
||
We use JSON schemas to validate the types of a supported annotation file. | ||
|
||
Note that the schema validation only checks the type of a key if that key is present. It does not check for the presence of the keys. | ||
|
||
If the meta-schema (under $schema) is not provided, the jsonschema validator uses the latest released draft of the JSON schema specification. | ||
|
||
## VIA schema | ||
|
||
The VIA schema corresponds to the format exported by VGG Image Annotator 2.x.y (VIA) for object detection annotations. | ||
|
||
Each image under `_via_img_metadata` is indexed using a unique key: FILENAME-FILESIZE. We use "additionalProperties" to allow for any key name, see https://stackoverflow.com/a/69811612/24834957. | ||
|
||
The section `_via_image_id_list` contains an ordered list of image keys, each a unique key of the form `FILENAME-FILESIZE`; the position in the list defines the image ID. | ||
|
||
The section `_via_attributes` contains region attributes and file attributes, to display in VIA's UI and to classify the data. | ||
|
||
The section `_via_data_format_version` contains the version of the VIA tool used. | ||
|
||
|
||
## COCO schema | ||
The COCO schema follows the COCO dataset format for object detection, see https://cocodataset.org/#format-data. | ||
|
||
Box coordinates are measured from the top left corner of the image, and are 0-indexed. | ||
### References | ||
---------- | ||
- https://github.com/python-jsonschema/jsonschema | ||
- https://json-schema.org/understanding-json-schema/ | ||
- https://cocodataset.org/#format-data | ||
- https://gitlab.com/vgg/via/-/blob/master/via-2.x.y/CodeDoc.md?ref_type=heads#description-of-via-project-json-file | ||
- https://python-jsonschema.readthedocs.io/en/stable/api/#jsonschema.validate |
78 changes: 78 additions & 0 deletions
78
ethology/annotations/json_schemas/schemas/coco_schema.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
{ | ||
"$schema": "https://json-schema.org/draft/2020-12/schema", | ||
"type": "object", | ||
"properties": { | ||
"info": { | ||
"type": "object" | ||
}, | ||
"licenses": { | ||
"type": "array" | ||
}, | ||
"images": { | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"properties": { | ||
"file_name": { | ||
"type": "string" | ||
}, | ||
"id": { | ||
"type": "integer" | ||
}, | ||
"width": { | ||
"type": "integer" | ||
}, | ||
"height": { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
}, | ||
"annotations": { | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"properties": { | ||
"id": { | ||
"type": "integer" | ||
}, | ||
"image_id": { | ||
"type": "integer" | ||
}, | ||
"bbox": { | ||
"type": "array", | ||
"items": { | ||
"type": "integer" | ||
} | ||
}, | ||
"category_id": { | ||
"type": "integer" | ||
}, | ||
"area": { | ||
"type": "number" | ||
}, | ||
"iscrowd": { | ||
"type": "integer" | ||
} | ||
} | ||
} | ||
}, | ||
"categories": { | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"properties": { | ||
"id": { | ||
"type": "integer" | ||
}, | ||
"name": { | ||
"type": "string" | ||
}, | ||
"supercategory": { | ||
"type": "string" | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
{ | ||
"$schema": "https://json-schema.org/draft/2020-12/schema", | ||
"type": "object", | ||
"properties": { | ||
"_via_settings": { | ||
"type": "object", | ||
"properties": { | ||
"ui": { | ||
"type": "object" | ||
}, | ||
"core": { | ||
"type": "object" | ||
}, | ||
"project": { | ||
"type": "object" | ||
} | ||
} | ||
}, | ||
"_via_img_metadata": { | ||
"type": "object", | ||
"additionalProperties": { | ||
"type": "object", | ||
"properties": { | ||
"filename": { | ||
"type": "string" | ||
}, | ||
"size": { | ||
"type": "integer" | ||
}, | ||
"regions": { | ||
"type": "array", | ||
"items": { | ||
"type": "object", | ||
"properties": { | ||
"shape_attributes": { | ||
"type": "object", | ||
"properties": { | ||
"name": { | ||
"type": "string" | ||
}, | ||
"x": { | ||
"type": "integer" | ||
}, | ||
"y": { | ||
"type": "integer" | ||
}, | ||
"width": { | ||
"type": "integer" | ||
}, | ||
"height": { | ||
"type": "integer" | ||
} | ||
} | ||
}, | ||
"region_attributes": { | ||
"type": "object" | ||
} | ||
} | ||
} | ||
}, | ||
"file_attributes": { | ||
"type": "object" | ||
} | ||
} | ||
} | ||
}, | ||
"_via_image_id_list": { | ||
"type": "array", | ||
"items": { | ||
"type": "string" | ||
} | ||
}, | ||
"_via_attributes": { | ||
"type": "object", | ||
"properties": { | ||
"region": { | ||
"type": "object" | ||
}, | ||
"file": { | ||
"type": "object" | ||
} | ||
} | ||
}, | ||
"_via_data_format_version": { | ||
"type": "string" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
"""Utility functions for JSON schema files.""" | ||
|
||
import json | ||
from pathlib import Path | ||
|
||
import jsonschema | ||
import jsonschema.exceptions | ||
|
||
|
||
def _get_default_VIA_schema() -> dict:
    """Load the default VIA schema shipped with the package.

    Reads ``schemas/via_schema.json`` located next to this module and
    returns its parsed contents as a dictionary.
    """
    schema_file = Path(__file__).parent / "schemas" / "via_schema.json"
    return json.loads(schema_file.read_text())
|
||
|
||
def _get_default_COCO_schema() -> dict:
    """Load the default COCO schema shipped with the package.

    Reads ``schemas/coco_schema.json`` located next to this module and
    returns its parsed contents as a dictionary.
    """
    schema_file = Path(__file__).parent / "schemas" / "coco_schema.json"
    return json.loads(schema_file.read_text())
|
||
|
||
def _check_file_is_json(filepath: Path): | ||
"""Ensure that the file is a JSON file.""" | ||
try: | ||
with open(filepath) as file: | ||
json.load(file) | ||
except FileNotFoundError as not_found_error: | ||
raise FileNotFoundError( | ||
f"File not found: {filepath}." | ||
) from not_found_error | ||
except json.JSONDecodeError as decode_error: | ||
raise ValueError( | ||
f"Error decoding JSON data from file: {filepath}." | ||
) from decode_error | ||
|
||
|
||
def _check_file_matches_schema(filepath: Path, schema: dict): | ||
"""Ensure that the JSON file matches the expected schema. | ||
The schema validation only checks the type for each specified | ||
key if the key exists. It does not check that the keys in the | ||
schema are present in the JSON file. | ||
""" | ||
# read json file | ||
with open(filepath) as file: | ||
data = json.load(file) | ||
|
||
# check against schema if provided | ||
if schema: | ||
try: | ||
jsonschema.validate(instance=data, schema=schema) | ||
except jsonschema.exceptions.ValidationError as val_err: | ||
raise val_err | ||
except jsonschema.exceptions.SchemaError as schema_err: | ||
raise schema_err | ||
|
||
|
||
def _check_required_properties_keys(
    required_properties_keys: list, schema: dict
):
    """Ensure that the input schema includes the required "properties" keys.

    Compares ``required_properties_keys`` against the keys extracted from
    the schema's "properties" dictionaries and raises a ``ValueError``
    listing any that are absent.
    """
    # Keys found under "properties" dictionaries anywhere in the schema
    schema_keys = set(_extract_properties_keys(schema))

    # Required keys with no match in the schema
    missing_keys = set(required_properties_keys) - schema_keys

    if missing_keys:
        raise ValueError(
            f"Required key(s) {sorted(missing_keys)} not found "
            "in schema. Note that "
            "a key may not be found correctly if the schema keywords "
            "(such as 'properties', 'type' or 'items') are not spelt "
            "correctly."
        )
|
||
|
||
def _check_required_keys_in_dict( | ||
list_required_keys: list[str], | ||
data: dict, | ||
additional_message: str = "", | ||
): | ||
"""Check if the required keys are present in the input data_dict.""" | ||
missing_keys = set(list_required_keys) - data.keys() | ||
if missing_keys: | ||
raise ValueError( | ||
f"Required key(s) {sorted(missing_keys)} not " | ||
f"found{additional_message}." | ||
) | ||
|
||
|
||
def _extract_properties_keys(schema: dict, parent_key="") -> list: | ||
"""Recursively extract the keys of all "properties" subdictionaries. | ||
Recursively extract the keys of all subdictionaries in the input | ||
dictionary that are values to a "properties" key. The input dictionary | ||
represents a JSON schema dictionary | ||
(see https://json-schema.org/understanding-json-schema/about). | ||
The "properties" key always appears as part of a dictionary with at least | ||
another key, that is "type" or "item". | ||
""" | ||
# The "property keys" are either "properties" or "additionalProperties" | ||
# as they are the keys with the relevant data | ||
property_keys = ["properties", "additionalProperties"] | ||
|
||
def _contains_properties_key(input: dict): | ||
"""Return True if the input dictionary contains a property key.""" | ||
return any(x in input for x in property_keys) | ||
|
||
def _get_properties_subdict(input: dict): | ||
"""Get the subdictionary under the property key.""" | ||
return input[next(k for k in property_keys if k in input)] | ||
|
||
keys_of_properties_dicts = [] | ||
if "type" in schema: | ||
if _contains_properties_key(schema): | ||
# Get the subdictionary under the properties key | ||
properties_subdict = _get_properties_subdict(schema) | ||
|
||
# Check if there is a nested "properties" dict inside the current | ||
# one. If so, go down one level. | ||
if _contains_properties_key(properties_subdict): | ||
properties_subdict = _get_properties_subdict( | ||
properties_subdict | ||
) | ||
|
||
# Add keys of deepest "properties dict" to list | ||
keys_of_properties_dicts.extend( | ||
[ | ||
f"{parent_key}/{ky}" if parent_key else ky | ||
for ky in properties_subdict | ||
] | ||
) | ||
|
||
# Inspect non-properties dictionaries under this properties subdict | ||
for ky, val in properties_subdict.items(): | ||
full_key = f"{parent_key}/{ky}" if parent_key else ky | ||
keys_of_properties_dicts.extend( | ||
_extract_properties_keys(val, full_key) | ||
) | ||
|
||
elif "items" in schema: | ||
# Analyse the dictionary under the "items" key | ||
properties_subdict = schema["items"] | ||
keys_of_properties_dicts.extend( | ||
_extract_properties_keys( | ||
properties_subdict, parent_key=parent_key | ||
) | ||
) | ||
|
||
return sorted(keys_of_properties_dicts) |