diff --git a/ethology/annotations/io.py b/ethology/annotations/io.py new file mode 100644 index 0000000..baa65f6 --- /dev/null +++ b/ethology/annotations/io.py @@ -0,0 +1,468 @@ +"""Module for reading and writing manually labelled annotations.""" + +import ast +import json +from collections.abc import Callable +from pathlib import Path +from typing import Literal + +import pandas as pd + +from ethology.annotations.validators import ValidCOCO, ValidVIA, ValidVIAcsv + +# definition of standard bboxes dataframe +STANDARD_BBOXES_DF_INDEX = "annotation_id" +STANDARD_BBOXES_DF_COLUMNS = [ + "image_filename", + "image_id", + "x_min", + "y_min", + "width", + "height", + "supercategory", + "category", + "image_width", + "image_height", +] # if a column is not defined, it is filled with nan + + +def df_bboxes_from_files( + file_paths: Path | list[Path], + format: Literal["VIA", "COCO", "VIAcsv"], + images_dirs: Path | list[Path] | None = None, + **kwargs, +) -> pd.DataFrame: + """Read bounding boxes annotations as a dataframe. + + Parameters + ---------- + file_paths : Path | list[Path] + Path or list of paths to the input annotations. + format : Literal["VIA", "COCO", "VIAcsv"] + Format of the input annotation files. + images_dirs : Path | list[Path], optional + Path or list of paths to the directories containing the images. + **kwargs + Additional keyword arguments to pass to the + ``pandas.DataFrame.drop_duplicates`` method. The ``ignore_index=True`` + argument is always applied to force an index reset, and the ``inplace`` + argument is set to `False` and cannot be overridden. The settings + apply if one or multiple files are read. + + Returns + ------- + pd.DataFrame + Bounding boxes annotations dataframe. The dataframe is indexed by + "annotation_id" and has the following columns: "image_filename", + "image_id", "x_min", "y_min", "width", "height", "supercategory", + "category". + + See Also + -------- + pandas.concat : Concatenate pandas objects along a particular axis. + + pandas.DataFrame.drop_duplicates : Return DataFrame with duplicate rows + removed. + + """ + # Check kwargs that are forwarded to drop_duplicates + for fixed_kwargs in ["ignore_index", "inplace"]: + if fixed_kwargs in kwargs: + raise ValueError( + f"The '{fixed_kwargs}' argument for " + "`pandas.DataFrame.drop_duplicates` may not be overridden." + ) + + if isinstance(file_paths, list): + # Read multiple files + df_all = _df_bboxes_from_multiple_files( + file_paths, format=format, **kwargs + ) + + else: + # Read single VIA file + df_all = _df_bboxes_from_single_file( + file_paths, format=format, **kwargs + ) + + # Add metadata + df_all.metadata = { + "input_files": file_paths, + "format": format, + "images_dirs": images_dirs, + } + + return df_all + + +def _df_bboxes_from_multiple_files( + list_filepaths: list[Path], + format: Literal["VIA", "COCO", "VIAcsv"], + **kwargs, +): + """Read bounding boxes annotations from multiple files. + + Parameters + ---------- + list_filepaths : list[Path] + List of input annotation filepaths. + format : Literal["VIA", "COCO", "VIAcsv"] + Format of the input files. + Currently supported formats are "VIA", "VIAcsv" and "COCO". + **kwargs + Additional keyword arguments to pass to the + ``pandas.DataFrame.drop_duplicates`` method. The ``ignore_index=True`` + argument is always applied to force an index reset, and the ``inplace`` + argument is set to `False` and cannot be overridden. The settings + apply if one or multiple files are read. 
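+        For example, passing ``subset="image_id"`` restricts the
+        duplicate check to that column, and ``keep="last"`` retains the
+        last occurrence of each set of duplicate rows.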
+ + Returns + ------- + pd.DataFrame + Bounding boxes annotations dataframe. The dataframe is indexed + by "annotation_id" and has the following columns: "image_filename", + "image_id", "x_min", "y_min", "width", "height", "supercategory", + "category". + + """ + # Get list of dataframes + df_list = [ + _df_bboxes_from_single_file(file, format=format) + for file in list_filepaths + ] + + # Concatenate with ignore_index=True, + # so that the resulting axis is labeled 0,1,…,n - 1. + # NOTE: after ignore_index=True the index name is no longer "annotation_id" + df_all = pd.concat(df_list, ignore_index=True) + + # Update image_id based on the full sorted list of image filenames + list_image_filenames = sorted(list(df_all["image_filename"].unique())) + df_all["image_id"] = df_all["image_filename"].apply( + lambda x: list_image_filenames.index(x) + ) + + # Remove duplicates + df_all = df_all.drop_duplicates(ignore_index=True, inplace=False, **kwargs) + + # Set the index name to "annotation_id" + df_all.index.name = STANDARD_BBOXES_DF_INDEX + + return df_all + + +def _df_bboxes_from_single_file( + file_path: Path, format: Literal["VIA", "COCO", "VIAcsv"], **kwargs +) -> pd.DataFrame: + """Read bounding boxes annotations from a single file. + + Parameters + ---------- + file_path : Path + Path to the input annotations file. + format : Literal["VIA", "COCO", "VIAcsv"] + Format of the input annotations file. + Currently supported formats are "VIA", "VIAcsv" and "COCO". + **kwargs + Additional keyword arguments to pass to the + ``pandas.DataFrame.drop_duplicates`` method. The ``ignore_index=True`` + argument is always applied to force an index reset, and the ``inplace`` + argument is set to `False` and cannot be overridden. The settings + apply if one or multiple files are read. + + Returns + ------- + pd.DataFrame + Bounding boxes annotations dataframe. The dataframe is indexed + by "annotation_id" and has the following columns: "image_filename", + "image_id", "x_min", "y_min", "width", "height", "supercategory", + "category". + + """ + if format == "VIA": + return _df_bboxes_from_single_specific_file( + file_path, + validator=ValidVIA, + get_rows_from_file=_df_rows_from_valid_VIA_file, + **kwargs, + ) + elif format == "COCO": + return _df_bboxes_from_single_specific_file( + file_path, + validator=ValidCOCO, + get_rows_from_file=_df_rows_from_valid_COCO_file, + **kwargs, + ) + elif format == "VIAcsv": + return _df_bboxes_from_single_specific_file( + file_path, + validator=ValidVIAcsv, + get_rows_from_file=_df_rows_from_valid_VIA_csv_file, + **kwargs, + ) + else: + raise ValueError(f"Unsupported format: {format}") + + +def _df_bboxes_from_single_specific_file( + file_path: Path, + validator: type[ValidVIA] | type[ValidCOCO] | type[ValidVIAcsv], + get_rows_from_file: Callable, + **kwargs, +) -> pd.DataFrame: + """Read bounding boxes annotations from a single specific file. + + Parameters + ---------- + file_path : Path + Path to the input annotations file. + validator : type[ValidVIA] | type[ValidCOCO] + Validator class for the input annotations file. + get_rows_from_file : Callable + Function to extract rows from the validated input annotations file. + **kwargs + Additional keyword arguments to pass to the + ``pandas.DataFrame.drop_duplicates`` method. The ``ignore_index=True`` + argument is always applied to force an index reset, and the ``inplace`` + argument is set to `False` and cannot be overridden. The settings + apply if one or multiple files are read. 
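+        For example, ``keep=False`` drops all copies of a duplicated
+        annotation instead of retaining the first.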
+ + Returns + ------- + pd.DataFrame + Bounding boxes annotations dataframe. The dataframe is indexed + by "annotation_id" and has the following columns: "image_filename", + "image_id", "x_min", "y_min", "width", "height", "supercategory", + "category". + + """ + # Validate file + valid_file = validator(file_path) + + # Build dataframe from extracted rows + list_rows = get_rows_from_file(valid_file.path) + df = pd.DataFrame(list_rows) + + # Set "annotation_id" as index + # (otherwise duplicate annotations are not identified as such) + df = df.set_index(STANDARD_BBOXES_DF_INDEX) + + # Drop duplicates and reset indices. + # We use ignore_index=True so that the resulting axis is labeled 0,1,…,n-1. + # NOTE: after this the index name is no longer "annotation_id" + df = df.drop_duplicates(ignore_index=True, inplace=False, **kwargs) + + # Reorder columns to match standard columns + df = df.reindex(columns=STANDARD_BBOXES_DF_COLUMNS) + + # Set the index name to "annotation_id" + df.index.name = STANDARD_BBOXES_DF_INDEX + + # Read as standard dataframe + return df + + +def _df_rows_from_valid_VIA_file(file_path: Path) -> list[dict]: + """Extract list of rows from validated VIA JSON file. + + Parameters + ---------- + file_path : Path + Path to the validated VIA JSON file. + + Returns + ------- + list[dict] + List of rows extracted from the VIA JSON file. + + """ + # Read validated json as dict + with open(file_path) as file: + data_dict = json.load(file) + + # Prepare data + image_metadata_dict = data_dict["_via_img_metadata"] + via_image_id_list = data_dict["_via_image_id_list"] + via_attributes = data_dict["_via_attributes"] + supercategories_props = {} + if "region" in via_attributes: + supercategories_props = via_attributes["region"] + + # Map image filenames to the image keys used by VIA + # the VIA keys are strings + map_filename_to_via_img_id = { + img_dict["filename"]: ky + for ky, img_dict in image_metadata_dict.items() + } + + # Get list of rows in dataframe + list_rows = [] + annotation_id = 0 + # loop thru images + for _, img_dict in image_metadata_dict.items(): + # loop thru annotations in the image + for region in img_dict["regions"]: + # Extract region data + region_shape = region["shape_attributes"] + region_attributes = region["region_attributes"] + + # Define supercategory and category. + # We take first key in "region_attributes" as the supercategory, + # and its value as category_id_str + if region_attributes and supercategories_props: + supercategory = sorted(list(region_attributes.keys()))[0] + category_id_str = region_attributes[supercategory] + category = supercategories_props[supercategory]["options"][ + category_id_str + ] + else: + supercategory = "" + category = "" + + row = { + "annotation_id": annotation_id, + "image_filename": img_dict["filename"], + "image_id": via_image_id_list.index( + map_filename_to_via_img_id[img_dict["filename"]] + ), # integer based on the VIA image ID + "x_min": region_shape["x"], + "y_min": region_shape["y"], + "width": region_shape["width"], + "height": region_shape["height"], + "supercategory": supercategory, + "category": category, + } + + list_rows.append(row) + + # update "annotation_id" + annotation_id += 1 + + return list_rows + + +def _df_rows_from_valid_VIA_csv_file(file_path: Path): + """Extract list of rows from validated VIA CSV file. + + Parameters + ---------- + file_path : Path + Path to the validated VIA CSV file. + + Returns + ------- + list[dict] + List of rows extracted from the VIA CSV file. 
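+
+    Notes
+    -----
+    The first key of each annotation's ``region_attributes`` dictionary
+    is taken as the supercategory, and the "category" column stores the
+    corresponding raw value, which is a category ID string rather than
+    a human-readable category name.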
+ + """ + # Read input csv file + df = pd.read_csv(file_path) + + # Map image filenames to unique image IDs + image_filenames = sorted(df["filename"].unique()) + map_filename_to_image_id = {f: i for i, f in enumerate(image_filenames)} + + list_rows = [] + for df_index, df_row in df.iterrows(): + annotation_id = df_index + + image_filename = df_row["filename"] + image_id = map_filename_to_image_id[image_filename] + + region_shape_attrs = ast.literal_eval( + df_row["region_shape_attributes"] + ) + region_attributes = ast.literal_eval(df_row["region_attributes"]) + + x_min = region_shape_attrs["x"] + y_min = region_shape_attrs["y"] + width = region_shape_attrs["width"] + height = region_shape_attrs["height"] + + supercategory = list(region_attributes.keys())[0] + category_id = region_attributes[supercategory] + + row = { + "annotation_id": annotation_id, + "image_filename": image_filename, + "image_id": image_id, + "x_min": x_min, + "y_min": y_min, + "width": width, + "height": height, + "supercategory": supercategory, + "category": category_id, # category ID ! + } + list_rows.append(row) + + return list_rows + + +def _df_rows_from_valid_COCO_file(file_path: Path) -> list[dict]: + """Extract list of rows from validated COCO JSON file. + + Parameters + ---------- + file_path : Path + Path to the validated COCO JSON file. + + Returns + ------- + list[dict] + List of rows extracted from the COCO JSON file. + + """ + # Read validated json as dict + with open(file_path) as file: + data_dict = json.load(file) + + # Prepare data + map_image_id_to_filename = { + img_dict["id"]: img_dict["file_name"] + for img_dict in data_dict["images"] + } + map_image_id_to_width_height = { + img_dict["id"]: (img_dict["width"], img_dict["height"]) + for img_dict in data_dict["images"] + } + + map_category_id_to_category_data = { + cat_dict["id"]: (cat_dict["name"], cat_dict["supercategory"]) + for cat_dict in data_dict["categories"] + } # category data: category name, supercategor name + + # Build standard dataframe + list_rows = [] + for annot_dict in data_dict["annotations"]: + annotation_id = annot_dict["id"] + + # image data + image_id = annot_dict["image_id"] + image_filename = map_image_id_to_filename[image_id] + image_width = map_image_id_to_width_height[image_id][0] + image_height = map_image_id_to_width_height[image_id][1] + + # bbox data + x_min, y_min, width, height = annot_dict["bbox"] + + # category data + category_id = annot_dict["category_id"] + category, supercategory = map_category_id_to_category_data[category_id] + + row = { + "annotation_id": annotation_id, + "image_filename": image_filename, + "image_id": image_id, + "image_width": image_width, + "image_height": image_height, + "x_min": x_min, + "y_min": y_min, + "width": width, + "height": height, + "supercategory": supercategory, + "category": category, + } + + list_rows.append(row) + + return list_rows diff --git a/ethology/annotations/validators.py b/ethology/annotations/validators.py index e830ce8..a476f02 100644 --- a/ethology/annotations/validators.py +++ b/ethology/annotations/validators.py @@ -1,8 +1,10 @@ """Validators for supported annotation files.""" +import ast import json from pathlib import Path +import pandas as pd from attrs import define, field from ethology.annotations.json_schemas.utils import ( @@ -51,7 +53,11 @@ class ValidVIA: ) required_keys: dict = field( default={ - "main": ["_via_img_metadata", "_via_image_id_list"], + "main": [ + "_via_img_metadata", + "_via_image_id_list", + "_via_attributes", + ], "images": 
["filename", "regions"], "regions": ["shape_attributes", "region_attributes"], "shape_attributes": ["x", "y", "width", "height"], @@ -191,3 +197,65 @@ def _singularise_err_msg(key): f" for {_singularise_err_msg(ky)} {instance_dict}" ), ) + + +@define +class ValidVIAcsv: + """Class for valid VIA CSV files. + + It checks the input CSV file contains the expected header and + represents rectangular bounding boxes. + + Attributes + ---------- + path : pathlib.Path + Path to the VIA CSV file, passed as an input. + required_keys : dict + The required keys for the VIA CSV file. + + Raises + ------ + ValueError + If the VIA CSV file is missing any of the required keys. + + """ + + path: Path = field() + + @path.validator + def _check_file_contains_valid_header(self, attribute, value): + """Ensure the VIA .csv file contains the expected header.""" + expected_header = [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + "region_attributes", + ] + + with open(value) as f: + header = f.readline().strip("\n").split(",") + if header != expected_header: + raise ValueError( + ".csv header row does not match the known format for " + "VIA .csv files. " + f"Expected {expected_header} but got {header}.", + ) + + @path.validator + def _check_region_shape(self, attribute, value): + df = pd.read_csv(value, sep=",", header=0) + + for row in df.itertuples(): + region_shape_attrs = ast.literal_eval(row.region_shape_attributes) + + # check annotation is a rectangle + if region_shape_attrs["name"] != "rect": + raise ValueError( + f"{row.filename} (row {row.Index}): " + "bounding box shape must be 'rect' (rectangular) " + "but instead got " + f"'{region_shape_attrs['name']}'.", + ) diff --git a/tests/test_unit/test_annotations/test_io.py b/tests/test_unit/test_annotations/test_io.py new file mode 100644 index 0000000..abc67a2 --- /dev/null +++ b/tests/test_unit/test_annotations/test_io.py @@ -0,0 +1,524 @@ +from collections.abc import Callable +from contextlib import nullcontext as does_not_raise +from pathlib import Path +from typing import Literal +from unittest.mock import patch + +import pandas as pd +import pytest + +from ethology.annotations.io import ( + STANDARD_BBOXES_DF_COLUMNS, + STANDARD_BBOXES_DF_INDEX, + _df_bboxes_from_multiple_files, + _df_bboxes_from_single_file, + _df_bboxes_from_single_specific_file, + _df_rows_from_valid_COCO_file, + _df_rows_from_valid_VIA_file, + df_bboxes_from_files, +) +from ethology.annotations.validators import ValidCOCO, ValidVIA + + +@pytest.fixture +def multiple_input_files(annotations_test_data: dict) -> dict: + """Fixture that returns for each format, a pair of annotation files + with their number of annotations and images. 
+ """ + return { + "VIA": [ + { + "path": annotations_test_data["VIA_JSON_sample_1.json"], + "n_annotations": 4440, + "n_images": 50, + }, + { + "path": annotations_test_data["VIA_JSON_sample_2.json"], + "n_annotations": 3977, + "n_images": 50, + }, + ], + "COCO": [ + { + "path": annotations_test_data["COCO_JSON_sample_1.json"], + "n_annotations": 4344, + "n_images": 100, + }, + { + "path": annotations_test_data["COCO_JSON_sample_2.json"], + "n_annotations": 4618, + "n_images": 100, + }, + ], + } + + +def assert_dataframe( + df: pd.DataFrame, + expected_n_annotations: int, + expected_n_images: int, + expected_supercategories: str | list[str], + expected_categories: str | list[str], + expected_annots_per_image: int | None = None, +): + """Check that the dataframe has the expected shape and content.""" + # Check shape of dataframe + assert df.shape[0] == expected_n_annotations + + # Check annotation_id is the index name, and that IDs are unique + assert df.index.name == STANDARD_BBOXES_DF_INDEX + assert len(set(df.index)) == expected_n_annotations + + # Check number of images + assert len(df["image_filename"].unique()) == expected_n_images + assert len(df["image_id"].unique()) == expected_n_images + + # Check columns are as expected + assert df.columns.tolist() == STANDARD_BBOXES_DF_COLUMNS + + # Check supercategories are as expected + assert df["supercategory"].unique() == expected_supercategories + + # Check categories are as expected + assert df["category"].unique() == expected_categories + + # Check number of annotations per image if provided + if expected_annots_per_image: + assert all( + df.groupby("image_id").count()["x_min"] + == expected_annots_per_image + ) # count number of "x_min" values when grouping by "image_id" + + +@pytest.mark.parametrize( + "input_format", + [ + "VIA", + "COCO", + ], +) +@pytest.mark.parametrize( + "images_dirs", + [ + [Path("/path/to/images")], # single directory + [Path("/path/to/images1"), Path("/path/to/images2")], # multiple dirs + None, # no images directories + ], +) +@pytest.mark.parametrize( + "file_path, function_to_mock", + [ + ( + Path("/path/to/file"), # single file + "ethology.annotations.io._df_bboxes_from_single_file", + ), + ( + [Path("/path/to/file1"), Path("/path/to/file2")], # multiple files + "ethology.annotations.io._df_bboxes_from_multiple_files", + ), + ], +) +def test_df_bboxes_from_files( + input_format: Literal["VIA", "COCO"], + images_dirs: Path | list[Path] | None, + file_path: Path, + function_to_mock: str, +): + """Test that the general bounding boxes loading function delegates + correctly to the single or multiple file readers, and check the + metadata is added correctly. + """ + # Call general function and see if mocked function is called + with patch(function_to_mock) as mock: + df = df_bboxes_from_files( + file_path, + format=input_format, + images_dirs=images_dirs, + ) + mock.assert_called_once_with(file_path, format=input_format) + + # Check metadata + assert df.metadata["input_files"] == file_path + assert df.metadata["format"] == input_format + if images_dirs: + assert df.metadata["images_dirs"] == images_dirs + + +@pytest.mark.parametrize( + "input_format", + [ + "VIA", + "COCO", + ], +) +def test_df_bboxes_from_multiple_files( + input_format: Literal["VIA", "COCO"], multiple_input_files: dict +): + """Test that the general bounding boxes loading function reads + correctly multiple files of the supported formats. 
+ """ + # Get format and list of files + list_files = multiple_input_files[input_format] + + # Get paths, annotations and images + list_paths = [file["path"] for file in list_files] + list_n_annotations = [file["n_annotations"] for file in list_files] + list_n_images = [file["n_images"] for file in list_files] + + # Read all files as a dataframe + df_all = _df_bboxes_from_multiple_files(list_paths, format=input_format) + + # Check dataframe + assert_dataframe( + df_all, + expected_n_annotations=sum(list_n_annotations), + expected_n_images=sum(list_n_images), + expected_supercategories="animal", + expected_categories="crab", + ) + + +@pytest.mark.parametrize( + "input_format, validator, row_function, no_error_expected", + [ + ("VIA", ValidVIA, _df_rows_from_valid_VIA_file, True), + ("COCO", ValidCOCO, _df_rows_from_valid_COCO_file, True), + ("unsupported", None, None, False), + ], +) +def test_df_bboxes_from_single_file( + input_format: Literal["VIA", "COCO"], + validator: type[ValidVIA] | type[ValidCOCO] | None, + row_function: Callable | None, + no_error_expected: bool, +): + """Test that the ``_df_bboxes_from_single_file`` function delegates + correctly into the specific format readers. + """ + file_path = Path("/mock/path/to/file") + function_to_mock = ( + "ethology.annotations.io._df_bboxes_from_single_specific_file" + ) + + # If the format is supported, check that when calling + # `_df_bboxes_from_single_file`, `_df_bboxes_from_single_specific_file` is + # called under the hood with the correct arguments + if no_error_expected: + with patch(function_to_mock) as mock: + _df_bboxes_from_single_file(file_path, input_format) + mock.assert_called_once_with( + file_path, + validator=validator, + get_rows_from_file=row_function, + ) + # If the format is not supported, check that an error is raised + else: + with pytest.raises(ValueError) as excinfo: + _df_bboxes_from_single_file(file_path, input_format) + assert "Unsupported format" in str(excinfo.value) + + +@pytest.mark.parametrize( + ( + "input_file, validator, row_function, " + "expected_n_annotations, expected_n_images" + ), + [ + ( + "VIA_JSON_sample_1.json", + ValidVIA, + _df_rows_from_valid_VIA_file, + 4440, + 50, + ), # medium VIA file + ( + "VIA_JSON_sample_2.json", + ValidVIA, + _df_rows_from_valid_VIA_file, + 3977, + 50, + ), # medium VIA file + ( + "small_bboxes_VIA.json", + ValidVIA, + _df_rows_from_valid_VIA_file, + 3, + 3, + ), # small VIA file + ( + "COCO_JSON_sample_1.json", + ValidCOCO, + _df_rows_from_valid_COCO_file, + 4344, + 100, + ), # medium COCO file + ( + "COCO_JSON_sample_2.json", + ValidCOCO, + _df_rows_from_valid_COCO_file, + 4618, + 100, + ), # medium COCO file + ( + "small_bboxes_COCO.json", + ValidCOCO, + _df_rows_from_valid_COCO_file, + 3, + 3, + ), # small COCO file + ], +) +def test_df_bboxes_from_single_specific_file( + input_file: str, + validator: type[ValidVIA] | type[ValidCOCO], + row_function: Callable, + expected_n_annotations: int, + expected_n_images: int, + annotations_test_data: dict, +): + """Test the specific bounding box format readers.""" + # Compute bboxes dataframe from a single file + df = _df_bboxes_from_single_specific_file( + file_path=annotations_test_data[input_file], + validator=validator, + get_rows_from_file=row_function, + ) + + # Check dataframe + # (we only check annotations per image in small datasets) + assert_dataframe( + df, + expected_n_annotations, + expected_n_images, + expected_supercategories="animal", + expected_categories="crab", + expected_annots_per_image=1 if 
expected_n_images < 5 else None, + ) + + +@pytest.mark.parametrize( + ("input_file, validator, row_function"), + [ + ( + "small_bboxes_duplicates_VIA.json", + ValidVIA, + _df_rows_from_valid_VIA_file, + ), + ( + "small_bboxes_duplicates_COCO.json", + ValidCOCO, + _df_rows_from_valid_COCO_file, + ), + ], +) +def test_df_bboxes_from_single_specific_file_duplicates( + input_file: str, + validator: type[ValidVIA] | type[ValidCOCO], + row_function: Callable, + annotations_test_data: dict, +): + """Test the specific bounding box format readers when the input file + contains duplicate annotations. + """ + # Properties of input data + # one annotation is duplicated in the first frame + expected_n_annotations_w_duplicates = 4 + expected_n_annotations_wo_duplicates = 3 + expected_n_images = 3 + + # Extract rows + rows = row_function(file_path=annotations_test_data[input_file]) + + # Check total number of annotations including duplicates + assert len(rows) == expected_n_annotations_w_duplicates + + # Compute bboxes dataframe + df = _df_bboxes_from_single_specific_file( + file_path=annotations_test_data[input_file], + validator=validator, + get_rows_from_file=row_function, + ) + + # Check dataframe has no duplicates + assert_dataframe( + df, + expected_n_annotations_wo_duplicates, + expected_n_images, + expected_supercategories="animal", + expected_categories="crab", + ) + + +@pytest.mark.parametrize( + ("input_file, validator, row_function, expected_exception"), + [ + ( + "small_bboxes_no_cat_VIA.json", + ValidVIA, + _df_rows_from_valid_VIA_file, + does_not_raise(), + ), + ( + "small_bboxes_no_cat_COCO.json", + ValidCOCO, + _df_rows_from_valid_COCO_file, + pytest.raises(KeyError), + ), + ], +) +def test_df_bboxes_from_single_specific_file_no_cat( + input_file: str, + validator: type[ValidVIA] | type[ValidCOCO], + row_function: Callable, + expected_exception: pytest.raises, + annotations_test_data: dict, +): + """Test the specific bounding box format readers when the input file + has annotations with no category. 
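+
+    VIA annotations without categories should load with empty strings
+    in the "category" and "supercategory" columns, whereas COCO
+    annotations without a category are expected to raise a ``KeyError``.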
+ """ + # Compute bboxes dataframe with input file that has no categories + # (this should raise an error for COCO files) + with expected_exception as excinfo: + df = _df_bboxes_from_single_specific_file( + file_path=annotations_test_data[input_file], + validator=validator, + get_rows_from_file=row_function, + ) + + # If no error expected, check that the dataframe has empty categories + if not excinfo: + assert all(df.loc[:, "category"] == "") + assert all(df.loc[:, "supercategory"] == "") + + +@pytest.mark.parametrize( + "input_file, expected_n_annotations", + [ + ("VIA_JSON_sample_1.json", 4440), + ("VIA_JSON_sample_2.json", 3977), + ("small_bboxes_VIA.json", 3), + ("small_bboxes_duplicates_VIA.json", 4), # contains duplicates + ], +) +def test_df_rows_from_valid_VIA_file( + input_file: str, + expected_n_annotations: int, + annotations_test_data: dict, +): + """Test the extraction of rows from a valid VIA file.""" + rows = _df_rows_from_valid_VIA_file( + file_path=annotations_test_data[input_file] + ) + + # Check number of rows + assert len(rows) == expected_n_annotations + + # Check each row contains required column data + # Note that "image_width" and "image_height" are not exported to the + # VIA file + for row in rows: + assert all( + key in row + for key in [STANDARD_BBOXES_DF_INDEX] + STANDARD_BBOXES_DF_COLUMNS + if key not in ["image_width", "image_height"] + ) + + +@pytest.mark.parametrize( + "input_file, expected_n_annotations", + [ + ("COCO_JSON_sample_1.json", 4344), + ("COCO_JSON_sample_2.json", 4618), + ("small_bboxes_COCO.json", 3), + ("small_bboxes_duplicates_COCO.json", 4), # contains duplicates + ], +) +def test_df_rows_from_valid_COCO_file( + input_file: str, + expected_n_annotations: int, + annotations_test_data: dict, +): + """Test the extraction of rows from a valid COCO file.""" + rows = _df_rows_from_valid_COCO_file( + file_path=annotations_test_data[input_file] + ) + + # Check number of rows + assert len(rows) == expected_n_annotations + + # Check each row contains required column data + for row in rows: + assert all( + key in row + for key in [STANDARD_BBOXES_DF_INDEX] + STANDARD_BBOXES_DF_COLUMNS + ) + + +@pytest.mark.parametrize( + "duplicates_kwargs, expected_exception", + [ + ({"ignore_index": True}, pytest.raises(ValueError)), + ({"inplace": True}, pytest.raises(ValueError)), + ({"subset": "image_id"}, does_not_raise()), + ({"keep": "last"}, does_not_raise()), + ], +) +@pytest.mark.parametrize( + "input_format, filename", + [ + ("VIA", "small_bboxes_duplicates_VIA.json"), + ("VIA", "MULTIPLE_VIA_FILES"), + ("COCO", "small_bboxes_duplicates_COCO.json"), + ("COCO", "MULTIPLE_COCO_FILES"), + ], +) +def test_df_bboxes_from_files_kwargs( + input_format: Literal["VIA", "COCO"], + filename: str | list[str], + duplicates_kwargs: dict, + expected_exception: pytest.raises, + annotations_test_data: dict, + multiple_input_files: dict, +): + # Check kwargs behaviour when passing multiple files + if "MULTIPLE" in filename: + list_files = multiple_input_files[input_format] + + input_files = [file["path"] for file in list_files] + list_n_annotations = [file["n_annotations"] for file in list_files] + list_n_images = [file["n_images"] for file in list_files] + + expected_n_annotations = sum(list_n_annotations) + expected_n_images = sum(list_n_images) + expected_annots_per_image = None + + # Check kwargs behaviour when passing a single file + else: + input_files = annotations_test_data[filename] + expected_n_annotations = 3 + expected_n_images = 3 + expected_annots_per_image = 
1
+
+    # Compute dataframe and check if an error is raised
+    with expected_exception as excinfo:
+        df = df_bboxes_from_files(
+            input_files,
+            format=input_format,
+            **duplicates_kwargs,
+        )
+
+    # If an error is raised: check the error message.
+    # Note that `excinfo` is None when no error is raised, so we branch
+    # on it directly; comparing `expected_exception == does_not_raise()`
+    # is always False because `nullcontext` instances compare by identity.
+    if excinfo:
+        assert (
+            "argument for `pandas.DataFrame.drop_duplicates` "
+            "may not be overridden." in str(excinfo.value)
+        )
+    # If no error is raised: check the dataframe content
+    else:
+        assert_dataframe(
+            df,
+            expected_n_annotations=expected_n_annotations,
+            expected_n_images=expected_n_images,
+            expected_supercategories="animal",
+            expected_categories="crab",
+            expected_annots_per_image=expected_annots_per_image,
+        )
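
For reference, a minimal usage sketch of the new public reader (the input
file names below are illustrative placeholders, not files added by this PR):

    from pathlib import Path

    from ethology.annotations.io import df_bboxes_from_files

    # Combine two VIA JSON files into a single standard bboxes dataframe,
    # keeping the last occurrence of any duplicated annotation
    df = df_bboxes_from_files(
        [Path("annotations_1.json"), Path("annotations_2.json")],
        format="VIA",
        keep="last",  # forwarded to pandas.DataFrame.drop_duplicates
    )

    # The result is indexed by "annotation_id" and has the standard
    # columns, e.g. "image_filename", "x_min", "y_min", "width", "height"
    print(df.head())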