Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add iSAID dataset #2550

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,11 @@ HySpecNet-11k

.. autoclass:: HySpecNet11k

iSAID
^^^^^

.. autoclass:: ISAID

IDTReeS
^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`HySpecNet-11k`_,-,EnMAP,CC0-1.0,11k,-,128,30,HSI
`IDTReeS`_,"OD,C",Aerial,"CC-BY-4.0",591,33,200x200,0.1--1,RGB
`Inria Aerial Image Labeling`_,S,Aerial,-,360,2,"5,000x5,000",0.3,RGB
`iSAID`_,"OD,I",Aerial,"CC-BY-NC-4.0","2,806",15,"varies","varies",RGB
`LandCover.ai`_,S,Aerial,"CC-BY-NC-SA-4.0","10,674",5,512x512,0.25--0.5,RGB
`LEVIR-CD`_,CD,Google Earth,-,637,2,"1,024x1,024",0.5,RGB
`LEVIR-CD+`_,CD,Google Earth,-,985,2,"1,024x1,024",0.5,RGB
Expand Down
135 changes: 135 additions & 0 deletions tests/data/isaid/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import json
import os
import shutil
import tarfile
from pathlib import Path

import numpy as np
from PIL import Image


def create_dummy_image(path: Path, size: tuple[int, int] = (64, 64)) -> None:
    """Write a random 3-channel 8-bit image to ``path``.

    Args:
        path: Destination file passed straight to ``PIL.Image.Image.save``.
        size: First two dimensions of the generated pixel array; a trailing
            channel dimension of 3 is appended.
    """
    # ``high`` is exclusive in ``np.random.randint``, so 256 (not 255) is
    # needed for the samples to cover the whole uint8 range 0..255.
    pixels = np.random.randint(0, 256, (*size, 3), dtype=np.uint8)
    Image.fromarray(pixels).save(path)


def create_coco_annotations(split: str, num_images: int) -> dict:
    """Build a minimal COCO-style annotation dictionary.

    Each image record is 64x64 and is paired with exactly two annotations
    that share the same square polygon and bounding box. Category ids cycle
    through the 15 categories as the annotation id increases.

    Args:
        split: Name of the split. Accepted for symmetry with the caller but
            not used when building the result.
        num_images: Number of image records to generate.

    Returns:
        A dictionary with the keys 'info', 'images', 'annotations' and
        'categories', in that order.
    """
    category_names = (
        'plane',
        'ship',
        'storage tank',
        'baseball diamond',
        'tennis court',
        'basketball court',
        'ground track field',
        'harbor',
        'bridge',
        'vehicle',
        'helicopter',
        'roundabout',
        'swimming pool',
        'soccer ball field',
        'container crane',
    )
    num_categories = len(category_names)

    image_records = []
    for image_id in range(num_images):
        image_records.append(
            {
                'id': image_id,
                'file_name': f'P{image_id:04d}.png',
                'height': 64,
                'width': 64,
            }
        )

    annotation_records = []
    for ann_id in range(2 * num_images):
        annotation_records.append(
            {
                'id': ann_id,
                # Consecutive pairs of annotations belong to the same image
                'image_id': ann_id // 2,
                'category_id': ann_id % num_categories,
                'segmentation': [[10, 10, 20, 10, 20, 20, 10, 20]],
                'area': 100,
                'bbox': [10, 10, 10, 10],
                'iscrowd': 0,
            }
        )

    category_records = [
        {'id': category_id, 'name': label}
        for category_id, label in enumerate(category_names)
    ]

    return {
        'info': {'year': 2023, 'version': '1.0'},
        'images': image_records,
        'annotations': annotation_records,
        'categories': category_records,
    }


def create_test_data(root: Path) -> None:
    """Generate the dummy iSAID directory tree and archives under ``root``.

    For every split this writes random RGB images, instance and semantic
    masks and a COCO-style annotation file. It then packs them into
    ``dotav1_images_{split}.tar.gz`` and ``isaid_annotations_{split}.tar.gz``
    and prints the MD5 checksum of each archive.

    Args:
        root: Directory in which the split folders and archives are created.
    """
    splits = {'train': 3, 'val': 2}
    label_subdirs = ['Annotations', 'Instance_masks', 'Semantic_masks']

    # Pass 1: write the raw files of *every* split before building any
    # archive. Each annotation archive bundles the label folders of all
    # splits, so those folders must already exist when the first archive is
    # created (otherwise a run in a clean directory fails on the missing
    # folders of the later split).
    for split, num_samples in splits.items():
        split_dir = root / split
        if split_dir.exists():
            shutil.rmtree(split_dir)

        # Create directories
        for subdir in ['images', *label_subdirs]:
            (split_dir / subdir).mkdir(parents=True, exist_ok=True)

        # Create images and masks
        for i in range(num_samples):
            filename = f'P{i:04d}.png'

            # RGB image
            create_dummy_image(split_dir / 'images' / filename)

            # Instance mask (R+G*256+B*256^2 encoding); only the R channel
            # is populated here, with a distinct value per image.
            instance_mask = np.zeros((64, 64, 3), dtype=np.uint8)
            instance_mask[10:20, 10:20, 0] = i + 1
            Image.fromarray(instance_mask).save(
                split_dir / 'Instance_masks' / filename
            )

            # Semantic mask (similar encoding for class IDs)
            semantic_mask = np.zeros((64, 64, 3), dtype=np.uint8)
            semantic_mask[10:20, 10:20, 0] = 1  # Class ID 1
            Image.fromarray(semantic_mask).save(
                split_dir / 'Semantic_masks' / filename
            )

        # Create COCO annotations
        annotations = create_coco_annotations(split, num_samples)
        with open(split_dir / 'Annotations' / f'iSAID_{split}.json', 'w') as f:
            json.dump(annotations, f)

    # Pass 2: build the archives now that all split folders are in place.
    for split in splits:
        # Create image tar
        img_tar = f'dotav1_images_{split}.tar.gz'
        with tarfile.open(root / img_tar, 'w:gz') as tar:
            tar.add(root / split / 'images', arcname=os.path.join(split, 'images'))

        # Create annotations tar with all splits. A separate loop variable is
        # used so the split naming the archive is not shadowed.
        ann_tar = f'isaid_annotations_{split}.tar.gz'
        with tarfile.open(root / ann_tar, 'w:gz') as tar:
            for member_split in splits:
                for subdir in label_subdirs:
                    tar.add(
                        root / member_split / subdir,
                        arcname=os.path.join(member_split, subdir),
                    )

    def md5(fname: Path) -> str:
        """Return the hex MD5 digest of a file, read in 4 KiB chunks."""
        hash_md5 = hashlib.md5()
        with open(fname, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b''):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    # Print MD5 checksums
    for split in splits:
        img_tar = f'dotav1_images_{split}.tar.gz'
        ann_tar = f'isaid_annotations_{split}.tar.gz'
        print(f'MD5 for {img_tar}: {md5(root / img_tar)}')
        print(f'MD5 for {ann_tar}: {md5(root / ann_tar)}')


if __name__ == '__main__':
    # Generate the data relative to the current working directory.
    create_test_data(Path('.'))
Binary file added tests/data/isaid/dotav1_images_train.tar.gz
Binary file not shown.
Binary file added tests/data/isaid/dotav1_images_val.tar.gz
Binary file not shown.
Binary file added tests/data/isaid/isaid_annotations_train.tar.gz
Binary file not shown.
Binary file added tests/data/isaid/isaid_annotations_val.tar.gz
Binary file not shown.
1 change: 1 addition & 0 deletions tests/data/isaid/train/Annotations/iSAID_train.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"info": {"year": 2023, "version": "1.0"}, "images": [{"id": 0, "file_name": "P0000.png", "height": 64, "width": 64}, {"id": 1, "file_name": "P0001.png", "height": 64, "width": 64}, {"id": 2, "file_name": "P0002.png", "height": 64, "width": 64}], "annotations": [{"id": 0, "image_id": 0, "category_id": 0, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 1, "image_id": 0, "category_id": 1, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 2, "image_id": 1, "category_id": 2, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 3, "image_id": 1, "category_id": 3, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 4, "image_id": 2, "category_id": 4, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 5, "image_id": 2, "category_id": 5, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}], "categories": [{"id": 0, "name": "plane"}, {"id": 1, "name": "ship"}, {"id": 2, "name": "storage tank"}, {"id": 3, "name": "baseball diamond"}, {"id": 4, "name": "tennis court"}, {"id": 5, "name": "basketball court"}, {"id": 6, "name": "ground track field"}, {"id": 7, "name": "harbor"}, {"id": 8, "name": "bridge"}, {"id": 9, "name": "vehicle"}, {"id": 10, "name": "helicopter"}, {"id": 11, "name": "roundabout"}, {"id": 12, "name": "swimming pool"}, {"id": 13, "name": "soccer ball field"}, {"id": 14, "name": "container crane"}]}
Binary file added tests/data/isaid/train/Instance_masks/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/Instance_masks/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/Instance_masks/P0002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/Semantic_masks/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/Semantic_masks/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/Semantic_masks/P0002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/images/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/images/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/train/images/P0002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions tests/data/isaid/val/Annotations/iSAID_val.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"info": {"year": 2023, "version": "1.0"}, "images": [{"id": 0, "file_name": "P0000.png", "height": 64, "width": 64}, {"id": 1, "file_name": "P0001.png", "height": 64, "width": 64}], "annotations": [{"id": 0, "image_id": 0, "category_id": 0, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 1, "image_id": 0, "category_id": 1, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 2, "image_id": 1, "category_id": 2, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}, {"id": 3, "image_id": 1, "category_id": 3, "segmentation": [[10, 10, 20, 10, 20, 20, 10, 20]], "area": 100, "bbox": [10, 10, 10, 10], "iscrowd": 0}], "categories": [{"id": 0, "name": "plane"}, {"id": 1, "name": "ship"}, {"id": 2, "name": "storage tank"}, {"id": 3, "name": "baseball diamond"}, {"id": 4, "name": "tennis court"}, {"id": 5, "name": "basketball court"}, {"id": 6, "name": "ground track field"}, {"id": 7, "name": "harbor"}, {"id": 8, "name": "bridge"}, {"id": 9, "name": "vehicle"}, {"id": 10, "name": "helicopter"}, {"id": 11, "name": "roundabout"}, {"id": 12, "name": "swimming pool"}, {"id": 13, "name": "soccer ball field"}, {"id": 14, "name": "container crane"}]}
Binary file added tests/data/isaid/val/Instance_masks/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/val/Instance_masks/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/val/Semantic_masks/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/val/Semantic_masks/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/val/images/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/isaid/val/images/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
104 changes: 104 additions & 0 deletions tests/datasets/test_isaid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
from pathlib import Path

import pytest
import torch
import torch.nn as nn
from _pytest.fixtures import SubRequest
from pytest import MonkeyPatch

from torchgeo.datasets import ISAID, DatasetNotFoundError

# Skip every test in this module when pycocotools cannot be imported.
pytest.importorskip('pycocotools')


class TestISAID:
    """Tests for the ISAID dataset, run against the archives in tests/data/isaid."""

    @pytest.fixture(params=['train', 'val'])
    def dataset(
        self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
    ) -> ISAID:
        """Build an ISAID instance for each split from the local test archives."""
        # Point both download URLs at the local test-data directory.
        local_url = os.path.join('tests', 'data', 'isaid', '{}')
        for url_attr in ('img_url', 'label_url'):
            monkeypatch.setattr(ISAID, url_attr, local_url)

        # Replace the archive names and checksums with those of the test data.
        monkeypatch.setattr(
            ISAID,
            'img_files',
            {
                'train': {
                    'filename': 'dotav1_images_train.tar.gz',
                    'md5': 'a38ad9832066e2ca6d30b8eec65f9ce8',
                },
                'val': {
                    'filename': 'dotav1_images_val.tar.gz',
                    'md5': '154babe8091484bd85c6340f43cea1ea',
                },
            },
        )
        monkeypatch.setattr(
            ISAID,
            'label_files',
            {
                'train': {
                    'filename': 'isaid_annotations_train.tar.gz',
                    'md5': 'f4de0f6b38f1b11b121dc01c880aeb2a',
                },
                'val': {
                    'filename': 'isaid_annotations_val.tar.gz',
                    'md5': '88eccdf9744c201248266b9a784ffeab',
                },
            },
        )

        return ISAID(
            tmp_path,
            request.param,
            transforms=nn.Identity(),
            download=True,
            checksum=True,
        )

    def test_getitem(self, dataset: ISAID) -> None:
        """Every sample is a dict whose image, masks and boxes are tensors."""
        for index in range(len(dataset)):
            sample = dataset[index]
            assert isinstance(sample, dict)
            for key in ('image', 'masks', 'boxes'):
                assert isinstance(sample[key], torch.Tensor)

    def test_len(self, dataset: ISAID) -> None:
        """The train split holds 3 samples and the val split holds 2."""
        expected = 3 if dataset.split == 'train' else 2
        assert len(dataset) == expected

    def test_already_downloaded(self, dataset: ISAID) -> None:
        """Constructing again on a root the fixture already populated succeeds."""
        ISAID(root=dataset.root, download=True)

    def test_not_yet_extracted(self, tmp_path: Path) -> None:
        """Archives copied into the root are enough to construct the dataset."""
        source_dir = os.path.join('tests', 'data', 'isaid')
        archive_names = (
            'dotav1_images_train.tar.gz',
            'dotav1_images_val.tar.gz',
            'isaid_annotations_train.tar.gz',
            'isaid_annotations_val.tar.gz',
        )
        for archive_name in archive_names:
            shutil.copyfile(
                os.path.join(source_dir, archive_name),
                os.path.join(str(tmp_path), archive_name),
            )

        ISAID(root=tmp_path)

    def test_invalid_split(self) -> None:
        """An unknown split name raises AssertionError."""
        with pytest.raises(AssertionError):
            ISAID(split='foo')

    def test_corrupted(self, tmp_path: Path) -> None:
        """A bogus archive with checksum=True raises RuntimeError."""
        bogus_archive = os.path.join(tmp_path, 'dotav1_images_train.tar.gz')
        with open(bogus_archive, 'w') as handle:
            handle.write('bad')

        with pytest.raises(RuntimeError, match='Archive'):
            ISAID(root=tmp_path, checksum=True)

    def test_not_downloaded(self, tmp_path: Path) -> None:
        """An empty root without download raises DatasetNotFoundError."""
        with pytest.raises(DatasetNotFoundError, match='Dataset not found'):
            ISAID(tmp_path)
2 changes: 2 additions & 0 deletions torchgeo/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from .inaturalist import INaturalist
from .inria import InriaAerialImageLabeling
from .iobench import IOBench
from .isaid import ISAID
from .l7irish import L7Irish
from .l8biome import L8Biome
from .landcoverai import LandCoverAI, LandCoverAI100, LandCoverAIBase, LandCoverAIGeo
Expand Down Expand Up @@ -163,6 +164,7 @@
'FAIR1M',
'GBIF',
'GID15',
'ISAID',
'LEVIRCD',
'MDAS',
'NAIP',
Expand Down
Loading