Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DOTA dataset #2551

Merged
merged 17 commits into from
Mar 12, 2025
4 changes: 4 additions & 0 deletions docs/api/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ DL4GAM
^^^^^^
.. autoclass:: DL4GAMAlps

DOTA
^^^^
.. autoclass:: DOTA

ETCI2021 Flood Detection
^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions docs/api/datasets/non_geo_datasets.csv
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Dataset,Task,Source,License,# Samples,# Classes,Size (px),Resolution (m),Bands
`DFC2022`_,S,Aerial,"CC-BY-4.0","3,981",15,"2,000x2,000",0.5,RGB
`Digital Typhoon`_,"C, R",Himawari,"CC-BY-4.0","189,364",8,512,5000,Infrared
`DL4GAM`_,S,"Sentinel-2","CC-BY-4.0","2,251 or 11,440","2","256x256","10","MSI"
`DOTA`_,OD,"Google Earth, Gaofen-2, Jilin-1, CycloMedia B.V.","CC-BY-NC-4.0","5,229",15,"1000-5000",-,RGB
`ETCI2021 Flood Detection`_,S,Sentinel-1,-,"66,810",2,256x256,5--20,SAR
`EuroSAT`_,C,Sentinel-2,"MIT","27,000",10,64x64,10,MSI
`EverWatch`_,OD,Aerial,"CC0-1.0","5,325",7,"1,500x1500p",0.01,RGB
Expand Down
159 changes: 159 additions & 0 deletions tests/data/dota/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil
import tarfile
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image


def create_dummy_image(path: Path, size: tuple[int, int] = (64, 64)) -> None:
    """Create a small dummy RGB image filled with random pixels.

    Args:
        path: file to write; it is passed straight to ``Image.save``.
        size: leading two dimensions of the random array, which has
            shape ``(*size, 3)``.
    """
    # ``high`` is exclusive in np.random.randint, so 256 is required for the
    # full uint8 range 0..255 to be reachable.
    img = np.random.randint(0, 256, (*size, 3), dtype=np.uint8)
    Image.fromarray(img).save(path)


def create_annotation_file(
    path: Path, is_hbb: bool = False, no_boxes: bool = False
) -> None:
    """Write a dummy annotation file sized for a 64x64 image.

    The file always starts with the two header lines ``imagesource:dummy``
    and ``gsd:1.0``. They are followed by two box lines (eight coordinates,
    a class name and a trailing ``0``) unless ``no_boxes`` is set.

    Args:
        path: file to write (overwritten if it already exists).
        is_hbb: if True, write axis-aligned (horizontal) boxes instead of
            oriented ones.
        no_boxes: if True, write the header only; takes precedence over
            ``is_hbb``.
    """
    header = ['imagesource:dummy\n', 'gsd:1.0\n']

    if no_boxes:
        records: list[str] = []
    elif is_hbb:
        # Horizontal boxes scaled for 64x64
        records = [
            '10.0 10.0 20.0 10.0 20.0 20.0 10.0 20.0 plane 0\n',
            '30.0 30.0 40.0 30.0 40.0 40.0 30.0 40.0 ship 0\n',
        ]
    else:
        # Oriented boxes scaled for 64x64
        records = [
            '10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0\n',
            '30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0\n',
        ]

    with open(path, 'w') as handle:
        handle.writelines(header + records)


def create_test_data(root: Path) -> None:
    """Create the DOTA test dataset and print the matching ``file_info`` dict.

    For each split ('train' with 3 samples, 'val' with 2) and each version
    ('1.0', '1.5', '2.0'), dummy images and annotation files are written
    under ``root/<split>/`` and the ``images`` and ``annotations``
    directories are packed into ``root/dotav<version>_<type>_<split>.tar.gz``.
    Afterwards a ``file_info`` dict literal listing each archive's filename
    and MD5 checksum is printed to stdout.

    Any existing ``root/<split>`` directory is deleted first.

    Args:
        root: directory in which the files and archives are created.
    """
    splits = ['train', 'val']
    versions = ['1.0', '1.5', '2.0']
    types = ['images', 'annotations']

    # Create directory structure
    for split in splits:
        num_samples = 3 if split == 'train' else 2
        split_dir = root / split

        # Start from a clean tree so stale files never end up in an archive
        if os.path.exists(split_dir):
            shutil.rmtree(split_dir)

        for version in versions:
            image_dir = split_dir / 'images'
            ann_dir = split_dir / 'annotations' / f'version{version}'
            image_dir.mkdir(parents=True, exist_ok=True)
            ann_dir.mkdir(parents=True, exist_ok=True)

            # Create images and annotations; the first sample of every
            # split/version gets an annotation file without boxes.
            for i in range(num_samples):
                create_dummy_image(image_dir / f'P{i:04d}.png')
                create_annotation_file(ann_dir / f'P{i:04d}.txt', False, i == 0)

            # Create tar archives. Each directory is archived as it exists at
            # this point, so an annotations archive also contains the version
            # directories written by earlier iterations for this split.
            for type_ in types:
                src_dir = split_dir / type_
                if src_dir.exists():
                    tar_name = f'dotav{version}_{type_}_{split}.tar.gz'
                    with tarfile.open(root / tar_name, 'w:gz') as tar:
                        tar.add(src_dir, arcname=f'{split}/{type_}')

    # print md5sums
    def md5(fname: Path) -> str:
        """Return the hex MD5 digest of the file at ``fname``."""
        hash_md5 = hashlib.md5()
        with open(fname, 'rb') as f:
            while chunk := f.read(4096):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    print('file_info = {')
    for split in splits:
        print(f" '{split}': {{")

        for type_ in types:
            print(f" '{type_}': {{")

            for version in versions:
                # version 1.0 and 1.5 have the same images
                if version == '1.5' and type_ == 'images':
                    version_filename = '1.0'
                else:
                    version_filename = version

                filename = f'dotav{version_filename}_{type_}_{split}.tar.gz'
                # Hash the archive that is actually named in the entry, and
                # resolve it against root rather than the working directory.
                checksum = md5(root / filename)

                print(f" '{version}': {{")
                print(f" 'filename': '{filename}',")
                print(f" 'md5': '{checksum}',")
                print(' },')

            print(' },')

        print(' },')
    print('}')


def create_sample_df(root: Path) -> pd.DataFrame:
    """Build the sample index for the test data and save it as CSV.

    One row is produced per (split, version, sample) combination, with the
    columns ``image_path``, ``annotation_path``, ``split`` and ``version``.
    Paths are relative, starting at the split directory. Only
    ``root/samples.csv`` is written by this function.

    Args:
        root: directory that receives ``samples.csv``.

    Returns:
        The DataFrame that was written to disk.
    """
    samples_per_split = {'train': 3, 'val': 2}
    records = []

    for split, count in samples_per_split.items():
        split_dir = Path(split)
        for version in ('1.0', '1.5', '2.0'):
            ann_dir = split_dir / 'annotations' / f'version{version}'
            records.extend(
                {
                    'image_path': str(split_dir / 'images' / f'P{idx:04d}.png'),
                    'annotation_path': str(ann_dir / f'P{idx:04d}.txt'),
                    'split': split,
                    'version': version,
                }
                for idx in range(count)
            )

    frame = pd.DataFrame(records)
    frame.to_csv(root / 'samples.csv')
    return frame


if __name__ == '__main__':
    # Script entry point: generate the dummy files, archives and samples.csv
    # relative to the current working directory.
    root = Path('.')
    create_test_data(root)
    df = create_sample_df(root)
Binary file added tests/data/dota/dotav1.0_annotations_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.0_annotations_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.0_images_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.0_images_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.5_annotations_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.5_annotations_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.5_images_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav1.5_images_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2.0_annotations_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2.0_annotations_val.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2.0_images_train.tar.gz
Binary file not shown.
Binary file added tests/data/dota/dotav2.0_images_val.tar.gz
Binary file not shown.
16 changes: 16 additions & 0 deletions tests/data/dota/samples.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
,image_path,annotation_path,split,version
0,train/images/P0000.png,train/annotations/version1.0/P0000.txt,train,1.0
1,train/images/P0001.png,train/annotations/version1.0/P0001.txt,train,1.0
2,train/images/P0002.png,train/annotations/version1.0/P0002.txt,train,1.0
3,train/images/P0000.png,train/annotations/version1.5/P0000.txt,train,1.5
4,train/images/P0001.png,train/annotations/version1.5/P0001.txt,train,1.5
5,train/images/P0002.png,train/annotations/version1.5/P0002.txt,train,1.5
6,train/images/P0000.png,train/annotations/version2.0/P0000.txt,train,2.0
7,train/images/P0001.png,train/annotations/version2.0/P0001.txt,train,2.0
8,train/images/P0002.png,train/annotations/version2.0/P0002.txt,train,2.0
9,val/images/P0000.png,val/annotations/version1.0/P0000.txt,val,1.0
10,val/images/P0001.png,val/annotations/version1.0/P0001.txt,val,1.0
11,val/images/P0000.png,val/annotations/version1.5/P0000.txt,val,1.5
12,val/images/P0001.png,val/annotations/version1.5/P0001.txt,val,1.5
13,val/images/P0000.png,val/annotations/version2.0/P0000.txt,val,2.0
14,val/images/P0001.png,val/annotations/version2.0/P0001.txt,val,2.0
Binary file added tests/data/dota/samples.parquet
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/data/dota/train/annotations/version1.0/P0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/version1.0/P0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/version1.0/P0002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/train/annotations/version1.5/P0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/version1.5/P0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/version1.5/P0002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/train/annotations/version2.0/P0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/version2.0/P0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
4 changes: 4 additions & 0 deletions tests/data/dota/train/annotations/version2.0/P0002.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
Binary file added tests/data/dota/train/images/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/train/images/P0002.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions tests/data/dota/val/annotations/version1.0/P0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/val/annotations/version1.0/P0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/val/annotations/version1.5/P0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/val/annotations/version1.5/P0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
2 changes: 2 additions & 0 deletions tests/data/dota/val/annotations/version2.0/P0000.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
imagesource:dummy
gsd:1.0
4 changes: 4 additions & 0 deletions tests/data/dota/val/annotations/version2.0/P0001.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
imagesource:dummy
gsd:1.0
10.0 10.0 20.0 12.0 18.0 20.0 8.0 18.0 plane 0
30.0 30.0 42.0 32.0 40.0 40.0 28.0 38.0 ship 0
Binary file added tests/data/dota/val/images/P0000.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/dota/val/images/P0001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading