Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

streaming export #9084

Open
wants to merge 3 commits into
base: dl/update-datumaro
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1660,11 +1660,8 @@ def __init__(
self._user = self._load_user_info(instance_meta) if dimension == DimensionType.DIM_3D else {}
self._dimension = dimension
self._format_type = format_type

is_video = instance_meta['mode'] == 'interpolation'
ext = ''
if is_video:
ext = TaskFrameProvider.VIDEO_FRAME_EXT
self._instance_data = instance_data
self._include_images = include_images

if dimension == DimensionType.DIM_3D or include_images:
if isinstance(instance_data, TaskData):
Expand All @@ -1678,23 +1675,29 @@ def __init__(
{0: MediaSource(db_task)}
)

dm_items: list[dm.DatasetItem] = []
for frame_data in instance_data.group_by_frame(include_empty=True):
def __iter__(self):
instance_meta = self._instance_data.meta[self._instance_data.META_FIELD]
is_video = instance_meta['mode'] == 'interpolation'
ext = ''
if is_video:
ext = TaskFrameProvider.VIDEO_FRAME_EXT

for frame_data in self._instance_data.group_by_frame(include_empty=True):
dm_media_args = { 'path': frame_data.name + ext }
if dimension == DimensionType.DIM_3D:
if self._dimension == DimensionType.DIM_3D:
dm_media: dm.PointCloud = self._media_provider.get_media_for_frame(
0, frame_data.id, **dm_media_args
)

if not include_images:
if not self._include_images:
dm_media_args["extra_images"] = [
dm.Image.from_file(path=osp.basename(image.path))
for image in dm_media.extra_images
]
dm_media = dm.PointCloud.from_file(**dm_media_args)
else:
dm_media_args['size'] = (frame_data.height, frame_data.width)
if include_images:
if self._include_images:
dm_media: dm.Image = self._media_provider.get_media_for_frame(
0, frame_data.idx, **dm_media_args
)
Expand All @@ -1705,16 +1708,16 @@ def __init__(

dm_attributes = {'frame': frame_data.frame}

if dimension == DimensionType.DIM_2D:
if self._dimension == DimensionType.DIM_2D:
dm_item = dm.DatasetItem(
id=osp.splitext(frame_data.name)[0],
subset=frame_data.subset,
annotations=dm_anno,
media=dm_media,
attributes=dm_attributes,
)
elif dimension == DimensionType.DIM_3D:
if format_type == "sly_pointcloud":
elif self._dimension == DimensionType.DIM_3D:
if self._format_type == "sly_pointcloud":
dm_attributes["name"] = self._user["name"]
dm_attributes["createdAt"] = self._user["createdAt"]
dm_attributes["updatedAt"] = self._user["updatedAt"]
Expand All @@ -1731,9 +1734,10 @@ def __init__(
attributes=dm_attributes,
)

dm_items.append(dm_item)
yield dm_item

self._items = dm_items
# Report the size of the wrapped instance data instead of counting built items.
# NOTE(review): this should equal the number of items __iter__ yields; confirm
# that len(self._instance_data) counts the same frames as
# group_by_frame(include_empty=True).
def __len__(self):
return len(self._instance_data)

def _read_cvat_anno(self, cvat_frame_anno: CommonData.Frame, labels: list):
categories = self.categories()
Expand All @@ -1747,6 +1751,11 @@ def map_label(name, parent=''): return label_cat.find(name, parent)[0]
return self.convert_annotations(cvat_frame_anno,
label_attrs, map_label, self._format_type, self._dimension)

# Always reports True for this extractor.
# NOTE(review): presumably read by Datumaro (e.g. StreamDataset) to choose lazy,
# single-pass iteration over the items — confirm against the pinned datumaro commit.
@property
def is_stream(self) -> bool:
return True


class CVATProjectDataExtractor(dm.DatasetBase, CVATDataExtractorMixin):
def __init__(
self,
Expand Down
9 changes: 6 additions & 3 deletions cvat/apps/dataset_manager/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
import zipfile

from datumaro.components.annotation import AnnotationType
from datumaro.components.dataset import Dataset
from datumaro.components.dataset import Dataset, StreamDataset
from datumaro.plugins.data_formats.coco.importer import CocoImporter

from cvat.apps.dataset_manager.bindings import (
GetCVATDataExtractor,
NoMediaInAnnotationFileError,
ProjectData,
detect_dataset,
import_dm_annotations,
)
Expand All @@ -23,7 +24,8 @@
@exporter(name="COCO", ext="ZIP", version="1.0")
def _export(dst_file, temp_dir, instance_data, save_images=False):
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
dataset.export(temp_dir, "coco_instances", save_media=save_images, merge_images=False)

make_zip_archive(temp_dir, dst_file)
Expand All @@ -50,7 +52,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
@exporter(name="COCO Keypoints", ext="ZIP", version="1.0")
def _export(dst_file, temp_dir, instance_data, save_images=False):
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
dataset.export(
temp_dir, "coco_person_keypoints", save_media=save_images, merge_images=False
)
Expand Down
7 changes: 5 additions & 2 deletions cvat/apps/dataset_manager/formats/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Callable, Optional

from datumaro.components.annotation import AnnotationType
from datumaro.components.dataset import StreamDataset
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.project import Dataset
from pyunpack import Archive
Expand Down Expand Up @@ -36,7 +37,8 @@ def _export_common(
**kwargs,
):
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
dataset.export(temp_dir, format_name, save_media=save_images, **kwargs)

make_zip_archive(temp_dir, dst_file)
Expand Down Expand Up @@ -109,7 +111,8 @@ def _export_yolo_ultralytics_oriented_boxes(*args, **kwargs):
@exporter(name="Ultralytics YOLO Segmentation", ext="ZIP", version="1.0")
def _export_yolo_ultralytics_segmentation(dst_file, temp_dir, instance_data, *, save_images=False):
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset_cls = Dataset if isinstance(instance_data, ProjectData) else StreamDataset
dataset = dataset_cls.from_extractors(extractor, env=dm_env)
dataset = dataset.transform("masks_to_polygons")
dataset.export(temp_dir, "yolo_ultralytics_segmentation", save_media=save_images)

Expand Down
2 changes: 1 addition & 1 deletion cvat/requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ azure-storage-blob==12.13.0
boto3==1.17.61
clickhouse-connect==0.6.8
coreapi==2.3.3
datumaro @ git+https://github.com/cvat-ai/datumaro.git@58c6ecbdd9f630d5b0b344b2bb98e66c209eaeb2
datumaro @ git+https://github.com/cvat-ai/datumaro.git@33a69fc42c58445968a117ec450f4436e1adefe0
dj-pagination==2.5.0
# Despite direct indication allauth in requirements we should keep 'with_social' for dj-rest-auth
# to avoid possible further versions conflicts (we use registration functionality)
Expand Down
2 changes: 1 addition & 1 deletion cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ cryptography==44.0.0
# pyjwt
cycler==0.12.1
# via matplotlib
datumaro @ git+https://github.com/cvat-ai/datumaro.git@58c6ecbdd9f630d5b0b344b2bb98e66c209eaeb2
datumaro @ git+https://github.com/cvat-ai/datumaro.git@33a69fc42c58445968a117ec450f4436e1adefe0
# via -r cvat/requirements/base.in
defusedxml==0.7.1
# via
Expand Down
Loading