From 2a2dc86075f1d7ed58e144077812d640d662d6d2 Mon Sep 17 00:00:00 2001
From: Maria Khrustaleva
Date: Fri, 7 Feb 2025 12:56:40 +0100
Subject: [PATCH 01/14] Add new API && remove deprecated && refactor working with rq meta

---
 cvat/apps/dataset_manager/bindings.py         |   9 +-
 cvat/apps/dataset_manager/project.py          |   9 +-
 cvat/apps/dataset_manager/views.py            |   6 +-
 cvat/apps/engine/background.py                | 589 ++++++-------------
 cvat/apps/engine/backup.py                    |  17 +-
 cvat/apps/engine/cache.py                     |   9 +-
 cvat/apps/engine/middleware.py                |   8 +
 cvat/apps/engine/mixins.py                    |  86 +--
 cvat/apps/engine/permissions.py               |  83 ++-
 cvat/apps/engine/rq_job_handler.py            | 169 +++++-
 cvat/apps/engine/serializers.py               |  44 +-
 cvat/apps/engine/task.py                      |  45 +-
 cvat/apps/engine/utils.py                     |  15 +-
 cvat/apps/engine/views.py                     | 540 +++++------
 cvat/apps/events/export.py                    |   7 +-
 cvat/apps/events/handlers.py                  |  21 +-
 cvat/apps/lambda_manager/views.py             |  24 +-
 cvat/apps/quality_control/quality_reports.py  |   5 +-
 cvat/apps/quality_control/views.py            |   4 +-
 19 files changed, 746 insertions(+), 944 deletions(-)

diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py
index a2de70fce159..76b99a3aba04 100644
--- a/cvat/apps/dataset_manager/bindings.py
+++ b/cvat/apps/dataset_manager/bindings.py
@@ -44,7 +44,7 @@
     ShapeType,
     Task,
 )
-from cvat.apps.engine.rq_job_handler import RQJobMetaField
+from cvat.apps.engine.rq_job_handler import RQMeta
 
 from ..engine.log import ServerLogManager
 from .annotation import AnnotationIR, AnnotationManager, TrackManager
@@ -2444,9 +2444,10 @@ def load_dataset_data(project_annotation, dataset: dm.Dataset, project_data):
             raise CvatImportError(f'Target project does not have label with name "{label.name}"')
     for subset_id, subset in enumerate(dataset.subsets().values()):
         job = rq.get_current_job()
-        job.meta[RQJobMetaField.STATUS] = 'Task from dataset is being created...'
-        job.meta[RQJobMetaField.PROGRESS] = (subset_id + job.meta.get(RQJobMetaField.TASK_PROGRESS, 0.)) / len(dataset.subsets().keys())
-        job.save_meta()
+        job_meta = RQMeta.from_job(job)
+        job_meta.status = 'Task from dataset is being created...'
+        job_meta.progress = (subset_id + (job_meta.task_progress or 0.)) / len(dataset.subsets().keys())
+        job_meta.save()
 
         task_fields = {
             'project': project_annotation.db_project,
diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py
index 8f91e4f12651..815a9337330b 100644
--- a/cvat/apps/dataset_manager/project.py
+++ b/cvat/apps/dataset_manager/project.py
@@ -17,7 +17,6 @@
 from cvat.apps.dataset_manager.util import TmpDirManager
 from cvat.apps.engine import models
 from cvat.apps.engine.log import DatasetLogManager
-from cvat.apps.engine.rq_job_handler import RQJobMetaField
 from cvat.apps.engine.serializers import DataSerializer, TaskWriteSerializer
 from cvat.apps.engine.task import _create_thread as create_task
 
@@ -197,9 +196,11 @@ def data(self) -> dict:
 @transaction.atomic
 def import_dataset_as_project(src_file, project_id, format_name, conv_mask_to_poly):
     rq_job = rq.get_current_job()
-    rq_job.meta[RQJobMetaField.STATUS] = 'Dataset import has been started...'
-    rq_job.meta[RQJobMetaField.PROGRESS] = 0.
-    rq_job.save_meta()
+    from cvat.apps.engine.rq_job_handler import RQMeta
+    rq_job_meta = RQMeta.from_job(rq_job)
+    rq_job_meta.status = 'Dataset import has been started...'
+    rq_job_meta.progress = 0.
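The hunks above show the migration that repeats throughout this series: raw `rq_job.meta` dictionary access keyed by `RQJobMetaField` constants is replaced by the typed `RQMeta` wrapper that this patch adds to cvat/apps/engine/rq_job_handler.py. A minimal sketch of the old and new styles, assembled from calls that appear in this patch rather than quoted from any single hunk, and not runnable outside the CVAT code base:

    # old style: untyped dict access on the job meta
    rq_job.meta[RQJobMetaField.STATUS] = 'Dataset import has been started...'
    rq_job.meta[RQJobMetaField.PROGRESS] = 0.
    rq_job.save_meta()

    # new style: attrs-backed wrapper parsed from rq_job.meta
    rq_job_meta = RQMeta.from_job(rq_job)
    rq_job_meta.status = 'Dataset import has been started...'
    rq_job_meta.progress = 0.
    rq_job_meta.save()  # writes the fields back via rq_job.save_meta()

Under the hood it is still the same rq_job.meta dict: from_job() builds the wrapper from it, and save() delegates to rq_job.save_meta(), so call sites gain attribute access without changing the stored format.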
+ rq_job_meta.save() project = ProjectAnnotationAndData(project_id) project.init_from_db() diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 351bd4c76491..3d8567f3c500 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -22,6 +22,7 @@ from cvat.apps.engine.models import Job, Project, Task from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.utils import get_rq_lock_by_user +from cvat.apps.engine.rq_job_handler import RQMeta from .formats.registry import EXPORT_FORMATS, IMPORT_FORMATS from .util import ( @@ -88,7 +89,8 @@ def _patched_retry(*_1, **_2): settings.CVAT_QUEUES.EXPORT_DATA.value ) - user_id = current_rq_job.meta.get('user', {}).get('id') or -1 + rq_job_meta = RQMeta.from_job(current_rq_job) + user_id = rq_job_meta.user.id or -1 with get_rq_lock_by_user(settings.CVAT_QUEUES.EXPORT_DATA.value, user_id): scheduled_rq_job: rq.job.Job = scheduler.enqueue_in( @@ -97,7 +99,7 @@ def _patched_retry(*_1, **_2): *current_rq_job.args, **current_rq_job.kwargs, job_id=current_rq_job.id, - meta=RQMeta.reset_meta_on_retry(current_rq_job.meta), + meta=rq_job_meta.reset_meta_on_retry(), job_ttl=current_rq_job.ttl, job_result_ttl=current_rq_job.result_ttl, job_description=current_rq_job.description, diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index f2b5d0e89b6d..180bd8ae0fe1 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -7,7 +7,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, ClassVar, Optional, Union import django_rq from attrs.converters import to_bool @@ -23,7 +23,8 @@ from rq.job import JobStatus as RQJobStatus import cvat.apps.dataset_manager as dm -from cvat.apps.dataset_manager.util import extend_export_file_lifetime +from cvat.apps.dataset_manager.util import get_export_cache_lock +from cvat.apps.dataset_manager.views import get_export_cache_ttl from cvat.apps.engine import models from cvat.apps.engine.backup import ProjectExporter, TaskExporter, create_backup from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage @@ -38,13 +39,12 @@ Task, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq_job_handler import RQId, RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQId, RQMeta from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import ( build_annotations_file_name, build_backup_file_name, define_dependent_job, - get_rq_job_meta, get_rq_lock_by_user, get_rq_lock_for_job, sendfile, @@ -59,43 +59,63 @@ LOCK_ACQUIRE_TIMEOUT = LOCK_TTL - 5 -class _ResourceExportManager(ABC): +class ResourceExportManager(ABC): QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value + SUPPORTED_RESOURCES: ClassVar[set[RequestSubresource]] + SUPPORTEd_SUBRESOURCES: ClassVar[set[RequestSubresource]] def __init__( self, - version: int, db_instance: Union[models.Project, models.Task, models.Job], - *, - export_callback: Callable, + request: Request, ) -> None: """ Args: - version (int): API endpoint version to use. 
Possible options: 1 or 2 db_instance (Union[models.Project, models.Task, models.Job]): Model instance export_callback (Callable): Main function that will be executed in the background """ - self.version = version self.db_instance = db_instance + self.request = request self.resource = db_instance.__class__.__name__.lower() if self.resource not in self.SUPPORTED_RESOURCES: raise ValueError("Unexpected type of db_instance: {}".format(type(db_instance))) + def initialize_export_args(self, *, export_callback: Callable[..., str]) -> None: self.export_callback = export_callback @abstractmethod - def export(self) -> Response: + def validate_export_args(self) -> Response | None: pass - @abstractmethod - def setup_background_job(self, queue: DjangoRQ, rq_id: str) -> None: - pass + def export(self) -> Response: + assert hasattr(self, "export_callback") + assert hasattr(self, "export_args") + + if invalid_response := self.validate_export_args(): + return invalid_response + + queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) + rq_id = self.build_rq_id() + + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(queue, rq_id): + rq_job = queue.fetch_job(rq_id) + if response := self.handle_rq_job(rq_job, queue): + return response + self.setup_background_job(queue, rq_id) + + self.send_events() + + serializer = RqIdSerializer(data={"rq_id": rq_id}) + serializer.is_valid(raise_exception=True) + + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) @abstractmethod - def _handle_rq_job_v1(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Response]: + def setup_background_job(self, queue: DjangoRQ, rq_id: str) -> None: pass - def _handle_rq_job_v2(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Response]: + def handle_rq_job(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Response]: if not rq_job: return None @@ -119,27 +139,16 @@ def _handle_rq_job_v2(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optiona rq_job.delete() return None - def handle_rq_job(self, rq_job: RQJob | None, queue: DjangoRQ) -> Optional[Response]: - if self.version == 1: - return self._handle_rq_job_v1(rq_job, queue) - elif self.version == 2: - return self._handle_rq_job_v2(rq_job, queue) - - raise ValueError("Unsupported version") - @abstractmethod - def get_v1_endpoint_view_name(self) -> str: - pass + def get_download_api_endpoint_view_name(self) -> str: ... - def make_result_url(self) -> str: - view_name = self.get_v1_endpoint_view_name() + def make_result_url(self, *, rq_id: str) -> str: + view_name = self.get_download_api_endpoint_view_name() result_url = reverse(view_name, args=[self.db_instance.pk], request=self.request) - query_dict = self.request.query_params.copy() - query_dict["action"] = "download" - result_url += "?" 
+ query_dict.urlencode() - return result_url + return result_url + f"?rq_id={rq_id}" + # TODO: move method to the model class (or remove it and use just instance.updated_date) def get_instance_update_time(self) -> datetime: instance_update_time = timezone.localtime(self.db_instance.updated_date) if isinstance(self.db_instance, Project): @@ -152,9 +161,84 @@ def get_instance_update_time(self) -> datetime: instance_update_time = max(tasks_update + [instance_update_time]) return instance_update_time + # TODO: move into a model class def get_timestamp(self, time_: datetime) -> str: return datetime.strftime(time_, "%Y_%m_%d_%H_%M_%S") + # TODO: drop ext support + @abstractmethod + def get_result_filename_and_ext(self) -> tuple[str, str | None]: ... + + def validate_rq_id(self, *, rq_id: str | None) -> HttpResponseBadRequest | None: + if not rq_id: + return HttpResponseBadRequest("Missing request id in query parameters") + + parsed_rq_id = RQId.parse(rq_id) + assert parsed_rq_id.action == RequestAction.EXPORT + assert parsed_rq_id.target == RequestTarget(self.resource) + assert parsed_rq_id.identifier == self.db_instance.pk + assert parsed_rq_id.subresource in self.SUPPORTEd_SUBRESOURCES + + @abstractmethod + def build_rq_id(self) -> str: ... + + @abstractmethod + def send_events(self) -> None: ... + + def download_file(self) -> Response: + queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) + rq_id = self.request.query_params.get("rq_id") + + if invalid_response := self.validate_rq_id(rq_id=rq_id): + return invalid_response + + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(queue, rq_id): + rq_job = queue.fetch_job(rq_id) + + if not rq_job: + return HttpResponseBadRequest("Unknown export request id") + + # define status once to avoid refreshing it on each check + # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases + rq_job_status = rq_job.get_status(refresh=False) + + # handle cases where the status is None for some reason + if rq_job_status != RQJobStatus.FINISHED: + return Response(status=status.HTTP_204_NO_CONTENT) + + rq_job_meta = RQMeta.from_job(rq_job) + file_path = rq_job.return_value() + + if not file_path: + return ( + Response( + "A result for exporting job was not found for finished RQ job", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + if rq_job_meta.get_export_result_url() + else Response(status=status.HTTP_204_NO_CONTENT) + ) + + with get_export_cache_lock( + file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT + ): + if not osp.exists(file_path): + return Response( + "The exported file has expired, please retry exporting", + status=status.HTTP_404_NOT_FOUND, + ) + + # TODO: write redis migration + filename = rq_job_meta.result.filename + osp.splitext(file_path)[1] + + return sendfile( + self.request, + file_path, + attachment=True, + attachment_filename=filename, + ) + def cancel_and_delete(rq_job: RQJob) -> None: # In the case the server is configured with ONE_RUNNING_JOB_IN_QUEUE_PER_USER @@ -163,8 +247,9 @@ def cancel_and_delete(rq_job: RQJob) -> None: rq_job.delete() -class DatasetExportManager(_ResourceExportManager): - SUPPORTED_RESOURCES = {"project", "task", "job"} +class DatasetExportManager(ResourceExportManager): + SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} + SUPPORTEd_SUBRESOURCES = {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} @dataclass class ExportArgs: @@ -177,32 
+262,27 @@ class ExportArgs: def location(self) -> Location: return self.location_config["location"] - def __init__( + def initialize_export_args( self, - db_instance: Union[models.Project, models.Task, models.Job], - request: Request, - export_callback: Callable, - save_images: Optional[bool] = None, *, - version: int = 2, + export_callback: Callable | None = None, + save_images: bool | None = None, ) -> None: - super().__init__(version, db_instance, export_callback=export_callback) - self.request = request - - format_name = request.query_params.get("format", "") - filename = request.query_params.get("filename", "") + super().initialize_export_args(export_callback=export_callback) + format_name = self.request.query_params.get("format", "") + filename = self.request.query_params.get("filename", "") # can be passed directly when it is initialized based on API request, not query param save_images = ( save_images if save_images is not None - else to_bool(request.query_params.get("save_images", False)) + else to_bool(self.request.query_params.get("save_images", False)) ) try: location_config = get_location_configuration( - db_instance=db_instance, - query_params=request.query_params, + db_instance=self.db_instance, + query_params=self.request.query_params, field_name=StorageType.TARGET, ) except ValueError as ex: @@ -222,162 +302,7 @@ def __init__( location_config=location_config, ) - def _handle_rq_job_v1( - self, - rq_job: Optional[RQJob], - queue: DjangoRQ, - ) -> Optional[Response]: - - def is_result_outdated() -> bool: - return rq_job.meta[RQJobMetaField.REQUEST]["timestamp"] < instance_update_time - - def handle_local_download() -> Response: - with dm.util.get_export_cache_lock( - file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT - ): - if not osp.exists(file_path): - return Response( - "The exported file has expired, please retry exporting", - status=status.HTTP_404_NOT_FOUND, - ) - - filename = self.export_args.filename or build_annotations_file_name( - class_name=self.resource, - identifier=( - self.db_instance.name - if isinstance(self.db_instance, (Task, Project)) - else self.db_instance.id - ), - timestamp=instance_timestamp, - format_name=self.export_args.format, - is_annotation_file=not self.export_args.save_images, - extension=osp.splitext(file_path)[1], - ) - - rq_job.delete() - return sendfile( - self.request, - file_path, - attachment=True, - attachment_filename=filename, - ) - - action = self.request.query_params.get("action") - - if action not in {None, "download"}: - raise serializers.ValidationError( - f"Unexpected action {action!r} specified for the request" - ) - - msg_no_such_job_when_downloading = ( - "Unknown export request id. " - "Please request export first by sending a request without the action=download parameter." 
- ) - if not rq_job: - return ( - None - if action != "download" - else HttpResponseBadRequest(msg_no_such_job_when_downloading) - ) - - # define status once to avoid refreshing it on each check - # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases - rq_job_status = rq_job.get_status(refresh=False) - - # handle cases where the status is None for some reason - if not rq_job_status: - rq_job.delete() - return ( - None - if action != "download" - else HttpResponseBadRequest(msg_no_such_job_when_downloading) - ) - - if action == "download": - if self.export_args.location != Location.LOCAL: - return HttpResponseBadRequest( - 'Action "download" is only supported for a local dataset location' - ) - if rq_job_status not in { - RQJobStatus.FINISHED, - RQJobStatus.FAILED, - RQJobStatus.CANCELED, - RQJobStatus.STOPPED, - }: - return HttpResponseBadRequest("Dataset export has not been finished yet") - - instance_update_time = self.get_instance_update_time() - instance_timestamp = self.get_timestamp(instance_update_time) - - if rq_job_status == RQJobStatus.FINISHED: - if self.export_args.location == Location.CLOUD_STORAGE: - rq_job.delete() - return Response(status=status.HTTP_200_OK) - elif self.export_args.location == Location.LOCAL: - file_path = rq_job.return_value() - - if not file_path: - return Response( - "A result for exporting job was not found for finished RQ job", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - if action == "download": - return handle_local_download() - else: - with dm.util.get_export_cache_lock( - file_path, - ttl=LOCK_TTL, - acquire_timeout=LOCK_ACQUIRE_TIMEOUT, - ): - if osp.exists(file_path) and not is_result_outdated(): - extend_export_file_lifetime(file_path) - - return Response(status=status.HTTP_201_CREATED) - - cancel_and_delete(rq_job) - return None - else: - raise NotImplementedError( - f"Export to {self.export_args.location} location is not implemented yet" - ) - elif rq_job_status == RQJobStatus.FAILED: - exc_info = rq_job.meta.get(RQJobMetaField.FORMATTED_EXCEPTION, str(rq_job.exc_info)) - rq_job.delete() - return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - elif ( - rq_job_status == RQJobStatus.DEFERRED - and rq_job.id not in queue.deferred_job_registry.get_job_ids() - ): - # Sometimes jobs can depend on outdated jobs in the deferred jobs registry. - # They can be fetched by their specific ids, but are not listed by get_job_ids(). - # Supposedly, this can happen because of the server restarts - # (potentially, because the redis used for the queue is in memory). - # Another potential reason is canceling without enqueueing dependents. - # Such dependencies are never removed or finished, - # as there is no TTL for deferred jobs, - # so the current job can be blocked indefinitely. 
- cancel_and_delete(rq_job) - return None - - elif rq_job_status in {RQJobStatus.CANCELED, RQJobStatus.STOPPED}: - rq_job.delete() - return ( - None - if action != "download" - else Response( - "Export was cancelled, please request it one more time", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - ) - - if is_result_outdated(): - cancel_and_delete(rq_job) - return None - - return Response(RqIdSerializer({"rq_id": rq_job.id}).data, status=status.HTTP_202_ACCEPTED) - - def export(self) -> Response: + def validate_export_args(self): format_desc = {f.DISPLAY_NAME: f for f in dm.views.get_export_formats()}.get( self.export_args.format ) @@ -386,8 +311,8 @@ def export(self) -> Response: elif not format_desc.ENABLED: return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) - queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = RQId( + def build_rq_id(self): + return RQId( RequestAction.EXPORT, RequestTarget(self.resource), self.db_instance.pk, @@ -400,13 +325,7 @@ def export(self) -> Response: user_id=self.request.user.id, ).render() - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - if response := self.handle_rq_job(rq_job, queue): - return response - self.setup_background_job(queue, rq_id) - + def send_events(self): handle_dataset_export( self.db_instance, format_name=self.export_args.format, @@ -414,11 +333,6 @@ def export(self) -> Response: save_images=self.export_args.save_images, ) - serializer = RqIdSerializer(data={"rq_id": rq_id}) - serializer.is_valid(raise_exception=True) - - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - def setup_background_job( self, queue: DjangoRQ, @@ -431,7 +345,7 @@ def setup_background_job( except Exception: server_address = None - cache_ttl = dm.views.get_export_cache_ttl(self.db_instance) + cache_ttl = get_export_cache_ttl(self.db_instance) user_id = self.request.user.id @@ -453,8 +367,11 @@ def setup_background_job( request=self.request, is_default=self.export_args.location_config["is_default"], ) + ###----------------------------------------### instance_update_time = self.get_instance_update_time() instance_timestamp = self.get_timestamp(instance_update_time) + # todo: think how improve it + # TODO: check that there is no filename.zip.zip in case when filename is specified filename_pattern = build_annotations_file_name( class_name=self.db_instance.__class__.__name__, identifier=( @@ -466,6 +383,7 @@ def setup_background_job( format_name=self.export_args.format, is_annotation_file=not self.export_args.save_images, ) + ###----------------------------------------### func = export_resource_to_cloud_storage func_args = ( db_storage, @@ -475,9 +393,12 @@ def setup_background_job( ) + func_args else: db_storage = None - result_url = self.make_result_url() + result_url = self.make_result_url(rq_id=rq_id) with get_rq_lock_by_user(queue, user_id): + result_filename, result_ext = self.get_result_filename_and_ext() + meta = RQMeta.build_base(request=self.request, db_obj=self.db_instance) + RQMeta.update_result_info(meta, result_url=result_url, result_filename=result_filename, result_file_ext=result_ext) queue.enqueue_call( func=func, args=func_args, @@ -485,33 +406,37 @@ def setup_background_job( "server_url": server_address, }, job_id=rq_id, - meta=get_rq_job_meta( - request=self.request, db_obj=self.db_instance, result_url=result_url - ), + meta=meta, depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), 
result_ttl=cache_ttl.total_seconds(), failure_ttl=cache_ttl.total_seconds(), ) - def get_v1_endpoint_view_name(self) -> str: - """ - Get view name of the endpoint for the first API version + def get_result_filename_and_ext(self) -> tuple[str, str | None]: + filename = self.export_args.filename - Possible view names: - - project-dataset - - task|job-dataset-export - - project|task|job-annotations - """ - if self.export_args.save_images: - template = "{}-dataset" + ("-export" if self.resource != "project" else "") - else: - template = "{}-annotations" + if filename: + return osp.splitext(filename) + + instance_update_time = self.get_instance_update_time() + instance_timestamp = self.get_timestamp(instance_update_time) + filename = build_annotations_file_name( + class_name=self.resource, + identifier=self.db_instance.id, + timestamp=instance_timestamp, + format_name=self.export_args.format, + is_annotation_file=not self.export_args.save_images, + ) + + return filename, None - return template.format(self.resource) + def get_download_api_endpoint_view_name(self) -> str: + return f"{self.resource}-download-dataset" -class BackupExportManager(_ResourceExportManager): - SUPPORTED_RESOURCES = {"project", "task"} +class BackupExportManager(ResourceExportManager): + SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} + SUPPORTEd_SUBRESOURCES = {RequestSubresource.BACKUP} @dataclass class ExportArgs: @@ -522,17 +447,9 @@ class ExportArgs: def location(self) -> Location: return self.location_config["location"] - def __init__( - self, - db_instance: Union[models.Project, models.Task], - request: Request, - *, - version: int = 2, - ) -> None: - super().__init__(version, db_instance, export_callback=create_backup) - self.request = request - - filename = request.query_params.get("filename", "") + def initialize_export_args(self) -> None: + super().initialize_export_args(export_callback=create_backup) + filename = self.request.query_params.get("filename", "") location_config = get_location_configuration( db_instance=self.db_instance, @@ -541,143 +458,28 @@ def __init__( ) self.export_args = self.ExportArgs(filename, location_config) - def _handle_rq_job_v1( - self, - rq_job: Optional[RQJob], - queue: DjangoRQ, - ) -> Optional[Response]: + def validate_export_args(self): + return - def is_result_outdated() -> bool: - return rq_job.meta[RQJobMetaField.REQUEST]["timestamp"] < last_instance_update_time + def get_result_filename_and_ext(self) -> tuple[str, str | None]: + filename = self.export_args.filename - last_instance_update_time = timezone.localtime(self.db_instance.updated_date) - timestamp = self.get_timestamp(last_instance_update_time) + if filename: + return osp.splitext(filename) - action = self.request.query_params.get("action") - if action not in (None, "download"): - raise serializers.ValidationError( - f"Unexpected action {action!r} specified for the request" - ) + instance_update_time = self.get_instance_update_time() + instance_timestamp = self.get_timestamp(instance_update_time) - msg_no_such_job_when_downloading = ( - "Unknown export request id. " - "Please request export first by sending a request without the action=download parameter." 
+ filename = build_backup_file_name( + class_name=self.resource, + identifier=self.db_instance.name, + timestamp=instance_timestamp, ) - if not rq_job: - return ( - None - if action != "download" - else HttpResponseBadRequest(msg_no_such_job_when_downloading) - ) - - # define status once to avoid refreshing it on each check - # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of None in one of the next releases - rq_job_status = rq_job.get_status(refresh=False) - # handle cases where the status is None for some reason - if not rq_job_status: - rq_job.delete() - return ( - None - if action != "download" - else HttpResponseBadRequest(msg_no_such_job_when_downloading) - ) - - if action == "download": - if self.export_args.location != Location.LOCAL: - return HttpResponseBadRequest( - 'Action "download" is only supported for a local backup location' - ) - if rq_job_status not in { - RQJobStatus.FINISHED, - RQJobStatus.FAILED, - RQJobStatus.CANCELED, - RQJobStatus.STOPPED, - }: - return HttpResponseBadRequest("Backup export has not been finished yet") - - if rq_job_status == RQJobStatus.FINISHED: - if self.export_args.location == Location.CLOUD_STORAGE: - rq_job.delete() - return Response(status=status.HTTP_200_OK) - elif self.export_args.location == Location.LOCAL: - file_path = rq_job.return_value() - - if not file_path: - return Response( - "Export is completed, but has no results", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) + return filename, None - if action == "download": - with dm.util.get_export_cache_lock( - file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT - ): - if not os.path.exists(file_path): - return Response( - "The backup file has been expired, please retry backing up", - status=status.HTTP_404_NOT_FOUND, - ) - - filename = self.export_args.filename or build_backup_file_name( - class_name=self.resource, - identifier=self.db_instance.name, - timestamp=timestamp, - extension=os.path.splitext(file_path)[1], - ) - - rq_job.delete() - return sendfile( - self.request, file_path, attachment=True, attachment_filename=filename - ) - with dm.util.get_export_cache_lock( - file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT - ): - if osp.exists(file_path) and not is_result_outdated(): - extend_export_file_lifetime(file_path) - return Response(status=status.HTTP_201_CREATED) - - cancel_and_delete(rq_job) - return None - else: - raise NotImplementedError( - f"Export to {self.export_args.location} location is not implemented yet" - ) - elif rq_job_status == RQJobStatus.FAILED: - exc_info = rq_job.meta.get(RQJobMetaField.FORMATTED_EXCEPTION, str(rq_job.exc_info)) - rq_job.delete() - return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - elif ( - rq_job_status == RQJobStatus.DEFERRED - and rq_job.id not in queue.deferred_job_registry.get_job_ids() - ): - # Sometimes jobs can depend on outdated jobs in the deferred jobs registry. - # They can be fetched by their specific ids, but are not listed by get_job_ids(). - # Supposedly, this can happen because of the server restarts - # (potentially, because the redis used for the queue is in memory). - # Another potential reason is canceling without enqueueing dependents. - # Such dependencies are never removed or finished, - # as there is no TTL for deferred jobs, - # so the current job can be blocked indefinitely. 
- cancel_and_delete(rq_job) - return None - - elif rq_job_status in {RQJobStatus.CANCELED, RQJobStatus.STOPPED}: - rq_job.delete() - return ( - None - if action != "download" - else Response( - "Export was cancelled, please request it one more time", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - ) - - return Response(RqIdSerializer({"rq_id": rq_job.id}).data, status=status.HTTP_202_ACCEPTED) - - def export(self) -> Response: - queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = RQId( + def build_rq_id(self): + return RQId( RequestAction.EXPORT, RequestTarget(self.resource), self.db_instance.pk, @@ -685,18 +487,6 @@ def export(self) -> Response: user_id=self.request.user.id, ).render() - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - if response := self.handle_rq_job(rq_job, queue): - return response - self.setup_background_job(queue, rq_id) - - serializer = RqIdSerializer(data={"rq_id": rq_id}) - serializer.is_valid(raise_exception=True) - - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) - def setup_background_job( self, queue: DjangoRQ, @@ -751,24 +541,27 @@ def setup_background_job( self.export_callback, ) + func_args else: - result_url = self.make_result_url() + result_url = self.make_result_url(rq_id=rq_id) user_id = self.request.user.id with get_rq_lock_by_user(queue, user_id): + result_filename, result_ext = self.get_result_filename_and_ext() + meta = RQMeta.build_base(request=self.request, db_obj=self.db_instance) + RQMeta.update_result_info(meta, result_url=result_url, result_filename=result_filename, result_file_ext=result_ext) + queue.enqueue_call( func=func, args=func_args, job_id=rq_id, - meta=get_rq_job_meta( - request=self.request, db_obj=self.db_instance, result_url=result_url - ), + meta=meta, depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), result_ttl=cache_ttl.total_seconds(), failure_ttl=cache_ttl.total_seconds(), ) - def get_v1_endpoint_view_name(self) -> str: - """Get view name of the endpoint for the first API version""" + def get_download_api_endpoint_view_name(self) -> str: + return f"{self.resource}-download-backup" - return f"{self.resource}-export-backup" + def send_events(self): + pass diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 3d9e420c6a03..e2fcc219be30 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -17,6 +17,9 @@ from tempfile import NamedTemporaryFile from typing import Any, ClassVar, Optional, Type, Union from zipfile import ZipFile +from cvat.apps.engine.middleware import PatchedRequest +from rq.job import Job as RQJob +from cvat.apps.engine.rq_job_handler import RQMeta import django_rq from django.conf import settings @@ -59,7 +62,7 @@ StorageMethodChoice, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq_job_handler import RQId, RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQId from cvat.apps.engine.serializers import ( AnnotationGuideWriteSerializer, AssetWriteSerializer, @@ -1131,11 +1134,10 @@ def create_backup( log_exception(logger) raise -def _import(importer, request, queue, rq_id, Serializer, file_field_name, location_conf, filename=None): - rq_job = queue.fetch_job(rq_id) - if (user_id_from_meta := getattr(rq_job, 'meta', {}).get(RQJobMetaField.USER, {}).get('id')) and user_id_from_meta != request.user.id: - return Response(status=status.HTTP_403_FORBIDDEN) + 
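In the reworked `_import` that follows, the requester check moves: the old version read the user id out of the raw meta dict via nested `getattr(...).get(...)` calls before confirming the job exists, while the new version performs the check only in the `else` branch, once the job is known to exist, through the same `RQMeta` wrapper. A condensed sketch of the resulting control flow, using only names defined in this patch:

    rq_job = queue.fetch_job(rq_id)
    if not rq_job:
        ...  # first request for this rq_id: enqueue the import job
    else:
        # only the user who started the import may poll or re-trigger it
        rq_job_meta = RQMeta.from_job(rq_job)
        if rq_job_meta.user.id != request.user.id:
            return Response(status=status.HTTP_403_FORBIDDEN)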
+def _import(importer, request: PatchedRequest, queue, rq_id, Serializer, file_field_name, location_conf, filename=None): + rq_job: RQJob = queue.fetch_job(rq_id) if not rq_job: org_id = getattr(request.iam_context['organization'], 'id', None) @@ -1185,6 +1187,7 @@ def _import(importer, request, queue, rq_id, Serializer, file_field_name, locati func=func, args=func_args, job_id=rq_id, + # TODO: meta={ 'tmp_file': filename, **get_rq_job_meta(request=request, db_obj=None) @@ -1194,6 +1197,10 @@ def _import(importer, request, queue, rq_id, Serializer, file_field_name, locati failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) else: + rq_job_meta = RQMeta.from_job(rq_job) + if rq_job_meta.user.id != request.user.id: + return Response(status=status.HTTP_403_FORBIDDEN) + if rq_job.is_finished: project_id = rq_job.return_value() rq_job.delete() diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index ffe8fe0cb920..ca825383d3d0 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -53,7 +53,7 @@ ZipCompressedChunkWriter, load_image, ) -from cvat.apps.engine.rq_job_handler import RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, format_list, @@ -107,9 +107,10 @@ def wait_for_rq_job(rq_job: rq.job.Job): if job_status in ("finished",): return elif job_status in ("failed",): - job_meta = rq_job.get_meta() - exc_type = job_meta.get(RQJobMetaField.EXCEPTION_TYPE, Exception) - exc_args = job_meta.get(RQJobMetaField.EXCEPTION_ARGS, ("Cannot create chunk",)) + rq_job.get_meta() # refresh from Redis + job_meta = RQMeta.from_job(rq_job) + exc_type = job_meta.exc_type or Exception + exc_args = job_meta.exc_args or ("Cannot create chunk",) raise exc_type(*exc_args) time.sleep(settings.CVAT_CHUNK_CREATE_CHECK_INTERVAL) diff --git a/cvat/apps/engine/middleware.py b/cvat/apps/engine/middleware.py index 2e8f116f4ecd..3c46dd958941 100644 --- a/cvat/apps/engine/middleware.py +++ b/cvat/apps/engine/middleware.py @@ -4,6 +4,14 @@ from uuid import uuid4 +from typing import Protocol +from rest_framework.request import Request + +class WithUUID(Protocol): + uuid: str + +class PatchedRequest(Request, WithUUID): + pass class RequestTrackingMiddleware: def __init__(self, get_response): diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 9e69ffdd5ccb..2ba6b2993a6c 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -13,14 +13,13 @@ from pathlib import Path from tempfile import NamedTemporaryFile from textwrap import dedent -from typing import Any, Callable, Optional +from typing import Callable from unittest import mock from urllib.parse import urljoin import django_rq from attr.converters import to_bool from django.conf import settings -from django.http import HttpRequest from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema from rest_framework import mixins, status @@ -28,6 +27,7 @@ from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.views import APIView +from cvat.apps.engine.middleware import PatchedRequest from cvat.apps.engine.background import BackupExportManager, DatasetExportManager from cvat.apps.engine.handlers import clear_import_cache @@ -416,27 +416,7 @@ def partial_update(self, request, *args, **kwargs): with mock.patch.object(self, 'update', new=self._update, create=True): return 
mixins.UpdateModelMixin.partial_update(self, request=request, *args, **kwargs) - class DatasetMixin: - def export_dataset_v1( - self, - request, - save_images: bool, - *, - get_data: Optional[Callable[[int], dict[str, Any]]] = None, - ) -> Response: - if request.query_params.get("format"): - callback = self.get_export_callback(save_images) - - dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, version=1) - return dataset_export_manager.export() - - if not get_data: - return Response("Format is not specified", status=status.HTTP_400_BAD_REQUEST) - - data = get_data(self._object.pk) - return Response(data) - @extend_schema( summary='Initialize process to export resource as a dataset in a specific format', description=dedent("""\ @@ -466,14 +446,33 @@ def export_dataset_v1( }, ) @action(detail=True, methods=['POST'], serializer_class=None, url_path='dataset/export') - def export_dataset_v2(self, request: HttpRequest, pk: int): + def initialize_dataset_export(self, request: PatchedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() save_images = is_dataset_export(request) callback = self.get_export_callback(save_images) - dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, version=2) - return dataset_export_manager.export() + export_manager = DatasetExportManager(self._object, request) + export_manager.initialize_export_args(export_callback=callback, save_images=save_images) + + return export_manager.export() + + @extend_schema(summary='Download a prepared dataset file', + parameters=[ + OpenApiParameter('rq_id', description='Request ID', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), + ], + responses={ + '200': OpenApiResponse(description='Download of file started'), + '204': OpenApiResponse(description='No prepared dataset file related with provider request ID'), + }, + exclude=True, # private API endpoint that should be used only as result_url + ) + @action(methods=['GET'], detail=True, url_path='dataset/download') + def download_dataset(self, request: PatchedRequest, pk: int): + obj = self.get_object() # force to call check_object_permissions + export_manager = DatasetExportManager(obj, request) + return export_manager.download_file() # FUTURE-TODO: migrate to new API def import_annotations(self, request, db_obj, import_func, rq_func, rq_id_factory): @@ -508,19 +507,8 @@ def import_annotations(self, request, db_obj, import_func, rq_func, rq_id_factor class BackupMixin: - def export_backup_v1(self, request: HttpRequest) -> Response: - db_object = self.get_object() # force to call check_object_permissions - - export_backup_manager = BackupExportManager(db_object, request, version=1) - response = export_backup_manager.export() - - if request.query_params.get('action') != 'download': - response.headers['Deprecated'] = True - - return response - # FUTURE-TODO: migrate to new API - def import_backup_v1(self, request: HttpRequest, import_func: Callable) -> Response: + def import_backup_v1(self, request: PatchedRequest, import_func: Callable) -> Response: location = request.query_params.get("location", Location.LOCAL) if location == Location.CLOUD_STORAGE: file_name = request.query_params.get("filename", "") @@ -554,11 +542,29 @@ def import_backup_v1(self, request: HttpRequest, import_func: Callable) -> Respo }, ) @action(detail=True, methods=['POST'], serializer_class=None, url_path='backup/export') - def 
export_backup_v2(self, request: HttpRequest, pk: int): + def initialize_backup_export(self, request: PatchedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions + export_manager = BackupExportManager(db_object, request) + export_manager.initialize_export_args() + return export_manager.export() - export_backup_manager = BackupExportManager(db_object, request, version=2) - return export_backup_manager.export() + + @extend_schema(summary='Download a prepared backup file', + parameters=[ + OpenApiParameter('rq_id', description='Request ID', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), + ], + responses={ + '200': OpenApiResponse(description='Download of file started'), + '204': OpenApiResponse(description='No prepared backup file related with provider request ID'), + }, + exclude=True, # private API endpoint that should be used only as result_url + ) + @action(methods=['GET'], detail=True, url_path='backup/download') + def download_backup(self, request: PatchedRequest, pk: int): + obj = self.get_object() # force to call check_object_permissions + export_manager = BackupExportManager(obj, request) + return export_manager.download_file() class CsrfWorkaroundMixin(APIView): diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index a180410142cd..dc0e403af471 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -22,7 +22,9 @@ ) from cvat.apps.organizations.models import Organization -from .models import AnnotationGuide, CloudStorage, Issue, Job, Label, Project, Task +from .models import AnnotationGuide, CloudStorage, Issue, Job, Label, Project, Task, RequestAction, RequestTarget, RequestSubresource +from cvat.apps.engine.rq_job_handler import RQId +from cvat.apps.engine.middleware import PatchedRequest def _get_key(d: dict[str, Any], key_path: Union[str, Sequence[str]]) -> Optional[Any]: @@ -221,9 +223,10 @@ class Scopes(StrEnum): EXPORT_DATASET = 'export:dataset' EXPORT_BACKUP = 'export:backup' IMPORT_BACKUP = 'import:backup' + DOWNLOAD_EXPORTED_FILE = 'retrieve:exported_file' @classmethod - def create(cls, request, view, obj, iam_context): + def create(cls, request: PatchedRequest, view, obj, iam_context): permissions = [] if view.basename == 'project': assignee_id = request.data.get('assignee_id') or request.data.get('assignee') @@ -232,6 +235,21 @@ def create(cls, request, view, obj, iam_context): assignee_id=assignee_id) permissions.append(self) + + if scope == cls.Scopes.DOWNLOAD_EXPORTED_FILE: + # check that a user still has rights to export project dataset|backup + rq_id = request.query_params.get('rq_id') + assert rq_id + parsed_rq_id = RQId.parse(rq_id) + if ( + # TODO: move these checks to view class + parsed_rq_id.action != RequestAction.EXPORT + or parsed_rq_id.target != RequestTarget.PROJECT + or parsed_rq_id.identifier != obj.id + or parsed_rq_id.user_id != iam_context.get('user_id', request.user.id) + ): + raise PermissionDenied('You don\'t have permission to perform this action') + if view.action == 'tasks': perm = TaskPermission.create_scope_list(request, iam_context) permissions.append(perm) @@ -277,15 +295,19 @@ def get_scopes(request, view, obj): ('dataset', 'POST'): Scopes.IMPORT_DATASET, ('append_dataset_chunk', 'HEAD'): Scopes.IMPORT_DATASET, ('append_dataset_chunk', 'PATCH'): Scopes.IMPORT_DATASET, - ('annotations', 'GET'): Scopes.EXPORT_ANNOTATIONS, - ('dataset', 'GET'): Scopes.IMPORT_DATASET if request.query_params.get('action') == 'import_status' 
else Scopes.EXPORT_DATASET, - ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, - ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, - ('export_backup_v2', 'POST'): Scopes.EXPORT_BACKUP, + ('initialize_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('initialize_backup_export', 'POST'): Scopes.EXPORT_BACKUP, ('import_backup', 'POST'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'PATCH'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'HEAD'): Scopes.IMPORT_BACKUP, ('preview', 'GET'): Scopes.VIEW, + ('download_dataset', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, + ('download_backup', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, + # FUTURE-TODO: delete this after dropping support for deprecated API + ('annotations', 'GET'): Scopes.EXPORT_ANNOTATIONS, + ('dataset', 'GET'): Scopes.IMPORT_DATASET if request.query_params.get('action') == 'import_status' else Scopes.EXPORT_DATASET, + ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, + }[(view.action, request.method)] scopes = [] @@ -393,6 +415,7 @@ class Scopes(StrEnum): EXPORT_BACKUP = 'export:backup' VIEW_VALIDATION_LAYOUT = 'view:validation_layout' UPDATE_VALIDATION_LAYOUT = 'update:validation_layout' + DOWNLOAD_EXPORTED_FILE = 'retrieve:exported_file' @classmethod def create(cls, request, view, obj, iam_context): @@ -415,6 +438,20 @@ def create(cls, request, view, obj, iam_context): elif scope == __class__.Scopes.UPDATE_OWNER: params['owner_id'] = owner + elif scope == cls.Scopes.DOWNLOAD_EXPORTED_FILE: + # check that a user still has rights to export task dataset|backup + rq_id = request.query_params.get('rq_id') + assert rq_id + parsed_rq_id = RQId.parse(rq_id) + if ( + # TODO: move these checks to view class + parsed_rq_id.action != RequestAction.EXPORT + or parsed_rq_id.target != RequestTarget.TASK + or parsed_rq_id.identifier != obj.id + or parsed_rq_id.user_id != iam_context.get('user_id', request.user.id) + ): + raise PermissionDenied('You don\'t have permission to perform this action') + self = cls.create_base_perm(request, view, scope, iam_context, obj, **params) permissions.append(self) @@ -487,8 +524,7 @@ def get_scopes(request, view, obj) -> list[Scopes]: ('annotations', 'POST'): Scopes.IMPORT_ANNOTATIONS, ('append_annotations_chunk', 'PATCH'): Scopes.UPDATE_ANNOTATIONS, ('append_annotations_chunk', 'HEAD'): Scopes.UPDATE_ANNOTATIONS, - ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, - ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('initialize_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('metadata', 'GET'): Scopes.VIEW_METADATA, ('metadata', 'PATCH'): Scopes.UPDATE_METADATA, ('data', 'GET'): Scopes.VIEW_DATA, @@ -499,11 +535,15 @@ def get_scopes(request, view, obj) -> list[Scopes]: ('import_backup', 'POST'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'PATCH'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'HEAD'): Scopes.IMPORT_BACKUP, - ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, - ('export_backup_v2', 'POST'): Scopes.EXPORT_BACKUP, + ('initialize_backup_export', 'POST'): Scopes.EXPORT_BACKUP, ('preview', 'GET'): Scopes.VIEW, ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, + ('download_dataset', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, + ('download_backup', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, + # 
FUTURE-TODO: deprecated API + ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, + ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, }[(view.action, request.method)] scopes = [] @@ -626,6 +666,7 @@ class Scopes(StrEnum): UPDATE_METADATA = 'update:metadata' VIEW_VALIDATION_LAYOUT = 'view:validation_layout' UPDATE_VALIDATION_LAYOUT = 'update:validation_layout' + DOWNLOAD_EXPORTED_FILE = 'retrieve:exported_file' @classmethod def create(cls, request, view, obj, iam_context): @@ -649,6 +690,20 @@ def create(cls, request, view, obj, iam_context): request, task, iam_context=iam_context )) + elif scope == cls.Scopes.DOWNLOAD_EXPORTED_FILE: + # check that a user still has rights to export task dataset|backup + rq_id = request.query_params.get('rq_id') + assert rq_id + parsed_rq_id = RQId.parse(rq_id) + if ( + # TODO: move these checks to view class + parsed_rq_id.action != RequestAction.EXPORT + or parsed_rq_id.target != RequestTarget.JOB + or parsed_rq_id.identifier != obj.id + or parsed_rq_id.user_id != iam_context.get('user_id', request.user.id) + ): + raise PermissionDenied('You don\'t have permission to perform this action') + self = cls.create_base_perm(request, view, scope, iam_context, obj, **scope_params) permissions.append(self) @@ -718,11 +773,13 @@ def get_scopes(request, view, obj): ('metadata','GET'): Scopes.VIEW_METADATA, ('metadata','PATCH'): Scopes.UPDATE_METADATA, ('issues', 'GET'): Scopes.VIEW, - ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, - ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('initialize_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('preview', 'GET'): Scopes.VIEW, ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, + ('download_dataset', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, + # deprecated API + ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, }[(view.action, request.method)] scopes = [] diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index b4f146197afc..30fcfd1b3cfb 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -11,9 +11,106 @@ from rq.job import Job as RQJob from .models import RequestAction, RequestSubresource, RequestTarget +from django.db.models import Model +from django.utils import timezone +from datetime import datetime +from collections.abc import Iterable +from cvat.apps.engine.middleware import PatchedRequest +from attrs import asdict + +str_validator = attrs.validators.instance_of(str) +int_validator = attrs.validators.instance_of(int) +optional_str_validator = attrs.validators.optional(attrs.validators.instance_of(str)) +optional_int_validator = attrs.validators.optional(attrs.validators.instance_of(int)) +optional_bool_validator = attrs.validators.optional(attrs.validators.instance_of(bool)) +optional_float_validator = attrs.validators.optional(attrs.validators.instance_of(float)) + + +def _update_value(self: RQMeta, attribute: attrs.Attribute, value: Any): + setattr(self, attribute.name, value) + self.__job.meta[attribute.name] = value + + +@attrs.frozen +class UserInfo: + id: int = attrs.field(validator=[int_validator]) + username: str = attrs.field(validator=[str_validator]) + email: str = attrs.field(validator=[str_validator]) + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + +@attrs.frozen +class RequestInfo: + uuid: str = 
attrs.field(validator=[str_validator]) + # TODO: it is not timestamp + timestamp: datetime = attrs.field(validator=[attrs.validators.instance_of(datetime)]) + +@attrs.frozen +class ExportResultInfo: + url: str = attrs.field(validator=[str_validator]) + filename: str = attrs.field(validator=[str_validator]) + ext: str | None = attrs.field(validator=[optional_str_validator]) + +@attrs.define class RQMeta: + __job: RQJob = attrs.field(init=False) + + # immutable and required fields + user: UserInfo = attrs.field( + validator=[ + attrs.validators.instance_of(UserInfo) + ], + converter=lambda d: UserInfo(**d), + on_setattr=attrs.setters.frozen, + ) + request: RequestInfo = attrs.field( + validator=[attrs.validators.instance_of(RequestInfo)], + converter=lambda d: RequestInfo(**d), + on_setattr=attrs.setters.frozen, + ) + + # immutable and optional fields + org_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + org_slug: str | None = attrs.field(validator=[optional_str_validator], default=None, on_setattr=attrs.setters.frozen) + project_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + task_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + job_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + function_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + lambda_: bool | None = attrs.field(validator=[optional_bool_validator], default=None, on_setattr=attrs.setters.frozen) + + result: ExportResultInfo | None = attrs.field(default=None, converter=lambda d: ExportResultInfo(**d) if d else None) + + # mutable fields + status: str = attrs.field(validator=[optional_str_validator], default="", on_setattr=_update_value) + progress: float | None = attrs.field(validator=[optional_float_validator], default=None) + task_progress: float | None = attrs.field(validator=[optional_float_validator],default=None) + + formatted_exception: str | None = attrs.field(validator=[optional_str_validator], default=None) + exc_type: str | None = attrs.field(validator=[optional_str_validator], default=None) + exc_args: Iterable | None = attrs.field(default=None) + + def get_export_result_url(self) -> str | None: + # keep backward compatibility + return self.result.url or self.__job.meta.get(RQJobMetaField.RESULT_URL) + + # todo: + def get_export_filename(self): + pass # and ext + + @classmethod + def from_job(cls, rq_job: RQJob) -> "RQMeta": + meta = cls(**rq_job.meta) + meta.__job = rq_job + + return meta + + def save(self) -> None: + assert hasattr(self, "__job") and isinstance(self.__job, RQJob) + self.__job.save_meta() + @staticmethod def get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" @@ -24,14 +121,76 @@ def get_resettable_fields() -> list[RQJobMetaField]: RQJobMetaField.STATUS ] - @classmethod - def reset_meta_on_retry(cls, meta_to_update: dict[RQJobMetaField, Any]) -> dict[RQJobMetaField, Any]: - resettable_fields = cls.get_resettable_fields() + def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: + resettable_fields = self.get_resettable_fields() return { - k: v for k, v in meta_to_update.items() if k not in resettable_fields + k: v for k, v in self.__job.meta.items() if k not in resettable_fields } + def to_dict(self) -> dict: + d = 
asdict(self) + if v := d.pop(RQJobMetaField.LAMBDA + "_", None) is not None: + d[RQJobMetaField.LAMBDA] = v + + return d + + @classmethod + def build_base( + cls, + *, + request: PatchedRequest, + db_obj: Model, + ): + # to prevent circular import + from cvat.apps.events.handlers import job_id, organization_slug, task_id + from cvat.apps.webhooks.signals import organization_id, project_id + + oid = organization_id(db_obj) + oslug = organization_slug(db_obj) + pid = project_id(db_obj) + tid = task_id(db_obj) + jid = job_id(db_obj) + + user = request.user + + meta = cls( + user=asdict(UserInfo( + id=getattr(user, "id", None), + username=getattr(user, "username", None), + email=getattr(user, "email", None), + )), + request=asdict(RequestInfo( + uuid=request.uuid, + timestamp=timezone.localtime(), + )), + org_id=oid, + org_slug=oslug, + project_id=pid, + task_id=tid, + job_id=jid, + ) + + # TODO: do not include unset fields + return meta.to_dict() + + @classmethod + def update_result_info( + cls, + original_meta: dict[RQJobMetaField, Any], + *, + result_url: str, result_filename: str, result_file_ext: str | None = None + ) -> None: + original_meta[RQJobMetaField.RESULT] = asdict( + ExportResultInfo(url=result_url, filename=result_filename, ext=result_file_ext) + ) + + @classmethod + def update_lambda_info(cls, original_meta: dict[RQJobMetaField, Any], *, function_id: int) -> None: + original_meta[RQJobMetaField.FUNCTION_ID] = function_id + original_meta[RQJobMetaField.LAMBDA] = True + +# TODO: check that RQJobMetaField is used only in this module class RQJobMetaField: # common fields FORMATTED_EXCEPTION = "formatted_exception" @@ -40,6 +199,7 @@ class RQJobMetaField: PROJECT_ID = 'project_id' TASK_ID = 'task_id' JOB_ID = 'job_id' + LAMBDA = 'lambda' ORG_ID = 'org_id' ORG_SLUG = 'org_slug' STATUS = 'status' @@ -47,6 +207,7 @@ class RQJobMetaField: TASK_PROGRESS = 'task_progress' # export specific fields RESULT_URL = 'result_url' + RESULT = 'result' FUNCTION_ID = 'function_id' EXCEPTION_TYPE = 'exc_type' EXCEPTION_ARGS = 'exc_args' diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 6c760b42ba65..e5c3797260cf 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -40,7 +40,7 @@ from cvat.apps.engine.frame_provider import FrameQuality, TaskFrameProvider from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.permissions import TaskPermission -from cvat.apps.engine.rq_job_handler import RQId, RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQId, RQMeta from cvat.apps.engine.task_validation import HoneypotFrameSelector from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, @@ -1770,17 +1770,6 @@ def to_internal_value(self, data): def to_representation(self, instance): return instance.file if instance else instance -class RqStatusSerializer(serializers.Serializer): - state = serializers.ChoiceField(choices=[ - "Queued", "Started", "Finished", "Failed"]) - message = serializers.CharField(allow_blank=True, default="") - progress = serializers.FloatField(max_value=100, default=0) - - def __init__(self, instance=None, data=..., **kwargs): - warnings.warn("RqStatusSerializer is deprecated, " - "use cvat.apps.engine.serializers.RequestSerializer instead", DeprecationWarning) - super().__init__(instance, data, **kwargs) - class RqIdSerializer(serializers.Serializer): rq_id = serializers.CharField(help_text="Request id") @@ -3435,6 +3424,7 @@ class 
RequestDataOperationSerializer(serializers.Serializer): def to_representation(self, rq_job: RQJob) -> dict[str, Any]: parsed_rq_id: RQId = rq_job.parsed_rq_id + rq_job_meta = RQMeta.from_job(rq_job) return { "type": ":".join( @@ -3444,11 +3434,11 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: ] ), "target": parsed_rq_id.target, - "project_id": rq_job.meta[RQJobMetaField.PROJECT_ID], - "task_id": rq_job.meta[RQJobMetaField.TASK_ID], - "job_id": rq_job.meta[RQJobMetaField.JOB_ID], + "project_id": rq_job_meta.project_id, + "task_id": rq_job_meta.task_id, + "job_id": rq_job_meta.job_id, "format": parsed_rq_id.format, - "function_id": rq_job.meta.get(RQJobMetaField.FUNCTION_ID), + "function_id": rq_job_meta.function_id, } class RequestSerializer(serializers.Serializer): @@ -3475,15 +3465,19 @@ class RequestSerializer(serializers.Serializer): @extend_schema_field(UserIdentifiersSerializer()) def get_owner(self, rq_job: RQJob) -> dict[str, Any]: - return UserIdentifiersSerializer(rq_job.meta[RQJobMetaField.USER]).data + # TODO: define parsed meta once + rq_job_meta = RQMeta.from_job(rq_job) + return UserIdentifiersSerializer(rq_job_meta.user.to_dict()).data @extend_schema_field( serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True) ) def get_progress(self, rq_job: RQJob) -> Decimal: + # TODO: define parsed meta once + rq_job_meta = RQMeta.from_job(rq_job) # progress of task creation is stored in "task_progress" field # progress of project import is stored in "progress" field - return Decimal(rq_job.meta.get(RQJobMetaField.PROGRESS) or rq_job.meta.get(RQJobMetaField.TASK_PROGRESS) or 0.) + return Decimal(rq_job_meta.progress or rq_job_meta.task_progress or 0.) @extend_schema_field(serializers.DateTimeField(required=False, allow_null=True)) def get_expiry_date(self, rq_job: RQJob) -> Optional[str]: @@ -3501,28 +3495,30 @@ def get_expiry_date(self, rq_job: RQJob) -> Optional[str]: @extend_schema_field(serializers.CharField(allow_blank=True)) def get_message(self, rq_job: RQJob) -> str: + # TODO: define parsed meta once + rq_job_meta = RQMeta.from_job(rq_job) rq_job_status = rq_job.get_status() message = '' if RQJobStatus.STARTED == rq_job_status: - message = rq_job.meta.get(RQJobMetaField.STATUS, '') + message = rq_job_meta.status elif RQJobStatus.FAILED == rq_job_status: - message = rq_job.meta.get( - RQJobMetaField.FORMATTED_EXCEPTION, - parse_exception_message(str(rq_job.exc_info or "Unknown error")), - ) + message = rq_job_meta.formatted_exception or parse_exception_message(str(rq_job.exc_info or "Unknown error")) return message def to_representation(self, rq_job: RQJob) -> dict[str, Any]: representation = super().to_representation(rq_job) + # TODO: define parsed meta once + rq_job_meta = RQMeta.from_job(rq_job) + # FUTURE-TODO: support such statuses on UI if representation["status"] in (RQJobStatus.DEFERRED, RQJobStatus.SCHEDULED): representation["status"] = RQJobStatus.QUEUED if representation["status"] == RQJobStatus.FINISHED: - if result_url := rq_job.meta.get(RQJobMetaField.RESULT_URL): + if result_url := rq_job_meta.get_export_result_url(): representation["result_url"] = result_url if ( diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 7aa92acba2fd..8f02273fc9b6 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -17,6 +17,7 @@ from typing import Any, NamedTuple, Optional, Union from urllib import parse as urlparse from urllib import request as urlrequest +from cvat.apps.engine.rq_job_handler import 
RQMeta import attrs import av @@ -53,7 +54,6 @@ av_scan_paths, define_dependent_job, format_list, - get_rq_job_meta, get_rq_lock_by_user, take_by, ) @@ -83,7 +83,7 @@ def create( func=_create_thread, args=(db_task.pk, data), job_id=rq_id, - meta=get_rq_job_meta(request=request, db_obj=db_task), + meta=RQMeta.build_base(request=request, db_obj=db_task), depends_on=define_dependent_job(q, user_id), failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds(), ) @@ -107,13 +107,14 @@ class SegmentsParams(NamedTuple): def _copy_data_from_share_point( server_files: list[str], + *, upload_dir: str, server_dir: Optional[str] = None, server_files_exclude: Optional[list[str]] = None, + rq_job_meta: RQMeta, ): - job = rq.get_current_job() - job.meta['status'] = 'Data are being copied from source..' - job.save_meta() + rq_job_meta.status = 'Data are being copied from source..' + rq_job_meta.save() filtered_server_files = server_files.copy() @@ -201,8 +202,9 @@ def _create_segments_and_jobs( job_file_mapping: Optional[JobFileMapping] = None, ): rq_job = rq.get_current_job() - rq_job.meta['status'] = 'Task is being saved in database' - rq_job.save_meta() + rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta.status = 'Task is being saved in database' + rq_job_meta.save() segments, segment_size, overlap = _generate_segment_params( db_task=db_task, job_file_mapping=job_file_mapping, @@ -433,7 +435,7 @@ def _validate_scheme(url): if parsed_url.scheme not in ALLOWED_SCHEMES: raise ValueError('Unsupported URL scheme: {}. Only http and https are supported'.format(parsed_url.scheme)) -def _download_data(urls, upload_dir): +def _download_data(urls, upload_dir, *, rq_job_meta: RQMeta): job = rq.get_current_job() local_files = {} @@ -444,8 +446,8 @@ def _download_data(urls, upload_dir): raise Exception("filename collision: {}".format(name)) _validate_scheme(url) slogger.glob.info("Downloading: {}".format(url)) - job.meta['status'] = '{} is being downloaded..'.format(url) - job.save_meta() + rq_job_meta.status = '{} is being downloaded..'.format(url) + rq_job_meta.save() response = session.get(url, stream=True, proxies=PROXIES_FOR_UNTRUSTED_URLS) if response.status_code == 200: @@ -583,10 +585,11 @@ def _create_thread( slogger.glob.info("create task #{}".format(db_task.id)) job = rq.get_current_job() + rq_job_meta = RQMeta.from_job(job) def _update_status(msg: str) -> None: - job.meta['status'] = msg - job.save_meta() + rq_job_meta.status = msg + rq_job_meta.save() job_file_mapping = _validate_job_file_mapping(db_task, data) @@ -599,7 +602,7 @@ def _update_status(msg: str) -> None: is_data_in_cloud = db_data.storage == models.StorageChoice.CLOUD_STORAGE if data['remote_files'] and not is_dataset_import: - data['remote_files'] = _download_data(data['remote_files'], upload_dir) + data['remote_files'] = _download_data(data['remote_files'], upload_dir, rq_job_meta=rq_job_meta) # find and validate manifest file manifest_files = _find_manifest_files(data) @@ -777,7 +780,11 @@ def _update_status(msg: str) -> None: # this means that the data has not been downloaded from the storage to the host _copy_data_from_share_point( (data['server_files'] + [manifest_file]) if manifest_file else data['server_files'], - upload_dir, data.get('server_files_path'), data.get('server_files_exclude')) + upload_dir=upload_dir, + server_dir=data.get('server_files_path'), + server_files_exclude=data.get('server_files_exclude'), + rq_job_meta=rq_job_meta, + ) manifest_root = upload_dir elif is_data_in_cloud: # we should sort media before 
sorting in the extractor because the manifest structure should match to the sorted media @@ -799,8 +806,7 @@ def _update_status(msg: str) -> None: av_scan_paths(upload_dir) - job.meta['status'] = 'Media files are being extracted...' - job.save_meta() + _update_status('Media files are being extracted...') # If upload from server_files image and directories # need to update images list by all found images in directories @@ -1539,9 +1545,10 @@ def update_progress(self, progress: float): status_message, progress_animation[self._call_counter] ) - self._rq_job.meta['status'] = status_message - self._rq_job.meta['task_progress'] = progress or 0. - self._rq_job.save_meta() + rq_job_meta = RQMeta.from_job(self._rq_job) + rq_job_meta.status = status_message + rq_job_meta.task_progress = progress or 0. + rq_job_meta.save() self._call_counter = (self._call_counter + 1) % len(progress_animation) diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index b3e3d48f69d6..8ccbc781a379 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -220,6 +220,7 @@ def get_rq_lock_for_job(queue: DjangoRQ, rq_id: str, *, timeout: int = 60, block blocking_timeout=blocking_timeout, ) +# TODO: delete def get_rq_job_meta( request: HttpRequest, db_obj: Any, @@ -382,11 +383,10 @@ def build_backup_file_name( class_name: str, identifier: str | int, timestamp: str, - extension: str = "{}", ) -> str: - # "__backup_.zip" - return "{}_{}_backup_{}{}".format( - class_name, identifier, timestamp, extension, + # "__backup_" + return "{}_{}_backup_{}".format( + class_name, identifier, timestamp, ).lower() def build_annotations_file_name( @@ -396,12 +396,11 @@ def build_annotations_file_name( timestamp: str, format_name: str, is_annotation_file: bool = True, - extension: str = "{}", ) -> str: - # "____.zip" - return "{}_{}_{}_{}_{}{}".format( + # "____" + return "{}_{}_{}_{}_{}".format( class_name, identifier, 'annotations' if is_annotation_file else 'dataset', - timestamp, format_name, extension, + timestamp, format_name ).lower() diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 6a5691dc1182..e4203508dc79 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -21,7 +21,10 @@ from pathlib import Path from tempfile import NamedTemporaryFile from types import SimpleNamespace -from typing import Any, Callable, Optional, Union, cast +from typing import Any, Callable, Optional, Union, cast, Type + +from rest_framework.reverse import reverse +from cvat.apps.engine.middleware import PatchedRequest import django_rq from attr.converters import to_bool @@ -31,7 +34,7 @@ from django.db import models as django_models from django.db import transaction from django.db.models.query import Prefetch -from django.http import HttpRequest, HttpResponse, HttpResponseBadRequest, HttpResponseNotFound +from django.http import HttpRequest, HttpResponse, HttpResponseBadRequest, HttpResponseNotFound, HttpResponseGone from django.utils import timezone from django.utils.decorators import method_decorator from django.views.decorators.cache import never_cache @@ -67,6 +70,7 @@ db_storage_to_storage_instance, import_resource_from_cloud_storage, ) +from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.filters import ( NonModelJsonLogicFilter, NonModelOrderingFilter, @@ -120,7 +124,7 @@ get_cloud_storage_for_import_or_export, get_iam_context, ) -from cvat.apps.engine.rq_job_handler import RQId, RQJobMetaField, is_rq_job_owner +from cvat.apps.engine.rq_job_handler import RQId, 
is_rq_job_owner from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, @@ -154,7 +158,6 @@ ProjectWriteSerializer, RequestSerializer, RqIdSerializer, - RqStatusSerializer, TaskFileSerializer, TaskReadSerializer, TaskValidationLayoutReadSerializer, @@ -168,7 +171,6 @@ get_rq_job_meta, get_rq_lock_by_user, import_resource_with_clean_up_after, - parse_exception_message, process_failed_job, sendfile, ) @@ -393,46 +395,49 @@ def get_export_callback(self, save_images: bool) -> Callable: @extend_schema(methods=['GET'], summary='Export a project as a dataset / Check dataset import status', description=textwrap.dedent(""" - To check the status of the process of importing a project dataset from a file: - - After initiating the dataset upload, you will receive an rq_id parameter. - Make sure to include this parameter as a query parameter in your subsequent - GET /api/projects/id/dataset requests to track the status of the dataset import. - Also you should specify action parameter: action=import_status. + Utilizing this endpoint: + - to export project dataset in a specific format + - to check the status of the process of importing a project dataset from a file + is deprecated. - Deprecation warning: - Utilizing this endpoint to export project dataset in - a specific format will be deprecated in one of the next releases. Consider using new API: - POST /api/projects//dataset/export/?save_images=True to initiate export process - - GET /api/requests/ to check process status, - where rq_id is request id returned on initializing request + - GET /api/requests/ to check process status + - GET \{result_url\} to download a prepared file, + Where: + - `rq_id` can be found in the response on initializing request + - `result_url` can be found in the response on checking status request """), parameters=[ OpenApiParameter('format', description='Desired output format name\n' 'You can get the list of supported formats at:\n/server/annotation/formats', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + deprecated=True + ), OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + deprecated=True + ), OpenApiParameter('action', description='Used to start downloading process locally after annotation file has been created', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['download', 'import_status']), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['download', 'import_status'], + deprecated=True + ), OpenApiParameter('location', description='Where need to save downloaded dataset', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), + enum=Location.list(), + deprecated=True + ), OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in project to import dataset', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - OpenApiParameter('rq_id', description='rq id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, 
required=False, + deprecated=True + ), + OpenApiParameter('rq_id', description='Request ID', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), ], - # deprecated=True, FUTURE-TODO: uncomment when new API for result downloading will be implemented + deprecated=True, responses={ - '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), - '201': OpenApiResponse(description='Output file is ready for downloading'), - '202': OpenApiResponse(description='Exporting has been started'), - '405': OpenApiResponse(description='Format is not available'), + '301': OpenApiResponse(description='Redirects to the new API to check status of import process'), + '410': OpenApiResponse(description='API endpoint no longer supports exporting datasets'), }) @extend_schema(methods=['POST'], summary='Import a dataset into a project', @@ -468,58 +473,27 @@ def get_export_callback(self, save_images: bool) -> Callable: }) @action(detail=True, methods=['GET', 'POST', 'OPTIONS'], serializer_class=None, url_path=r'dataset/?$', parser_classes=_UPLOAD_PARSER_CLASSES, - csrf_workaround_is_needed=lambda qp: - csrf_workaround_is_needed_for_export(qp) and qp.get("action") != "import_status") - def dataset(self, request, pk): + ) + def dataset(self, request: PatchedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() - if request.method in {'POST', 'OPTIONS'}: - return self.import_annotations( - request=request, - db_obj=self._object, - import_func=_import_project_dataset, - rq_func=dm.project.import_dataset_as_project, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - ) - else: - action = request.query_params.get("action", "").lower() - if action in ("import_status",): - queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) - rq_id = request.query_params.get('rq_id') - if not rq_id: - return Response( - 'The rq_id param should be specified in the query parameters', - status=status.HTTP_400_BAD_REQUEST, - ) - - rq_job = queue.fetch_job(rq_id) - - if rq_job is None: - return Response(status=status.HTTP_404_NOT_FOUND) - # check that the user has access to the current rq_job - elif not is_rq_job_owner(rq_job, request.user.id): - return Response(status=status.HTTP_403_FORBIDDEN) - - if rq_job.is_finished: - rq_job.delete() - return Response(status=status.HTTP_201_CREATED) - elif rq_job.is_failed: - exc_info = process_failed_job(rq_job) + if request.method == "GET": + if request.query_params.get("action") == "import_status": + if rq_id := request.query_params.get("rq_id"): + return reverse('requests', request=request, args=[rq_id]) + return HttpResponseBadRequest("Missing rq_id") + # we don't redirect to the new API here since this endpoint used not only to check the status + # of exporting process|download a result file, but also to initiate export process + return HttpResponseGone("API endpoint is no longer handles exporting process") + + return self.import_annotations( + request=request, + db_obj=self._object, + import_func=_import_project_dataset, + rq_func=dm.project.import_dataset_as_project, + rq_id_factory=self.IMPORT_RQ_ID_FACTORY, + ) - return Response( - data=str(exc_info), - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - else: - return Response( - data=self._get_rq_response( - settings.CVAT_QUEUES.IMPORT_DATA.value, - rq_id, - ), - status=status.HTTP_202_ACCEPTED, - ) - else: - return self.export_dataset_v1(request=request, save_images=True) @tus_chunk_action(detail=True, suffix_base="dataset") def 
append_dataset_chunk(self, request, pk, file_id): @@ -569,87 +543,47 @@ def upload_finished(self, request): return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) - @extend_schema(summary='Export project annotations as a dataset', + @extend_schema( description=textwrap.dedent("""\ - Deprecation warning: - Using this endpoint to initiate export of annotations as a dataset or to check export status is deprecated. Consider using new API: - POST /api/projects//dataset/export?save_images=False to initiate exporting process - GET /api/requests/ to check export status, where rq_id is request id returned on initializing request' + - GET \{result_url\} to download a prepared file with annotations, + where result_url can be found in the response on checking status request """), - parameters=[ - OpenApiParameter('format', description='Desired output format name\n' - 'You can get the list of supported formats at:\n/server/annotation/formats', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), - OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('action', description='Used to start downloading process locally after annotation file has been created', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['download']), - OpenApiParameter('location', description='Where need to save downloaded dataset', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in project to export annotation', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - ], responses={ - '200': OpenApiResponse(PolymorphicProxySerializer( - component_name='AnnotationsRead', - serializers=[LabeledDataSerializer, OpenApiTypes.BINARY], - resource_type_field_name=None - ), description='Download of file started'), - '201': OpenApiResponse(description='Annotations file is ready to download'), - '202': OpenApiResponse(description='Dump of annotations has been started'), - '401': OpenApiResponse(description='Format is not specified'), - '405': OpenApiResponse(description='Format is not available'), - }) + '410': OpenApiResponse(description="API endpoint is no longer handles exporting process"), + }, + deprecated=True, + ) @action(detail=True, methods=['GET'], serializer_class=LabeledDataSerializer, csrf_workaround_is_needed=csrf_workaround_is_needed_for_export) def annotations(self, request, pk): - # FUTURE-TODO: mark exporting dataset using this endpoint as deprecated when new API for result file downloading will be implemented - self._object = self.get_object() # force call of check_object_permissions() - return self.export_dataset_v1(request=request, save_images=False) + return HttpResponseGone("API endpoint is no longer handles exporting process") + # --- Deprecated API endpoint, should be deleted in the next release --- @extend_schema(summary='Back up a project', description=textwrap.dedent("""\ - Deprecation warning: - - This endpoint will be deprecated in one of the next releases. 
- Consider using new API: - - POST /api/projects//backup/export to initiate backup process - - GET /api/requests/ to check process status, - where rq_id is request id returned on initializing request - """), - parameters=[ - OpenApiParameter('action', location=OpenApiParameter.QUERY, - description='Used to start downloading process after backup file had been created', - type=OpenApiTypes.STR, required=False, enum=['download']), - OpenApiParameter('filename', description='Backup file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('location', description='Where need to save downloaded backup', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in project to export backup', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - ], + Consider using new API: + - POST /api/projects//backup/export to initiate backup process + - GET /api/requests/ to check process status, + where rq_id can be found in the response on initializing request + - GET \{result_url\} to download a prepared file, + where result_url can be found in the response on checking status request + """ + ), responses={ - '200': OpenApiResponse(description='Download of file started'), - '201': OpenApiResponse(description='Output backup file is ready for downloading'), - '202': OpenApiResponse(description='Creating a backup file has been started'), - }) + '410': OpenApiResponse(description='Deprecated API endpoint'), + }, + deprecated=True, + ) @action(methods=['GET'], detail=True, url_path='backup', csrf_workaround_is_needed=csrf_workaround_is_needed_for_backup) - def export_backup(self, request, pk=None): - # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented - return self.export_backup_v1(request) + def export_backup(self, request: PatchedRequest, pk: int): + return HttpResponseGone("API endpoint is no longer handles the project backup process") @extend_schema(methods=['POST'], summary='Recreate a project from a backup', description=textwrap.dedent(""" @@ -721,23 +655,6 @@ def preview(self, request, pk): return data_getter() - @staticmethod - def _get_rq_response(queue, job_id): - queue = django_rq.get_queue(queue) - job = queue.fetch_job(job_id) - response = {} - if job is None or job.is_finished: - response = { "state": "Finished" } - elif job.is_queued or job.is_deferred: - response = { "state": "Queued" } - elif job.is_failed: - response = { "state": "Failed", "message": job.exc_info } - else: - response = { "state": "Started" } - response['message'] = job.meta.get('status', '') - response['progress'] = job.meta.get('progress', 0.) - - return response class _DataGetter(metaclass=ABCMeta): def __init__( @@ -1056,44 +973,22 @@ def append_backup_chunk(self, request, file_id): @extend_schema(summary='Back up a task', description=textwrap.dedent("""\ - Deprecation warning: - This endpoint will be deprecated in one of the next releases. 
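            For reference, the replacement backup flow listed below (initiate, poll, download)
            can be driven from a client roughly as follows. This is a minimal illustrative
            sketch only: the base URL, the task id, and the exact response field names
            (`rq_id`, `status`, `result_url`) are assumptions based on the request status API
            described in this change.

                import time
                import requests

                BASE = "https://cvat.example.com/api"  # hypothetical server URL
                session = requests.Session()           # assumed to be already authenticated

                # 1. Initiate the backup export and remember the background request id
                rq_id = session.post(f"{BASE}/tasks/42/backup/export").json()["rq_id"]

                # 2. Poll the common requests API until the background job completes
                while True:
                    info = session.get(f"{BASE}/requests/{rq_id}").json()
                    if info["status"] in ("finished", "failed"):
                        break
                    time.sleep(1)

                # 3. Download the prepared backup file from result_url
                if info["status"] == "finished":
                    backup_bytes = session.get(info["result_url"]).content
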
Consider using new API: - POST /api/tasks//backup/export to initiate backup process - GET /api/requests/ to check process status, - where rq_id is request id returned on initializing request' - """), - parameters=[ - OpenApiParameter('action', location=OpenApiParameter.QUERY, - description='Used to start downloading process after backup file had been created', - type=OpenApiTypes.STR, required=False, enum=['download']), - OpenApiParameter('filename', description='Backup file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('location', description='Where need to save downloaded backup', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export backup', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - ], + where rq_id can be found in the response on initializing request + - GET \{result_url\} to download a prepared file, + where result_url can be found in the response on checking status request + """ + ), responses={ - '200': OpenApiResponse(description='Download of file started'), - '201': OpenApiResponse(description='Output backup file is ready for downloading'), - '202': OpenApiResponse(description='Creating a backup file has been started'), - '400': OpenApiResponse(description='Backup of a task without data is not allowed'), - }) - @action(methods=['GET'], detail=True, url_path='backup', - csrf_workaround_is_needed=csrf_workaround_is_needed_for_backup) + '410': OpenApiResponse(description='Deprecated API endpoint'), + }, + deprecated=True, + ) + @action(methods=['GET'], detail=True, url_path='backup') def export_backup(self, request, pk=None): - # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented - if self.get_object().data is None: - return Response( - data='Backup of a task without data is not allowed', - status=status.HTTP_400_BAD_REQUEST - ) - return self.export_backup_v1(request) + return HttpResponseGone("API endpoint is no longer handles the task backup process") @transaction.atomic def perform_update(self, serializer): @@ -1472,47 +1367,50 @@ def append_data_chunk(self, request, pk, file_id): def get_export_callback(self, save_images: bool) -> Callable: return dm.views.export_task_as_dataset if save_images else dm.views.export_task_annotations - # TODO: mark this endpoint as deprecated when new endpoint for downloading results will be implemented - @extend_schema(methods=['GET'], summary='Get task annotations or export them as a dataset in a specific format', + @extend_schema(methods=['GET'], summary='Get task annotations', description=textwrap.dedent("""\ Deprecation warning: - Utilizing this endpoint ot export annotations as a dataset in - a specific format will be deprecated in one of the next releases. + Utilizing this endpoint to export annotations as a dataset in + a specific format is deprecated. 
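            The new dataset/annotations export follows the same initiate/poll/download pattern
            as the backup sketch shown earlier; only the initiating call differs. A minimal
            illustrative snippet (the format name and parameter values are example assumptions):

                # annotations only (no images); format name is an example value
                rq_id = session.post(
                    f"{BASE}/tasks/42/dataset/export",
                    params={"save_images": False, "format": "CVAT for images 1.1"},
                ).json()["rq_id"]
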
Consider using new API: - POST /api/tasks//dataset/export?save_images=False to initiate export process - GET /api/requests/ to check process status, where rq_id is request id returned on initializing request + - GET \{result_url\} to download a prepared file, + where result_url can be found in the response on checking status request """), parameters=[ + # --- Deprecated params section --- OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats", + deprecated=True ), OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + deprecated=True + ), OpenApiParameter('action', location=OpenApiParameter.QUERY, description='Used to start downloading process locally after annotation file has been created', - type=OpenApiTypes.STR, required=False, enum=['download']), + type=OpenApiTypes.STR, required=False, enum=['download'], + deprecated=True + ), OpenApiParameter('location', description='Where need to save downloaded dataset', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), + enum=Location.list(), + deprecated=True + ), OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export annotation', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False, + deprecated=True + ), + # --- Deprecated params section --- ], responses={ - '200': OpenApiResponse(PolymorphicProxySerializer( - component_name='AnnotationsRead', - serializers=[LabeledDataSerializer, OpenApiTypes.BINARY], - resource_type_field_name=None - ), description='Download of file started'), - '201': OpenApiResponse(description='Annotations file is ready to download'), - '202': OpenApiResponse(description='Dump of annotations has been started'), - '400': OpenApiResponse(description='Exporting without data is not allowed'), - '405': OpenApiResponse(description='Format is not available'), + '200': OpenApiResponse(LabeledDataSerializer), + '400': OpenApiResponse(description="Exporting without data is not allowed"), + '410': OpenApiResponse(description="API endpoint is no longer handles exporting process"), }) @extend_schema(methods=['PUT'], summary='Replace task annotations / Get annotation import status', description=textwrap.dedent(""" @@ -1588,15 +1486,18 @@ def get_export_callback(self, save_images: bool) -> Callable: def annotations(self, request, pk): self._object = self.get_object() # force call of check_object_permissions() if request.method == 'GET': - if self._object.data: - return self.export_dataset_v1( - request=request, - save_images=False, - get_data=dm.task.get_task_data, - ) - else: + if not self._object.data: return HttpResponseBadRequest("Exporting annotations from a task without data is not allowed") + if ( + {"format", "filename", "action", "location", "cloud_storage_id"} + & request.query_params.keys() + ): + return HttpResponseGone(f"API endpoint no longer handles exporting process") + + data = dm.task.get_task_data(self._object.pk) + return 
Response(data) + elif request.method == 'POST' or request.method == 'OPTIONS': # NOTE: initialization process of annotations import format_name = request.query_params.get('format', '') @@ -1650,52 +1551,7 @@ def append_annotations_chunk(self, request, pk, file_id): self._object = self.get_object() return self.append_tus_chunk(request, file_id) - ### --- DEPRECATED METHOD --- ### - @extend_schema( - summary='Get the creation status of a task', - responses={ - '200': RqStatusSerializer, - }, - deprecated=True, - description="This method is deprecated and will be removed in one of the next releases. " - "To check status of task creation, use new common API " - "for managing background operations: GET /api/requests/?action=create&task_id=", - ) - @action(detail=True, methods=['GET'], serializer_class=RqStatusSerializer) - def status(self, request, pk): - task = self.get_object() # force call of check_object_permissions() - response = self._get_rq_response( - queue=settings.CVAT_QUEUES.IMPORT_DATA.value, - job_id=RQId(RequestAction.CREATE, RequestTarget.TASK, task.id).render() - ) - serializer = RqStatusSerializer(data=response) - serializer.is_valid(raise_exception=True) - return Response(serializer.data, headers={'Deprecation': 'true'}) - - ### --- DEPRECATED METHOD--- ### - @staticmethod - def _get_rq_response(queue, job_id): - queue = django_rq.get_queue(queue) - job = queue.fetch_job(job_id) - response = {} - if job is None or job.is_finished: - response = { "state": "Finished" } - elif job.is_queued or job.is_deferred: - response = { "state": "Queued" } - elif job.is_failed: - # FIXME: It seems that in some cases exc_info can be None. - # It's not really clear how it is possible, but it can - # lead to an error in serializing the response - # https://github.com/cvat-ai/cvat/issues/5215 - response = { "state": "Failed", "message": parse_exception_message(job.exc_info or "Unknown error") } - else: - response = { "state": "Started" } - if job.meta.get('status'): - response['message'] = job.meta['status'] - response['progress'] = job.meta.get('task_progress', 0.) - - return response @extend_schema(methods=['GET'], summary='Get metainformation for media files in a task', responses={ @@ -1743,55 +1599,23 @@ def metadata(self, request, pk): @extend_schema(summary='Export task as a dataset in a specific format', description=textwrap.dedent("""\ - Deprecation warning: - Utilizing this endpoint to export task dataset in - a specific format will be deprecated in one of the next releases. + a specific format is deprecated. 
Consider using new API: - POST /api/tasks//dataset/export?save_images=True to initiate export process - GET /api/requests/ to check process status, where rq_id is request id returned on initializing request + - GET \{result_url\} to download a prepared file, + where result_url can be found in the response on checking status request """), - parameters=[ - OpenApiParameter('format', location=OpenApiParameter.QUERY, - description='Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats', - type=OpenApiTypes.STR, required=True), - OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('action', location=OpenApiParameter.QUERY, - description='Used to start downloading process locally after annotation file has been created', - type=OpenApiTypes.STR, required=False, enum=['download']), - OpenApiParameter('use_default_location', description='Use the location that was configured in task to export annotations', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - OpenApiParameter('location', description='Where need to save downloaded dataset', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - ], responses={ - '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), - '201': OpenApiResponse(description='Output file is ready for downloading'), - '202': OpenApiResponse(description='Exporting has been started'), - '400': OpenApiResponse(description='Exporting without data is not allowed'), - '405': OpenApiResponse(description='Format is not available'), + '410': OpenApiResponse(description='Deprecated API endpoint'), }, ) - @action(detail=True, methods=['GET'], serializer_class=None, - url_path='dataset', csrf_workaround_is_needed=csrf_workaround_is_needed_for_export) + @action(detail=True, methods=['GET'], serializer_class=None, url_path='dataset') def dataset_export(self, request, pk): - # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented - self._object = self.get_object() # force call of check_object_permissions() - - if self._object.data: - return self.export_dataset_v1( - request=request, - save_images=True - ) - - return HttpResponseBadRequest("Exporting a dataset from a task without data is not allowed") + return HttpResponseGone("Deprecated API endpoint") @extend_schema(summary='Get a preview image for a task', responses={ @@ -2039,48 +1863,22 @@ def upload_finished(self, request): status=status.HTTP_400_BAD_REQUEST) @extend_schema(methods=['GET'], - summary="Get job annotations or export job annotations as a dataset in a specific format", + summary="Get job annotations", description=textwrap.dedent("""\ - If format is specified, a ZIP archive will be returned. Otherwise, - the annotations will be returned as a JSON document. - Deprecation warning: - Utilizing this endpoint to export annotations as a dataset in - a specific format will be deprecated in one of the next releases. + Utilizing this endpoint to export job dataset in a specific format is deprecated. 
Consider using new API: - - POST /api/jobs//dataset/export?save_images=False to initiate export process + - POST /api/jobs//dataset/export?save_images=True to initiate export process - GET /api/requests/ to check process status, where rq_id is request id returned on initializing request + - GET \{result_url\} to download a prepared file, + where result_url can be found in the response on checking status request """), - parameters=[ - OpenApiParameter('format', location=OpenApiParameter.QUERY, - description='Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats', - type=OpenApiTypes.STR, required=False), - OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('action', location=OpenApiParameter.QUERY, - description='Used to start downloading process locally after annotation file has been created', - type=OpenApiTypes.STR, required=False, enum=['download']), - OpenApiParameter('location', description='Where need to save downloaded annotation', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export annotation', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - ], responses={ - '200': OpenApiResponse(PolymorphicProxySerializer( - component_name='AnnotationsRead', - serializers=[LabeledDataSerializer, OpenApiTypes.BINARY], - resource_type_field_name=None - ), description='Download of file started'), - '201': OpenApiResponse(description='Output file is ready for downloading'), - '202': OpenApiResponse(description='Exporting has been started'), - '405': OpenApiResponse(description='Format is not available'), + '200': OpenApiResponse(LabeledDataSerializer), + '410': OpenApiResponse(description="API endpoint no longer handles dataset exporting process"), }) @extend_schema(methods=['POST'], summary='Import annotations into a job', @@ -2163,12 +1961,15 @@ def upload_finished(self, request): def annotations(self, request, pk): self._object: models.Job = self.get_object() # force call of check_object_permissions() if request.method == 'GET': - # FUTURE-TODO: mark as deprecated using this endpoint to export annotations when new API for result file downloading will be implemented - return self.export_dataset_v1( - request=request, - save_images=False, - get_data=dm.task.get_job_data, - ) + + if ( + {"format", "filename", "location", "action", "cloud_storage_id"} + & request.query_params.keys() + ): + return HttpResponseGone(f"API endpoint no longer handles dataset exporting process") + + annotations = dm.task.get_job_data(self._object.pk) + return Response(annotations) elif request.method == 'POST' or request.method == 'OPTIONS': format_name = request.query_params.get('format', '') @@ -2227,48 +2028,6 @@ def append_annotations_chunk(self, request, pk, file_id): return self.append_tus_chunk(request, file_id) - @extend_schema(summary='Export job as a dataset in a specific format', - description=textwrap.dedent("""\ - Deprecation warning: - This endpoint will be deprecated in one of the next releases. 
- Consider using new API: - - POST /api/jobs//dataset/export?save_images=True to initiate export process - - GET /api/requests/ to check process status, - where rq_id is request id returned on initializing request - """), - parameters=[ - OpenApiParameter('format', location=OpenApiParameter.QUERY, - description='Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats', - type=OpenApiTypes.STR, required=True), - OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), - OpenApiParameter('action', location=OpenApiParameter.QUERY, - description='Used to start downloading process locally after annotation file has been created', - type=OpenApiTypes.STR, required=False, enum=['download']), - OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export dataset', - location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, - default=True, deprecated=True), - OpenApiParameter('location', description='Where need to save downloaded dataset', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list()), - OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), - ], - responses={ - '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), - '201': OpenApiResponse(description='Output file is ready for downloading'), - '202': OpenApiResponse(description='Exporting has been started'), - '405': OpenApiResponse(description='Format is not available'), - }, - ) - @action(detail=True, methods=['GET'], serializer_class=None, - url_path='dataset', csrf_workaround_is_needed=csrf_workaround_is_needed_for_export) - def dataset_export(self, request, pk): - # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented - self._object = self.get_object() # force call of check_object_permissions() - - return self.export_dataset_v1(request=request, save_images=True) - def get_export_callback(self, save_images: bool) -> Callable: return dm.views.export_job_as_dataset if save_images else dm.views.export_job_annotations @@ -3339,13 +3098,14 @@ def perform_destroy(self, instance): super().perform_destroy(instance) target.touch() -def rq_exception_handler(rq_job, exc_type, exc_value, tb): - rq_job.meta[RQJobMetaField.FORMATTED_EXCEPTION] = "".join( +def rq_exception_handler(rq_job: RQJob, exc_type: Type[Exception], exc_value, tb): + rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta.formatted_exception = "".join( traceback.format_exception_only(exc_type, exc_value)) if rq_job.origin == settings.CVAT_QUEUES.CHUNKS.value: - rq_job.meta[RQJobMetaField.EXCEPTION_TYPE] = exc_type - rq_job.meta[RQJobMetaField.EXCEPTION_ARGS] = exc_value.args - rq_job.save_meta() + rq_job_meta.exc_type = exc_type + rq_job_meta.exc_args = exc_value.args + rq_job_meta.save() return True @@ -3434,6 +3194,7 @@ def _import_annotations(request, rq_id_factory, rq_func, db_obj, format_name, args=func_args, job_id=rq_id, depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), + # TODO: meta={ 'tmp_file': filename, **get_rq_job_meta(request=request, db_obj=db_obj), @@ -3537,6 +3298,7 @@ def _import_project_dataset( func=func, args=func_args, job_id=rq_id, + # TODO: meta={ 'tmp_file': filename, **get_rq_job_meta(request=request, db_obj=db_obj), diff --git 
a/cvat/apps/events/export.py b/cvat/apps/events/export.py index 770f84dda054..130962561e41 100644 --- a/cvat/apps/events/export.py +++ b/cvat/apps/events/export.py @@ -17,7 +17,7 @@ from cvat.apps.dataset_manager.views import log_exception from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.rq_job_handler import RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.utils import sendfile slogger = ServerLogManager(__name__) @@ -136,7 +136,7 @@ def export(request, filter_query, queue_name): "query_id": query_id, } - queue = django_rq.get_queue(queue_name) + queue: django_rq.queues.DjangoRQ = django_rq.get_queue(queue_name) rq_job = queue.fetch_job(rq_id) if rq_job: @@ -152,7 +152,8 @@ def export(request, filter_query, queue_name): if os.path.exists(file_path): return Response(status=status.HTTP_201_CREATED) elif rq_job.is_failed: - exc_info = rq_job.meta.get(RQJobMetaField.FORMATTED_EXCEPTION, str(rq_job.exc_info)) + rq_job_meta = RQMeta.from_job(rq_job) + exc_info = rq_job_meta.formatted_exception or str(rq_job.exc_info) rq_job.delete() return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) else: diff --git a/cvat/apps/events/handlers.py b/cvat/apps/events/handlers.py index 69dd4b11cdd8..58c06e2d4752 100644 --- a/cvat/apps/events/handlers.py +++ b/cvat/apps/events/handlers.py @@ -22,7 +22,7 @@ Task, User, ) -from cvat.apps.engine.rq_job_handler import RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.serializers import ( BasicUserSerializer, CloudStorageReadSerializer, @@ -105,11 +105,11 @@ def get_user(instance=None): # Try to get user from rq_job if isinstance(instance, rq.job.Job): - return instance.meta.get(RQJobMetaField.USER, None) + return RQMeta.from_job(instance).user else: rq_job = rq.get_current_job() if rq_job: - return rq_job.meta.get(RQJobMetaField.USER, None) + return RQMeta.from_job(rq_job).user if isinstance(instance, User): return instance @@ -123,11 +123,11 @@ def get_request(instance=None): return request if isinstance(instance, rq.job.Job): - return instance.meta.get(RQJobMetaField.REQUEST, None) + return RQMeta.from_job(instance).request else: rq_job = rq.get_current_job() if rq_job: - return rq_job.meta.get(RQJobMetaField.REQUEST, None) + return RQMeta.from_job(rq_job).request return None @@ -569,11 +569,12 @@ def handle_function_call( def handle_rq_exception(rq_job, exc_type, exc_value, tb): - oid = rq_job.meta.get(RQJobMetaField.ORG_ID, None) - oslug = rq_job.meta.get(RQJobMetaField.ORG_SLUG, None) - pid = rq_job.meta.get(RQJobMetaField.PROJECT_ID, None) - tid = rq_job.meta.get(RQJobMetaField.TASK_ID, None) - jid = rq_job.meta.get(RQJobMetaField.JOB_ID, None) + rq_job_meta = RQMeta.from_job(rq_job) + oid = rq_job_meta.org_id + oslug = rq_job_meta.org_slug + pid = rq_job_meta.project_id + tid = rq_job_meta.task_id + jid = rq_job_meta.job_id uid = user_id(rq_job) uname = user_name(rq_job) uemail = user_email(rq_job) diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index 2ea106662149..4863b8686955 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -34,6 +34,7 @@ from rest_framework.response import Response import cvat.apps.dataset_manager as dm +from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.frame_provider import TaskFrameProvider from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import ( @@ -45,9 +46,9 @@ SourceType, Task, ) -from 
cvat.apps.engine.rq_job_handler import RQId, RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQId, RQMeta from cvat.apps.engine.serializers import LabeledDataSerializer -from cvat.apps.engine.utils import define_dependent_job, get_rq_job_meta, get_rq_lock_by_user +from cvat.apps.engine.utils import define_dependent_job, get_rq_lock_by_user from cvat.apps.events.handlers import handle_function_call from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.lambda_manager.models import FunctionKind @@ -615,17 +616,13 @@ def enqueue( user_id = request.user.id with get_rq_lock_by_user(queue, user_id): + meta = RQMeta.build_base(request=request, db_obj=Job.objects.get(pk=job) if job else Task.objects.get(pk=task)) + RQMeta.update_lambda_info(meta, function_id=lambda_func.id) + rq_job = queue.create_job( LambdaJob(None), job_id=rq_id, - meta={ - **get_rq_job_meta( - request, - db_obj=(Job.objects.get(pk=job) if job else Task.objects.get(pk=task)), - ), - RQJobMetaField.FUNCTION_ID: lambda_func.id, - "lambda": True, - }, + meta=meta, kwargs={ "function": lambda_func, "threshold": threshold, @@ -648,7 +645,7 @@ def enqueue( def fetch_job(self, pk): queue = self._get_queue() rq_job = queue.fetch_job(pk) - if rq_job is None or not rq_job.meta.get("lambda"): + if rq_job is None or not RQMeta.from_job(rq_job).lambda_: raise ValidationError( "{} lambda job is not found".format(pk), code=status.HTTP_404_NOT_FOUND ) @@ -891,10 +888,11 @@ def _map(sublabel_body): # progress is in [0, 1] range def _update_progress(progress): job = rq.get_current_job() + rq_job_meta = RQMeta.from_job(job) # If the job has been deleted, get_status will return None. Thus it will # exist the loop. - job.meta["progress"] = int(progress * 100) - job.save_meta() + rq_job_meta.progress = int(progress * 100) + rq_job_meta.save() return job.get_status() diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index f757aeabc61a..a02a9aa6a88a 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -51,7 +51,7 @@ User, ValidationMode, ) -from cvat.apps.engine.utils import define_dependent_job, get_rq_job_meta, get_rq_lock_by_user +from cvat.apps.engine.utils import define_dependent_job, get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control import models from cvat.apps.quality_control.models import ( @@ -59,6 +59,7 @@ AnnotationConflictType, AnnotationType, ) +from cvat.apps.engine.rq_job_handler import RQMeta class _Serializable: @@ -2271,7 +2272,7 @@ def schedule_custom_quality_check_job( self._check_task_quality, task_id=task.id, job_id=rq_id, - meta=get_rq_job_meta(request=request, db_obj=task), + meta=RQMeta.build_base(request=request, db_obj=task), result_ttl=self._JOB_RESULT_TTL, failure_ttl=self._JOB_RESULT_TTL, depends_on=dependency, diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index c5e64d058646..8cb4afcd7e65 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -21,7 +21,7 @@ from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Task -from cvat.apps.engine.rq_job_handler import RQJobMetaField +from cvat.apps.engine.rq_job_handler import RQMeta from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import get_server_url from cvat.apps.quality_control import quality_reports as qc @@ -294,7 +294,7 @@ def 
create(self, request, *args, **kwargs): if ( not rq_job or not QualityReportPermission.create_scope_check_status( - request, job_owner_id=rq_job.meta[RQJobMetaField.USER]["id"] + request, job_owner_id=RQMeta.from_job(rq_job).user.id ) .check_access() .allow From e3d6da905eb2337dd965d06b071f4e9a9f33d624 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 7 Feb 2025 12:57:30 +0100 Subject: [PATCH 02/14] Remove outdated tests --- tests/python/rest_api/test_jobs.py | 49 +------ tests/python/rest_api/test_projects.py | 150 +++++-------------- tests/python/rest_api/test_requests.py | 23 +-- tests/python/rest_api/test_tasks.py | 81 +++-------- tests/python/rest_api/utils.py | 191 +++---------------------- 5 files changed, 91 insertions(+), 403 deletions(-) diff --git a/tests/python/rest_api/test_jobs.py b/tests/python/rest_api/test_jobs.py index e7b405dce9e9..0e5c438d03cf 100644 --- a/tests/python/rest_api/test_jobs.py +++ b/tests/python/rest_api/test_jobs.py @@ -1443,11 +1443,10 @@ def _test_export_dataset( username: str, jid: int, *, - api_version: Union[int, tuple[int]], local_download: bool = True, **kwargs, ) -> Optional[bytes]: - dataset = export_job_dataset(username, api_version, save_images=True, id=jid, **kwargs) + dataset = export_job_dataset(username, save_images=True, id=jid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -1457,9 +1456,9 @@ def _test_export_dataset( @staticmethod def _test_export_annotations( - username: str, jid: int, *, api_version: int, local_download: bool = True, **kwargs + username: str, jid: int, *, local_download: bool = True, **kwargs ) -> Optional[bytes]: - dataset = export_job_dataset(username, api_version, save_images=False, id=jid, **kwargs) + dataset = export_job_dataset(username, save_images=False, id=jid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -1467,34 +1466,7 @@ def _test_export_annotations( return dataset - @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) - @pytest.mark.parametrize( - "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) - ) - def test_can_export_dataset_locally_and_to_cloud_with_both_api_versions( - self, - admin_user: str, - jobs_with_shapes: list, - filter_tasks, - api_version: tuple[int], - local_download: bool, - ): - filter_ = "target_storage__location" - if local_download: - filter_ = "exclude_" + filter_ - - task_ids = [t["id"] for t in filter_tasks(**{filter_: "cloud_storage"})] - - job = next(j for j in jobs_with_shapes if j["task_id"] in task_ids) - self._test_export_dataset( - admin_user, - job["id"], - api_version=api_version, - local_download=local_download, - ) - - @pytest.mark.parametrize("api_version", (1, 2)) - def test_non_admin_can_export_dataset(self, users, jobs_with_shapes, api_version: int): + def test_non_admin_can_export_dataset(self, users, jobs_with_shapes): job, username = next( ( (job, self.tasks[job["task_id"]]["owner"]["username"]) @@ -1504,10 +1476,9 @@ def test_non_admin_can_export_dataset(self, users, jobs_with_shapes, api_version and self.tasks[job["task_id"]]["organization"] is None ) ) - self._test_export_dataset(username, job["id"], api_version=api_version) + self._test_export_dataset(username, job["id"]) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_non_admin_can_export_annotations(self, users, jobs_with_shapes, api_version: int): + def test_non_admin_can_export_annotations(self, users, jobs_with_shapes): job, username = next( ( (job, 
self.tasks[job["task_id"]]["owner"]["username"]) @@ -1518,9 +1489,8 @@ def test_non_admin_can_export_annotations(self, users, jobs_with_shapes, api_ver ) ) - self._test_export_annotations(username, job["id"], api_version=api_version) + self._test_export_annotations(username, job["id"]) - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("username, jid", [("admin1", 14)]) @pytest.mark.parametrize( "anno_format, anno_file_name, check_func", @@ -1538,7 +1508,6 @@ def test_exported_job_dataset_structure( check_func, jobs, annotations, - api_version: int, ): job_data = jobs[jid] annotations_before = annotations["job"][str(jid)] @@ -1557,7 +1526,6 @@ def test_exported_job_dataset_structure( dataset = self._test_export_dataset( username, jid, - api_version=api_version, format=anno_format, ) @@ -1568,7 +1536,6 @@ def test_exported_job_dataset_structure( content = zip_file.read(anno_file_name) check_func(content, values_to_be_checked) - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("username", ["admin1"]) @pytest.mark.parametrize("jid", [25, 26]) @pytest.mark.parametrize( @@ -1592,7 +1559,6 @@ def test_export_job_among_several_jobs_in_task( check_func, jobs, annotations, - api_version: int, ): job_data = jobs[jid] annotations_before = annotations["job"][str(jid)] @@ -1611,7 +1577,6 @@ def test_export_job_among_several_jobs_in_task( dataset = self._test_export_dataset( username, jid, - api_version=api_version, format=anno_format, ) diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index 7785454c8839..0d2fb826a100 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -207,11 +207,10 @@ def _test_can_get_project_backup( username: str, pid: int, *, - api_version: int, local_download: bool = True, **kwargs, ) -> Optional[bytes]: - backup = export_project_backup(username, id=pid, api_version=api_version, **kwargs) + backup = export_project_backup(username, id=pid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(backup)) else: @@ -222,20 +221,17 @@ def _test_cannot_get_project_backup( self, username: str, pid: int, - api_version: int, **kwargs, ): with pytest.raises(ForbiddenException): - export_project_backup(username, api_version, id=pid, expect_forbidden=True, **kwargs) + export_project_backup(username, id=pid, expect_forbidden=True, **kwargs) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_admin_can_get_project_backup(self, api_version: int): + def test_admin_can_get_project_backup(self): project = list(self.projects)[0] - self._test_can_get_project_backup("admin1", project["id"], api_version=api_version) + self._test_can_get_project_backup("admin1", project["id"]) # User that not in [project:owner, project:assignee] cannot get project backup. 
- @pytest.mark.parametrize("api_version", (1, 2)) - def test_user_cannot_get_project_backup(self, find_users, is_project_staff, api_version: int): + def test_user_cannot_get_project_backup(self, find_users, is_project_staff): users = find_users(exclude_privilege="admin") user, project = next( @@ -244,14 +240,11 @@ def test_user_cannot_get_project_backup(self, find_users, is_project_staff, api_ if not is_project_staff(user["id"], project["id"]) ) - self._test_cannot_get_project_backup( - user["username"], project["id"], api_version=api_version - ) + self._test_cannot_get_project_backup(user["username"], project["id"]) # Org worker that not in [project:owner, project:assignee] cannot get project backup. - @pytest.mark.parametrize("api_version", (1, 2)) def test_org_worker_cannot_get_project_backup( - self, find_users, is_project_staff, is_org_member, api_version: int + self, find_users, is_project_staff, is_org_member ): users = find_users(role="worker", exclude_privilege="admin") @@ -263,18 +256,14 @@ def test_org_worker_cannot_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_cannot_get_project_backup( - user["username"], project["id"], api_version=api_version - ) + self._test_cannot_get_project_backup(user["username"], project["id"]) # Org worker that in [project:owner, project:assignee] can get project backup. - @pytest.mark.parametrize("api_version", (1, 2)) def test_org_worker_can_get_project_backup( self, find_users, is_project_staff, is_org_member, - api_version: int, ): users = find_users(role="worker", exclude_privilege="admin") @@ -286,12 +275,11 @@ def test_org_worker_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) + self._test_can_get_project_backup(user["username"], project["id"]) # Org supervisor that in [project:owner, project:assignee] can get project backup. - @pytest.mark.parametrize("api_version", (1, 2)) def test_org_supervisor_can_get_project_backup( - self, find_users, is_project_staff, is_org_member, api_version: int + self, find_users, is_project_staff, is_org_member ): users = find_users(role="supervisor", exclude_privilege="admin") @@ -303,16 +291,14 @@ def test_org_supervisor_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) + self._test_can_get_project_backup(user["username"], project["id"]) # Org supervisor that not in [project:owner, project:assignee] cannot get project backup. - @pytest.mark.parametrize("api_version", (1, 2)) def test_org_supervisor_cannot_get_project_backup( self, find_users, is_project_staff, is_org_member, - api_version: int, ): users = find_users(exclude_privilege="admin") @@ -324,18 +310,14 @@ def test_org_supervisor_cannot_get_project_backup( and is_org_member(user["id"], project["organization"], role="supervisor") ) - self._test_cannot_get_project_backup( - user["username"], project["id"], api_version=api_version - ) + self._test_cannot_get_project_backup(user["username"], project["id"]) # Org maintainer that not in [project:owner, project:assignee] can get project backup. 
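# --- Editor's sketch (illustration only, not part of the patch) ----------------
# With the api_version parameter removed, the backup helpers used by the tests
# above always follow the single v2 flow from tests/python/rest_api/utils.py:
# initialize_export() POSTs to the backup/export endpoint and returns an rq_id,
# then wait_and_download_v2() polls /api/requests/<rq_id> and downloads result_url
# once the request finishes. The import path below is an assumption; a local
# download returns the archive bytes, while a cloud-storage export returns None.
import io
import zipfile

from rest_api.utils import export_project_backup  # assumed import path


def check_project_backup(username: str, project_id: int) -> None:
    backup = export_project_backup(username, id=project_id)
    if backup is not None:  # None means the result was written to cloud storage
        assert zipfile.is_zipfile(io.BytesIO(backup))
# --------------------------------------------------------------------------------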
- @pytest.mark.parametrize("api_version", (1, 2)) def test_org_maintainer_can_get_project_backup( self, find_users, is_project_staff, is_org_member, - api_version: int, ): users = find_users(role="maintainer", exclude_privilege="admin") @@ -347,13 +329,10 @@ def test_org_maintainer_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) + self._test_can_get_project_backup(user["username"], project["id"]) # Org owner that not in [project:owner, project:assignee] can get project backup. - @pytest.mark.parametrize("api_version", (1, 2)) - def test_org_owner_can_get_project_backup( - self, find_users, is_project_staff, is_org_member, api_version: int - ): + def test_org_owner_can_get_project_backup(self, find_users, is_project_staff, is_org_member): users = find_users(role="owner", exclude_privilege="admin") user, project = next( @@ -364,10 +343,9 @@ def test_org_owner_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) + self._test_can_get_project_backup(user["username"], project["id"]) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_get_backup_project_when_some_tasks_have_no_data(self, api_version: int): + def test_can_get_backup_project_when_some_tasks_have_no_data(self): project = next((p for p in self.projects if 0 < p["tasks"]["count"])) # add empty task to project @@ -376,12 +354,9 @@ def test_can_get_backup_project_when_some_tasks_have_no_data(self, api_version: ) assert response.status_code == HTTPStatus.CREATED - self._test_can_get_project_backup("admin1", project["id"], api_version=api_version) + self._test_can_get_project_backup("admin1", project["id"]) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_get_backup_project_when_all_tasks_have_no_data( - self, api_version: int, filter_projects - ): + def test_can_get_backup_project_when_all_tasks_have_no_data(self, filter_projects): project = filter_projects(tasks__count=0)[0] # add empty tasks to empty project @@ -401,19 +376,15 @@ def test_can_get_backup_project_when_all_tasks_have_no_data( ) assert response.status_code == HTTPStatus.CREATED, response.text - self._test_can_get_project_backup("admin1", project["id"], api_version=api_version) + self._test_can_get_project_backup("admin1", project["id"]) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_get_backup_for_empty_project(self, api_version: int): + def test_can_get_backup_for_empty_project(self): empty_project = next((p for p in self.projects if 0 == p["tasks"]["count"])) - self._test_can_get_project_backup("admin1", empty_project["id"], api_version=api_version) + self._test_can_get_project_backup("admin1", empty_project["id"]) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_admin_can_get_project_backup_and_create_project_by_backup( - self, admin_user: str, api_version: int - ): + def test_admin_can_get_project_backup_and_create_project_by_backup(self, admin_user: str): project_id = 5 - backup = self._test_can_get_project_backup(admin_user, project_id, api_version=api_version) + backup = self._test_can_get_project_backup(admin_user, project_id) tmp_file = io.BytesIO(backup) tmp_file.name = "dataset.zip" @@ -631,11 +602,10 @@ def _test_export_dataset( username: str, pid: int, *, - api_version: Union[int, tuple[int]], local_download: bool = True, **kwargs, ) -> Optional[bytes]: - 
dataset = export_project_dataset(username, api_version, save_images=True, id=pid, **kwargs) + dataset = export_project_dataset(username, save_images=True, id=pid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -645,9 +615,9 @@ def _test_export_dataset( @staticmethod def _test_export_annotations( - username: str, pid: int, *, api_version: int, local_download: bool = True, **kwargs + username: str, pid: int, *, local_download: bool = True, **kwargs ) -> Optional[bytes]: - dataset = export_project_dataset(username, api_version, save_images=False, id=pid, **kwargs) + dataset = export_project_dataset(username, save_images=False, id=pid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -695,7 +665,6 @@ def test_can_import_dataset_in_org(self, admin_user: str): dataset = self._test_export_dataset( admin_user, project_id, - api_version=2, ) tmp_file = io.BytesIO(dataset) @@ -738,7 +707,6 @@ def test_can_export_and_import_dataset_with_skeletons( dataset = self._test_export_dataset( admin_user, project_id, - api_version=2, format=export_format, ) @@ -750,9 +718,8 @@ def test_can_export_and_import_dataset_with_skeletons( self._test_import_project(admin_user, project_id, import_format, import_data) - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("format_name", ("Datumaro 1.0", "ImageNet 1.0", "PASCAL VOC 1.1")) - def test_can_import_export_dataset_with_some_format(self, format_name: str, api_version: int): + def test_can_import_export_dataset_with_some_format(self, format_name: str): # https://github.com/cvat-ai/cvat/issues/4410 # https://github.com/cvat-ai/cvat/issues/4850 # https://github.com/cvat-ai/cvat/issues/4621 @@ -762,7 +729,6 @@ def test_can_import_export_dataset_with_some_format(self, format_name: str, api_ dataset = self._test_export_dataset( username, project_id, - api_version=api_version, format=format_name, ) @@ -775,27 +741,6 @@ def test_can_import_export_dataset_with_some_format(self, format_name: str, api_ self._test_import_project(username, project_id, format_name, import_data) - @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) - @pytest.mark.parametrize( - "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) - ) - def test_can_export_dataset_locally_and_to_cloud_with_both_api_versions( - self, admin_user: str, filter_projects, api_version: tuple[int], local_download: bool - ): - filter_ = "target_storage__location" - if local_download: - filter_ = "exclude_" + filter_ - - pid = filter_projects(**{filter_: "cloud_storage"})[0]["id"] - - self._test_export_dataset( - admin_user, - pid, - api_version=api_version, - local_download=local_download, - ) - - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("username, pid", [("admin1", 8)]) @pytest.mark.parametrize( "anno_format, anno_file_name, check_func", @@ -815,7 +760,6 @@ def test_exported_project_dataset_structure( anno_file_name, check_func, tasks, - api_version: int, ): project = self.projects[pid] @@ -837,7 +781,6 @@ def test_exported_project_dataset_structure( dataset = self._test_export_annotations( username, pid, - api_version=api_version, format=anno_format, ) @@ -845,8 +788,7 @@ def test_exported_project_dataset_structure( content = zip_file.read(anno_file_name) check_func(content, values_to_be_checked) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_import_export_annotations_with_rotation(self, api_version: int): + def 
test_can_import_export_annotations_with_rotation(self): # https://github.com/cvat-ai/cvat/issues/4378 username = "admin1" project_id = 4 @@ -854,7 +796,6 @@ def test_can_import_export_annotations_with_rotation(self, api_version: int): dataset = self._test_export_dataset( username, project_id, - api_version=api_version, ) tmp_file = io.BytesIO(dataset) @@ -877,8 +818,7 @@ def test_can_import_export_annotations_with_rotation(self, api_version: int): assert task1_rotation == task2_rotation - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_export_dataset_with_skeleton_labels_with_spaces(self, api_version: int): + def test_can_export_dataset_with_skeleton_labels_with_spaces(self): # https://github.com/cvat-ai/cvat/issues/5257 # https://github.com/cvat-ai/cvat/issues/5600 username = "admin1" @@ -887,26 +827,20 @@ def test_can_export_dataset_with_skeleton_labels_with_spaces(self, api_version: self._test_export_dataset( username, project_id, - api_version=api_version, format="COCO Keypoints 1.0", ) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_export_dataset_for_empty_project(self, filter_projects, api_version: int): + def test_can_export_dataset_for_empty_project(self, filter_projects): empty_project = filter_projects( tasks__count=0, exclude_target_storage__location="cloud_storage" )[0] self._test_export_dataset( "admin1", empty_project["id"], - api_version=api_version, format="COCO 1.0", ) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_export_project_dataset_when_some_tasks_have_no_data( - self, filter_projects, api_version: int - ): + def test_can_export_project_dataset_when_some_tasks_have_no_data(self, filter_projects): project = filter_projects( exclude_tasks__count=0, exclude_target_storage__location="cloud_storage" )[0] @@ -926,14 +860,10 @@ def test_can_export_project_dataset_when_some_tasks_have_no_data( self._test_export_dataset( "admin1", project["id"], - api_version=api_version, format="COCO 1.0", ) - @pytest.mark.parametrize("api_version", (1, 2)) - def test_can_export_project_dataset_when_all_tasks_have_no_data( - self, filter_projects, api_version: int - ): + def test_can_export_project_dataset_when_all_tasks_have_no_data(self, filter_projects): project = filter_projects(tasks__count=0, exclude_target_storage__location="cloud_storage")[ 0 ] @@ -958,14 +888,12 @@ def test_can_export_project_dataset_when_all_tasks_have_no_data( self._test_export_dataset( "admin1", project["id"], - api_version=api_version, format="COCO 1.0", ) - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("cloud_storage_id", [3]) # import/export bucket def test_can_export_and_import_dataset_after_deleting_related_storage( - self, admin_user, cloud_storage_id: int, api_version: int + self, admin_user, cloud_storage_id: int ): project_id = next( p @@ -983,7 +911,7 @@ def test_can_export_and_import_dataset_after_deleting_related_storage( result, response = api_client.projects_api.retrieve(project_id) assert all([not getattr(result, field) for field in ("source_storage", "target_storage")]) - dataset = self._test_export_dataset(admin_user, project_id, api_version=api_version) + dataset = self._test_export_dataset(admin_user, project_id) with io.BytesIO(dataset) as tmp_file: tmp_file.name = "dataset.zip" @@ -1010,7 +938,6 @@ def _export_task(task_id: int, format_name: str) -> io.BytesIO: return io.BytesIO( export_dataset( api_client.tasks_api, - api_version=2, id=task_id, format=format_name, save_images=False, @@ -1084,9 +1011,8 @@ def 
_export_task(task_id: int, format_name: str) -> io.BytesIO: ("Ultralytics YOLO Pose 1.0", "images/{subset}/"), ], ) - @pytest.mark.parametrize("api_version", (1, 2)) def test_creates_subfolders_for_subsets_on_export( - self, filter_tasks, admin_user, export_format, subset_path_template, api_version: int + self, filter_tasks, admin_user, export_format, subset_path_template ): group_key_func = itemgetter("project_id") subsets = ["Train", "Validation"] @@ -1099,9 +1025,7 @@ def test_creates_subfolders_for_subsets_on_export( ) if sorted(task["subset"] for task in group) == subsets ) - dataset = self._test_export_dataset( - admin_user, project_id, api_version=api_version, format=export_format - ) + dataset = self._test_export_dataset(admin_user, project_id, format=export_format) with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: for subset in subsets: folder_prefix = subset_path_template.format(subset=subset) @@ -1142,7 +1066,7 @@ def test_export_project_with_honeypots(self, admin_user: str): create_task(admin_user, spec=task_params, data=data_params) dataset = export_project_dataset( - admin_user, api_version=2, save_images=True, id=project.id, format="COCO 1.0" + admin_user, save_images=True, id=project.id, format="COCO 1.0" ) with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index a6f808f73056..1ccaead42034 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -153,7 +153,7 @@ def download_file(resource: str, rid: int, subresource: str): ), }[(resource, subresource)] - data = func(self.user, api_version=2, id=rid, download_result=True) + data = func(self.user, id=rid, download_result=True) assert data, f"Failed to download {resource} {subresource} locally" return data @@ -163,15 +163,12 @@ def download_file(resource: str, rid: int, subresource: str): def fxt_make_export_project_requests(self): def make_requests(project_ids: list[int]): for project_id in project_ids: - export_project_backup( - self.user, api_version=2, id=project_id, download_result=False - ) + export_project_backup(self.user, id=project_id, download_result=False) export_project_dataset( - self.user, api_version=2, save_images=True, id=project_id, download_result=False + self.user, save_images=True, id=project_id, download_result=False ) export_project_dataset( self.user, - api_version=2, save_images=False, id=project_id, download_result=False, @@ -183,13 +180,9 @@ def make_requests(project_ids: list[int]): def fxt_make_export_task_requests(self): def make_requests(task_ids: list[int]): for task_id in task_ids: - export_task_backup(self.user, api_version=2, id=task_id, download_result=False) - export_task_dataset( - self.user, api_version=2, save_images=True, id=task_id, download_result=False - ) - export_task_dataset( - self.user, api_version=2, save_images=False, id=task_id, download_result=False - ) + export_task_backup(self.user, id=task_id, download_result=False) + export_task_dataset(self.user, save_images=True, id=task_id, download_result=False) + export_task_dataset(self.user, save_images=False, id=task_id, download_result=False) return make_requests @@ -199,7 +192,6 @@ def make_requests(job_ids: list[int]): for job_id in job_ids: export_job_dataset( self.user, - api_version=2, save_images=True, id=job_id, format="COCO 1.0", @@ -207,7 +199,6 @@ def make_requests(job_ids: list[int]): ) export_job_dataset( self.user, - api_version=2, save_images=False, id=job_id, 
format="YOLO 1.1", @@ -284,7 +275,6 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p subresource = "dataset" if save_images else "annotations" export_project_dataset( owner["username"], - api_version=2, save_images=save_images, id=project["id"], download_result=False, @@ -326,7 +316,6 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects, format_na export_project_dataset( owner["username"], - api_version=2, save_images=True, id=project["id"], download_result=False, diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index e226935d82d0..90ef178bfe3b 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -742,11 +742,10 @@ def _test_can_export_dataset( username: str, task_id: int, *, - api_version: Union[int, tuple[int]], local_download: bool = True, **kwargs, ) -> Optional[bytes]: - dataset = export_task_dataset(username, api_version, save_images=True, id=task_id, **kwargs) + dataset = export_task_dataset(username, save_images=True, id=task_id, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -754,50 +753,28 @@ def _test_can_export_dataset( return dataset - @pytest.mark.usefixtures("restore_db_per_function") - @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) - @pytest.mark.parametrize( - "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) - ) - def test_can_export_task_dataset_locally_and_to_cloud_with_both_api_versions( - self, - admin_user, - tasks_with_shapes, - filter_tasks, - api_version: tuple[int], - local_download: bool, - ): - filter_ = "target_storage__location" - if local_download: - filter_ = "exclude_" + filter_ - filtered_ids = {t["id"] for t in filter_tasks(**{filter_: "cloud_storage"})} - - task_id = next(iter(filtered_ids & {t["id"] for t in tasks_with_shapes})) - self._test_can_export_dataset( - admin_user, - task_id, - api_version=api_version, - local_download=local_download, - ) - - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("tid", [21]) @pytest.mark.parametrize( "format_name", ["CVAT for images 1.1", "CVAT for video 1.1", "COCO Keypoints 1.0"] ) def test_can_export_task_with_several_jobs( - self, admin_user, tid, format_name, api_version: int + self, + admin_user, + tid, + format_name, ): self._test_can_export_dataset( admin_user, tid, format=format_name, - api_version=api_version, ) - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("tid", [8]) - def test_can_export_task_to_coco_format(self, admin_user: str, tid: int, api_version: int): + def test_can_export_task_to_coco_format( + self, + admin_user: str, + tid: int, + ): # these annotations contains incorrect frame numbers # in order to check that server handle such cases annotations = { @@ -886,7 +863,6 @@ def test_can_export_task_to_coco_format(self, admin_user: str, tid: int, api_ver admin_user, tid, format="COCO Keypoints 1.0", - api_version=api_version, ) # check that server saved track annotations correctly @@ -898,10 +874,12 @@ def test_can_export_task_to_coco_format(self, admin_user: str, tid: int, api_ver assert annotations["tracks"][0]["shapes"][0]["frame"] == 0 assert annotations["tracks"][0]["elements"][0]["shapes"][0]["frame"] == 0 - @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.usefixtures("restore_db_per_function") @pytest.mark.usefixtures("restore_redis_ondisk_per_function") - def 
test_can_download_task_with_special_chars_in_name(self, admin_user: str, api_version: int): + def test_can_download_task_with_special_chars_in_name( + self, + admin_user: str, + ): # Control characters in filenames may conflict with the Content-Disposition header # value restrictions, as it needs to include the downloaded file name. @@ -917,13 +895,14 @@ def test_can_download_task_with_special_chars_in_name(self, admin_user: str, api task_id, _ = create_task(admin_user, task_spec, task_data) - dataset = self._test_can_export_dataset(admin_user, task_id, api_version=api_version) + dataset = self._test_can_export_dataset(admin_user, task_id) assert zipfile.is_zipfile(io.BytesIO(dataset)) @pytest.mark.usefixtures("restore_db_per_function") - @pytest.mark.parametrize("api_version", (1, 2)) def test_export_dataset_after_deleting_related_cloud_storage( - self, admin_user: str, tasks, api_version: int + self, + admin_user: str, + tasks, ): related_field = "target_storage" @@ -940,7 +919,7 @@ def test_export_dataset_after_deleting_related_cloud_storage( result, response = api_client.tasks_api.retrieve(task_id) assert not result[related_field] - self._test_can_export_dataset(admin_user, task["id"], api_version=api_version) + self._test_can_export_dataset(admin_user, task["id"]) @pytest.mark.parametrize( "export_format, default_subset_name, subset_path_template", @@ -950,7 +929,6 @@ def test_export_dataset_after_deleting_related_cloud_storage( ("Ultralytics YOLO Detection 1.0", "train", "images/{subset}"), ], ) - @pytest.mark.parametrize("api_version", (1, 2)) def test_uses_subset_name( self, admin_user, @@ -958,7 +936,6 @@ def test_uses_subset_name( export_format, default_subset_name, subset_path_template, - api_version: int, ): tasks = filter_tasks(exclude_target_storage__location="cloud_storage") group_key_func = itemgetter("subset") @@ -973,7 +950,6 @@ def test_uses_subset_name( dataset = self._test_can_export_dataset( admin_user, task["id"], - api_version=api_version, format=export_format, ) with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: @@ -996,7 +972,6 @@ def test_datumaro_export_without_annotations_includes_image_info( dataset_file = io.BytesIO( export_dataset( api_client.tasks_api, - api_version=2, id=task["id"], format=DATUMARO_FORMAT_FOR_DIMENSION[dimension], save_images=False, @@ -3907,23 +3882,6 @@ def setup( with make_sdk_client(self.user) as client: self.client = client - @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) - @pytest.mark.parametrize( - "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) - ) - def test_can_export_backup_with_both_api_versions( - self, filter_tasks, api_version: tuple[int], local_download: bool - ): - task = filter_tasks( - **{("exclude_" if local_download else "") + "target_storage__location": "cloud_storage"} - )[0] - backup = export_task_backup(self.user, api_version, id=task["id"]) - - if local_download: - assert zipfile.is_zipfile(io.BytesIO(backup)) - else: - assert backup is None - @pytest.mark.parametrize("mode", ["annotation", "interpolation"]) def test_can_export_backup(self, tasks, mode): task_id = next(t for t in tasks if t["mode"] == mode and not t["validation_mode"])["id"] @@ -5372,7 +5330,6 @@ def test_can_import_datumaro_json(self, admin_user, tasks, dimension): dataset_archive = io.BytesIO( export_dataset( api_client.tasks_api, - api_version=2, id=task["id"], format=DATUMARO_FORMAT_FOR_DIMENSION[dimension], save_images=False, diff --git a/tests/python/rest_api/utils.py 
b/tests/python/rest_api/utils.py index 8d5032998358..76fd86bf80d5 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -41,76 +41,6 @@ def initialize_export(endpoint: Endpoint, *, expect_forbidden: bool = False, **k return rq_id -def wait_and_download_v1( - endpoint: Endpoint, - *, - max_retries: int = 50, - interval: float = 0.1, - download_result: bool = True, - **kwargs, -) -> Optional[bytes]: - for _ in range(max_retries): - (_, response) = endpoint.call_with_http_info(**kwargs, _parse_response=False) - if response.status in (HTTPStatus.CREATED, HTTPStatus.OK): - break - assert response.status == HTTPStatus.ACCEPTED - sleep(interval) - else: - assert ( - False - ), f"Export process was not finished within allowed time ({interval * max_retries}, sec)" - - if not download_result: - return None - - if response.status == HTTPStatus.CREATED: - (_, response) = endpoint.call_with_http_info( - **kwargs, action="download", _parse_response=False - ) - assert response.status == HTTPStatus.OK - - return response.data or None # return None when export was on cloud storage - - -def export_v1( - endpoint: Endpoint, - *, - max_retries: int = 50, - interval: float = 0.1, - expect_forbidden: bool = False, - wait_result: bool = True, - download_result: bool = True, - **kwargs, -) -> Optional[bytes]: - """Export datasets|annotations|backups using first version of export API - - Args: - endpoint (Endpoint): Export endpoint, will be called to initialize export process and to check status - max_retries (int, optional): Number of retries when checking process status. Defaults to 30. - interval (float, optional): Interval in seconds between retries. Defaults to 0.1. - expect_forbidden (bool, optional): Should export request be forbidden or not. Defaults to False. - wait_result (bool, optional): Wait until export process will be finished. Defaults to True. - download_result (bool, optional): Download exported file. Defaults to True. - - Returns: - bytes: The content of the file if downloaded locally. - None: If `wait_result` or `download_result` were False or the file is downloaded to cloud storage. 
- """ - # initialize background process and ensure that the first request returns 403 code if request should be forbidden - initialize_export(endpoint, expect_forbidden=expect_forbidden, **kwargs) - - if not wait_result: - return None - - return wait_and_download_v1( - endpoint, - max_retries=max_retries, - interval=interval, - download_result=download_result, - **kwargs, - ) - - def wait_and_download_v2( api_client: ApiClient, rq_id: str, @@ -143,7 +73,7 @@ def wait_and_download_v2( background_request.result_url, auth=(api_client.configuration.username, api_client.configuration.password), ) - assert response.status_code == HTTPStatus.OK + assert response.status_code == HTTPStatus.OK, f"Status: {response.status_code}" return response.content @@ -191,9 +121,6 @@ def export_v2( def export_dataset( api: Union[ProjectsApi, TasksApi, JobsApi], - api_version: Union[ - int, tuple[int] - ], # make this parameter required to be sure that all tests was updated and both API versions are used *, save_images: bool, max_retries: int = 50, @@ -201,126 +128,52 @@ def export_dataset( format: str = "CVAT for images 1.1", # pylint: disable=redefined-builtin **kwargs, ) -> Optional[bytes]: - def _get_endpoint_and_kwargs(version: int) -> Endpoint: - extra_kwargs = { - "format": format, - } - if version == 1: - endpoint = ( - api.retrieve_dataset_endpoint if save_images else api.retrieve_annotations_endpoint - ) - else: - endpoint = api.create_dataset_export_endpoint - extra_kwargs["save_images"] = save_images - return endpoint, extra_kwargs - - if api_version == 1: - endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version) - return export_v1( - endpoint, - max_retries=max_retries, - interval=interval, - **kwargs, - **extra_kwargs, - ) - elif api_version == 2: - endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version) - return export_v2( - endpoint, - max_retries=max_retries, - interval=interval, - **kwargs, - **extra_kwargs, - ) - elif isinstance(api_version, tuple): - assert len(api_version) == 2, "Expected 2 elements in api_version tuple" - initialize_endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version[0]) - rq_id = initialize_export(initialize_endpoint, **kwargs, **extra_kwargs) - - if api_version[1] == 1: - endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version[1]) - return wait_and_download_v1( - endpoint, max_retries=max_retries, interval=interval, **kwargs, **extra_kwargs - ) - else: - return wait_and_download_v2( - api.api_client, rq_id, max_retries=max_retries, interval=interval - ) - - assert False, "Unsupported API version" + return export_v2( + api.create_dataset_export_endpoint, + max_retries=max_retries, + interval=interval, + save_images=save_images, + format=format, + **kwargs, + ) # FUTURE-TODO: support username: optional, api_client: optional -def export_project_dataset( - username: str, api_version: Union[int, tuple[int]], *args, **kwargs -) -> Optional[bytes]: +# tODO: make func signature more userfrendly +def export_project_dataset(username: str, *args, **kwargs) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_dataset(api_client.projects_api, api_version, *args, **kwargs) + return export_dataset(api_client.projects_api, *args, **kwargs) -def export_task_dataset( - username: str, api_version: Union[int, tuple[int]], *args, **kwargs -) -> Optional[bytes]: +def export_task_dataset(username: str, *args, **kwargs) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_dataset(api_client.tasks_api, 
api_version, *args, **kwargs) + return export_dataset(api_client.tasks_api, *args, **kwargs) -def export_job_dataset( - username: str, api_version: Union[int, tuple[int]], *args, **kwargs -) -> Optional[bytes]: +def export_job_dataset(username: str, *args, **kwargs) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_dataset(api_client.jobs_api, api_version, *args, **kwargs) + return export_dataset(api_client.jobs_api, *args, **kwargs) def export_backup( api: Union[ProjectsApi, TasksApi], - api_version: Union[ - int, tuple[int] - ], # make this parameter required to be sure that all tests was updated and both API versions are used *, max_retries: int = 50, interval: float = 0.1, **kwargs, ) -> Optional[bytes]: - if api_version == 1: - endpoint = api.retrieve_backup_endpoint - return export_v1(endpoint, max_retries=max_retries, interval=interval, **kwargs) - elif api_version == 2: - endpoint = api.create_backup_export_endpoint - return export_v2(endpoint, max_retries=max_retries, interval=interval, **kwargs) - elif isinstance(api_version, tuple): - assert len(api_version) == 2, "Expected 2 elements in api_version tuple" - initialize_endpoint = ( - api.retrieve_backup_endpoint - if api_version[0] == 1 - else api.create_backup_export_endpoint - ) - rq_id = initialize_export(initialize_endpoint, **kwargs) + endpoint = api.create_backup_export_endpoint + return export_v2(endpoint, max_retries=max_retries, interval=interval, **kwargs) - if api_version[1] == 1: - return wait_and_download_v1( - api.retrieve_backup_endpoint, max_retries=max_retries, interval=interval, **kwargs - ) - else: - return wait_and_download_v2( - api.api_client, rq_id, max_retries=max_retries, interval=interval - ) - assert False, "Unsupported API version" - - -def export_project_backup( - username: str, api_version: Union[int, tuple[int]], *args, **kwargs -) -> Optional[bytes]: +def export_project_backup(username: str, *args, **kwargs) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_backup(api_client.projects_api, api_version, *args, **kwargs) + return export_backup(api_client.projects_api, *args, **kwargs) -def export_task_backup( - username: str, api_version: Union[int, tuple[int]], *args, **kwargs -) -> Optional[bytes]: +def export_task_backup(username: str, *args, **kwargs) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_backup(api_client.tasks_api, api_version, *args, **kwargs) + return export_backup(api_client.tasks_api, *args, **kwargs) def import_resource( From 0f71c087806c0d9760c12bb1fdde73391b18431f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Fri, 7 Feb 2025 20:16:04 +0100 Subject: [PATCH 03/14] Rename method --- cvat/apps/engine/mixins.py | 4 ++-- cvat/apps/engine/permissions.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index ad7b901f6166..7a881a9629be 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -446,7 +446,7 @@ class DatasetMixin: }, ) @action(detail=True, methods=['POST'], serializer_class=None, url_path='dataset/export') - def initialize_dataset_export(self, request: PatchedRequest, pk: int): + def initiate_dataset_export(self, request: PatchedRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() save_images = is_dataset_export(request) @@ -542,7 +542,7 @@ def import_backup_v1(self, request: PatchedRequest, import_func: Callable) -> Re }, ) 
@action(detail=True, methods=['POST'], serializer_class=None, url_path='backup/export') - def initialize_backup_export(self, request: PatchedRequest, pk: int): + def initiate_backup_export(self, request: PatchedRequest, pk: int): db_object = self.get_object() # force to call check_object_permissions export_manager = BackupExportManager(db_object, request) export_manager.initialize_export_args() diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index 448d9b82cd1e..056aad044ac3 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -312,8 +312,8 @@ def get_scopes(request, view, obj): ('dataset', 'POST'): Scopes.IMPORT_DATASET, ('append_dataset_chunk', 'HEAD'): Scopes.IMPORT_DATASET, ('append_dataset_chunk', 'PATCH'): Scopes.IMPORT_DATASET, - ('initialize_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, - ('initialize_backup_export', 'POST'): Scopes.EXPORT_BACKUP, + ('initiate_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('initiate_backup_export', 'POST'): Scopes.EXPORT_BACKUP, ('import_backup', 'POST'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'PATCH'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'HEAD'): Scopes.IMPORT_BACKUP, @@ -541,7 +541,7 @@ def get_scopes(request, view, obj) -> list[Scopes]: ('annotations', 'POST'): Scopes.IMPORT_ANNOTATIONS, ('append_annotations_chunk', 'PATCH'): Scopes.UPDATE_ANNOTATIONS, ('append_annotations_chunk', 'HEAD'): Scopes.UPDATE_ANNOTATIONS, - ('initialize_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('initiate_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('metadata', 'GET'): Scopes.VIEW_METADATA, ('metadata', 'PATCH'): Scopes.UPDATE_METADATA, ('data', 'GET'): Scopes.VIEW_DATA, @@ -552,7 +552,7 @@ def get_scopes(request, view, obj) -> list[Scopes]: ('import_backup', 'POST'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'PATCH'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'HEAD'): Scopes.IMPORT_BACKUP, - ('initialize_backup_export', 'POST'): Scopes.EXPORT_BACKUP, + ('initiate_backup_export', 'POST'): Scopes.EXPORT_BACKUP, ('preview', 'GET'): Scopes.VIEW, ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, @@ -787,7 +787,7 @@ def get_scopes(request, view, obj): ('metadata','GET'): Scopes.VIEW_METADATA, ('metadata','PATCH'): Scopes.UPDATE_METADATA, ('issues', 'GET'): Scopes.VIEW, - ('initialize_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('initiate_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('preview', 'GET'): Scopes.VIEW, ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, From a22e10fb676e954c815c32d0bbeb7b122b6b4d27 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Sun, 9 Feb 2025 17:59:24 +0100 Subject: [PATCH 04/14] Split RQMeta into several classes && small fixes --- cvat/apps/dataset_manager/bindings.py | 4 +- cvat/apps/dataset_manager/project.py | 4 +- cvat/apps/dataset_manager/views.py | 4 +- cvat/apps/engine/background.py | 43 ++-- cvat/apps/engine/backup.py | 16 +- cvat/apps/engine/cache.py | 4 +- cvat/apps/engine/rq_job_handler.py | 235 
+++++++++++++------ cvat/apps/engine/serializers.py | 42 ++-- cvat/apps/engine/task.py | 63 ++--- cvat/apps/engine/utils.py | 41 ---- cvat/apps/engine/views.py | 24 +- cvat/apps/events/export.py | 4 +- cvat/apps/events/handlers.py | 28 ++- cvat/apps/lambda_manager/views.py | 10 +- cvat/apps/quality_control/quality_reports.py | 4 +- cvat/apps/quality_control/views.py | 4 +- 16 files changed, 295 insertions(+), 235 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index b039c3bc463d..e3757b6e33e5 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -44,7 +44,7 @@ ShapeType, Task, ) -from cvat.apps.engine.rq_job_handler import RQMeta +from cvat.apps.engine.rq_job_handler import ImportRQMeta from ..engine.log import ServerLogManager from .annotation import AnnotationIR, AnnotationManager, TrackManager @@ -2446,7 +2446,7 @@ def load_dataset_data(project_annotation, dataset: dm.Dataset, project_data): raise CvatImportError(f'Target project does not have label with name "{label.name}"') for subset_id, subset in enumerate(dataset.subsets().values()): job = rq.get_current_job() - job_meta = RQMeta.from_job(job) + job_meta = ImportRQMeta.from_job(job) job_meta.status = 'Task from dataset is being created...' job_meta.progress = (subset_id + job_meta.task_progress or 0.) / len(dataset.subsets().keys()) job_meta.save() diff --git a/cvat/apps/dataset_manager/project.py b/cvat/apps/dataset_manager/project.py index c7bd23d9020c..53cd7df30cc8 100644 --- a/cvat/apps/dataset_manager/project.py +++ b/cvat/apps/dataset_manager/project.py @@ -17,6 +17,7 @@ from cvat.apps.dataset_manager.util import TmpDirManager from cvat.apps.engine import models from cvat.apps.engine.log import DatasetLogManager +from cvat.apps.engine.rq_job_handler import ImportRQMeta from cvat.apps.engine.serializers import DataSerializer, TaskWriteSerializer from cvat.apps.engine.task import _create_thread as create_task @@ -196,8 +197,7 @@ def data(self) -> dict: @transaction.atomic def import_dataset_as_project(src_file, project_id, format_name, conv_mask_to_poly): rq_job = rq.get_current_job() - from cvat.apps.engine.rq_job_handler import RQMeta - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = ImportRQMeta.from_job(rq_job) rq_job_meta.status = 'Dataset import has been started...' rq_job_meta.progress = 0. 
rq_job_meta.save() diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 331b60592cbe..10fce4133db4 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -20,7 +20,7 @@ import cvat.apps.dataset_manager.task as task from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.models import Job, Project, Task -from cvat.apps.engine.rq_job_handler import RQMeta +from cvat.apps.engine.rq_job_handler import ExportRQMeta from cvat.apps.engine.utils import get_rq_lock_by_user from .formats.registry import EXPORT_FORMATS, IMPORT_FORMATS @@ -88,7 +88,7 @@ def _patched_retry(*_1, **_2): settings.CVAT_QUEUES.EXPORT_DATA.value ) - rq_job_meta = RQMeta.from_job(current_rq_job) + rq_job_meta = ExportRQMeta.from_job(current_rq_job) user_id = rq_job_meta.user.id or -1 with get_rq_lock_by_user(settings.CVAT_QUEUES.EXPORT_DATA.value, user_id): diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 4af93e11bde6..669636e3ffe3 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -39,7 +39,7 @@ Task, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq_job_handler import RQId, RQMeta +from cvat.apps.engine.rq_job_handler import ExportRQMeta, RQId from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import ( build_annotations_file_name, @@ -165,9 +165,8 @@ def get_instance_update_time(self) -> datetime: def get_timestamp(self, time_: datetime) -> str: return datetime.strftime(time_, "%Y_%m_%d_%H_%M_%S") - # TODO: drop ext support @abstractmethod - def get_result_filename_and_ext(self) -> tuple[str, str | None]: ... + def get_result_filename(self) -> str: ... 
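# --- Editor's sketch (illustration only, not part of the patch) ----------------
# get_result_filename() now returns only the filename stem: when the client
# supplies a name, the concrete managers keep it but strip its extension with
# os.path.splitext (see the implementations below); the extension is presumably
# re-added later from the chosen export format. A standalone check of the stdlib
# call relied on there:
import os.path as osp


def result_filename_stem(user_supplied: str) -> str:
    # mirrors `osp.splitext(filename)[0]` used by the export managers
    return osp.splitext(user_supplied)[0]


assert result_filename_stem("annotations.zip") == "annotations"
assert result_filename_stem("backup_2025_02_07") == "backup_2025_02_07"
# --------------------------------------------------------------------------------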
def validate_rq_id(self, *, rq_id: str | None) -> HttpResponseBadRequest | None: if not rq_id: @@ -207,7 +206,7 @@ def download_file(self) -> Response: if rq_job_status != RQJobStatus.FINISHED: return Response(status=status.HTTP_204_NO_CONTENT) - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = ExportRQMeta.from_job(rq_job) file_path = rq_job.return_value() if not file_path: @@ -216,7 +215,7 @@ def download_file(self) -> Response: "A result for exporting job was not found for finished RQ job", status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) - if rq_job_meta.get_export_result_url() + if rq_job_meta.result.url else Response(status=status.HTTP_204_NO_CONTENT) ) @@ -396,13 +395,12 @@ def setup_background_job( result_url = self.make_result_url(rq_id=rq_id) with get_rq_lock_by_user(queue, user_id): - result_filename, result_ext = self.get_result_filename_and_ext() - meta = RQMeta.build_base(request=self.request, db_obj=self.db_instance) - RQMeta.update_result_info( - meta, - result_url=result_url, + result_filename = self.get_result_filename() + meta = ExportRQMeta.build( + request=self.request, + db_obj=self.db_instance, result_filename=result_filename, - result_file_ext=result_ext, + result_url=result_url, ) queue.enqueue_call( func=func, @@ -417,11 +415,11 @@ def setup_background_job( failure_ttl=cache_ttl.total_seconds(), ) - def get_result_filename_and_ext(self) -> tuple[str, str | None]: + def get_result_filename(self) -> str: filename = self.export_args.filename if filename: - return osp.splitext(filename) + return osp.splitext(filename)[0] instance_update_time = self.get_instance_update_time() instance_timestamp = self.get_timestamp(instance_update_time) @@ -433,7 +431,7 @@ def get_result_filename_and_ext(self) -> tuple[str, str | None]: is_annotation_file=not self.export_args.save_images, ) - return filename, None + return filename def get_download_api_endpoint_view_name(self) -> str: return f"{self.resource}-download-dataset" @@ -466,11 +464,11 @@ def initialize_export_args(self) -> None: def validate_export_args(self): return - def get_result_filename_and_ext(self) -> tuple[str, str | None]: + def get_result_filename(self) -> str: filename = self.export_args.filename if filename: - return osp.splitext(filename) + return osp.splitext(filename)[0] instance_update_time = self.get_instance_update_time() instance_timestamp = self.get_timestamp(instance_update_time) @@ -481,7 +479,7 @@ def get_result_filename_and_ext(self) -> tuple[str, str | None]: timestamp=instance_timestamp, ) - return filename, None + return filename def build_rq_id(self): return RQId( @@ -551,13 +549,12 @@ def setup_background_job( user_id = self.request.user.id with get_rq_lock_by_user(queue, user_id): - result_filename, result_ext = self.get_result_filename_and_ext() - meta = RQMeta.build_base(request=self.request, db_obj=self.db_instance) - RQMeta.update_result_info( - meta, - result_url=result_url, + result_filename = self.get_result_filename() + meta = ExportRQMeta.build( + request=self.request, + db_obj=self.db_instance, result_filename=result_filename, - result_file_ext=result_ext, + result_url=result_url, ) queue.enqueue_call( diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index ebb44ad863c8..8304c6d287d2 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -62,7 +62,7 @@ StorageMethodChoice, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq_job_handler import RQId, RQMeta +from 
cvat.apps.engine.rq_job_handler import ImportRQMeta, RQId from cvat.apps.engine.serializers import ( AnnotationGuideWriteSerializer, AssetWriteSerializer, @@ -84,7 +84,6 @@ from cvat.apps.engine.utils import ( av_scan_paths, define_dependent_job, - get_rq_job_meta, get_rq_lock_by_user, import_resource_with_clean_up_after, process_failed_job, @@ -1198,21 +1197,22 @@ def _import(importer, request: PatchedRequest, queue, rq_id, Serializer, file_fi user_id = request.user.id with get_rq_lock_by_user(queue, user_id): + meta = ImportRQMeta.build( + request=request, + db_obj=None, + tmp_file=filename, + ) rq_job = queue.enqueue_call( func=func, args=func_args, job_id=rq_id, - # TODO: - meta={ - 'tmp_file': filename, - **get_rq_job_meta(request=request, db_obj=None) - }, + meta=meta, depends_on=define_dependent_job(queue, user_id), result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) else: - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = ImportRQMeta.from_job(rq_job) if rq_job_meta.user.id != request.user.id: return Response(status=status.HTTP_403_FORBIDDEN) diff --git a/cvat/apps/engine/cache.py b/cvat/apps/engine/cache.py index 5a4364bfce49..f655ad6e1f6c 100644 --- a/cvat/apps/engine/cache.py +++ b/cvat/apps/engine/cache.py @@ -53,7 +53,7 @@ ZipCompressedChunkWriter, load_image, ) -from cvat.apps.engine.rq_job_handler import RQMeta +from cvat.apps.engine.rq_job_handler import RQMetaWithFailureInfo from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, format_list, @@ -108,7 +108,7 @@ def wait_for_rq_job(rq_job: rq.job.Job): return elif job_status in ("failed",): rq_job.get_meta() # refresh from Redis - job_meta = RQMeta.from_job(rq_job) + job_meta = RQMetaWithFailureInfo.from_job(rq_job) exc_type = job_meta.exc_type or Exception exc_args = job_meta.exc_args or ("Cannot create chunk",) raise exc_type(*exc_args) diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 7a58ad60f345..217a421a2d73 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -4,6 +4,7 @@ from __future__ import annotations +from abc import ABCMeta, abstractmethod from collections.abc import Iterable from datetime import datetime from typing import Any, Optional, Union @@ -27,9 +28,8 @@ optional_float_validator = attrs.validators.optional(attrs.validators.instance_of(float)) -def _update_value(self: RQMeta, attribute: attrs.Attribute, value: Any): - setattr(self, attribute.name, value) - self.__job.meta[attribute.name] = value +def _update_value(self: AbstractRQMeta, attribute: attrs.Attribute, value: Any): + self._job.meta[attribute.name] = value @attrs.frozen @@ -47,16 +47,70 @@ class RequestInfo: # TODO: it is not timestamp timestamp: datetime = attrs.field(validator=[attrs.validators.instance_of(datetime)]) + def to_dict(self) -> dict[str, Any]: + return asdict(self) + @attrs.frozen class ExportResultInfo: - url: str = attrs.field(validator=[str_validator]) + url: str | None = attrs.field(validator=[optional_str_validator]) filename: str = attrs.field(validator=[str_validator]) - ext: str | None = attrs.field(validator=[optional_str_validator]) + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + @attrs.define -class RQMeta: - __job: RQJob = attrs.field(init=False) +class AbstractRQMeta(metaclass=ABCMeta): + _job: RQJob | None = attrs.field(init=False, default=None) + + def to_dict(self) -> dict: + return asdict(self, filter=lambda k, _: k.name != 
"_job") + + @classmethod + def from_job(cls, rq_job: RQJob): + keys_to_keep = [k.name for k in attrs.fields(cls)] + meta = cls(**{k: v for k, v in rq_job.meta.items() if k in keys_to_keep}) + meta._job = rq_job + + return meta + + def save(self) -> None: + assert isinstance(self._job, RQJob), "To save meta, rq job must be set" + self._job.save_meta() + @staticmethod + @abstractmethod + def get_resettable_fields() -> list[RQJobMetaField]: + """Return a list of fields that must be reset on retry""" + + + def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: + resettable_fields = self.get_resettable_fields() + + return { + k: v for k, v in self._job.meta.items() if k not in resettable_fields + } + + +@attrs.define(kw_only=True) +class RQMetaWithFailureInfo(AbstractRQMeta): + # immutable and optional fields + formatted_exception: str | None = attrs.field(validator=[optional_str_validator], default=None) + exc_type: str | None = attrs.field(validator=[optional_str_validator], default=None) + exc_args: Iterable | None = attrs.field(default=None) + + @staticmethod + def get_resettable_fields() -> list[RQJobMetaField]: + """Return a list of fields that must be reset on retry""" + return [ + RQJobMetaField.FORMATTED_EXCEPTION, + RQJobMetaField.EXCEPTION_TYPE, + RQJobMetaField.EXCEPTION_ARGS, + ] + +@attrs.define(kw_only=True) +class BaseRQMeta(RQMetaWithFailureInfo): # immutable and required fields user: UserInfo = attrs.field( validator=[ @@ -77,69 +131,21 @@ class RQMeta: project_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) task_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) job_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) - function_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) - lambda_: bool | None = attrs.field(validator=[optional_bool_validator], default=None, on_setattr=attrs.setters.frozen) - result: ExportResultInfo | None = attrs.field(default=None, converter=lambda d: ExportResultInfo(**d) if d else None) - - # mutable fields - status: str = attrs.field(validator=[optional_str_validator], default="", on_setattr=_update_value) + # import && lambda progress: float | None = attrs.field(validator=[optional_float_validator], default=None) - task_progress: float | None = attrs.field(validator=[optional_float_validator],default=None) - - formatted_exception: str | None = attrs.field(validator=[optional_str_validator], default=None) - exc_type: str | None = attrs.field(validator=[optional_str_validator], default=None) - exc_args: Iterable | None = attrs.field(default=None) - - def get_export_result_url(self) -> str | None: - # keep backward compatibility - return self.result.url or self.__job.meta.get(RQJobMetaField.RESULT_URL) - - # todo: - def get_export_filename(self): - pass # and ext - - @classmethod - def from_job(cls, rq_job: RQJob) -> "RQMeta": - meta = cls(**rq_job.meta) - meta.__job = rq_job - - return meta - - def save(self) -> None: - assert hasattr(self, "__job") and isinstance(self.__job, RQJob) - self.__job.save_meta() @staticmethod def get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" - return [ - RQJobMetaField.FORMATTED_EXCEPTION, - RQJobMetaField.PROGRESS, - RQJobMetaField.TASK_PROGRESS, - RQJobMetaField.STATUS - ] - - def reset_meta_on_retry(self) -> 
dict[RQJobMetaField, Any]: - resettable_fields = self.get_resettable_fields() - - return { - k: v for k, v in self.__job.meta.items() if k not in resettable_fields - } - - def to_dict(self) -> dict: - d = asdict(self) - if v := d.pop(RQJobMetaField.LAMBDA + "_", None) is not None: - d[RQJobMetaField.LAMBDA] = v - - return d + return RQMetaWithFailureInfo.get_resettable_fields() + [RQJobMetaField.PROGRESS] @classmethod - def build_base( + def build( cls, *, request: PatchedRequest, - db_obj: Model, + db_obj: Model | None, ): # to prevent circular import from cvat.apps.events.handlers import job_id, organization_slug, task_id @@ -153,41 +159,116 @@ def build_base( user = request.user - meta = cls( - user=asdict(UserInfo( + return cls( + user=UserInfo( id=getattr(user, "id", None), username=getattr(user, "username", None), email=getattr(user, "email", None), - )), - request=asdict(RequestInfo( + ).to_dict(), + request=RequestInfo( uuid=request.uuid, timestamp=timezone.localtime(), - )), + ).to_dict(), org_id=oid, org_slug=oslug, project_id=pid, task_id=tid, job_id=jid, - ) + ).to_dict() + +@attrs.define(kw_only=True) +class ExportRQMeta(BaseRQMeta): + result: ExportResultInfo= attrs.field(converter=lambda d: ExportResultInfo(**d)) + + @staticmethod + def get_resettable_fields() -> list[RQJobMetaField]: + """Return a list of fields that must be reset on retry""" + base_fields = BaseRQMeta.get_resettable_fields() + return base_fields + [RQJobMetaField.RESULT] + + @classmethod + def build( + cls, + *, + request: PatchedRequest, + db_obj: Model | None, + result_url: str | None, + result_filename: str, + ): + base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) + + return cls( + **base_meta, + result=ExportResultInfo( + filename=result_filename, + url=result_url, + ).to_dict(), + ).to_dict() + +@attrs.define(kw_only=True) +class ImportRQMeta(BaseRQMeta): + # immutable && optional fields + tmp_file: str | None = attrs.field(validator=[optional_str_validator], default=None, on_setattr=attrs.setters.frozen) + + # mutable fields + # TODO: move into base? 
+ status: str = attrs.field(validator=[optional_str_validator], default="", on_setattr=_update_value) + task_progress: float | None = attrs.field(validator=[optional_float_validator],default=None) - # TODO: do not include unset fields - return meta.to_dict() + @staticmethod + def get_resettable_fields() -> list[RQJobMetaField]: + """Return a list of fields that must be reset on retry""" + base_fields = BaseRQMeta.get_resettable_fields() + + return base_fields + [ + RQJobMetaField.PROGRESS, + RQJobMetaField.TASK_PROGRESS, + RQJobMetaField.STATUS + ] @classmethod - def update_result_info( + def build( cls, - original_meta: dict[RQJobMetaField, Any], *, - result_url: str, result_filename: str, result_file_ext: str | None = None - ) -> None: - original_meta[RQJobMetaField.RESULT] = asdict( - ExportResultInfo(url=result_url, filename=result_filename, ext=result_file_ext) - ) + request: PatchedRequest, + db_obj: Model | None, + tmp_file: str | None = None, + ): + base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) + + return cls( + **base_meta, + tmp_file=tmp_file, + + ).to_dict() + +@attrs.define(kw_only=True) +class LambdaRQMeta(BaseRQMeta): + # immutable fields + function_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + lambda_: bool | None = attrs.field(validator=[optional_bool_validator], init=False, default=True, on_setattr=attrs.setters.frozen) + + def to_dict(self) -> dict: + d = asdict(self) + if v := d.pop(RQJobMetaField.LAMBDA + "_", None) is not None: + d[RQJobMetaField.LAMBDA] = v + + return d @classmethod - def update_lambda_info(cls, original_meta: dict[RQJobMetaField, Any], *, function_id: int) -> None: - original_meta[RQJobMetaField.FUNCTION_ID] = function_id - original_meta[RQJobMetaField.LAMBDA] = True + def build( + cls, + *, + request: PatchedRequest, + db_obj: Model, + function_id: int, + ): + base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) + return cls( + **base_meta, + function_id=function_id, + ).to_dict() + # TODO: check that RQJobMetaField is used only in this module class RQJobMetaField: diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 282cbbbdf6e3..f4508b142e0d 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -40,7 +40,14 @@ from cvat.apps.engine.frame_provider import FrameQuality, TaskFrameProvider from cvat.apps.engine.log import ServerLogManager from cvat.apps.engine.permissions import TaskPermission -from cvat.apps.engine.rq_job_handler import RQId, RQMeta +from cvat.apps.engine.rq_job_handler import ( + BaseRQMeta, + ExportRQMeta, + ImportRQMeta, + LambdaRQMeta, + RequestAction, + RQId, +) from cvat.apps.engine.task_validation import HoneypotFrameSelector from cvat.apps.engine.utils import ( CvatChunkTimestampMismatchError, @@ -3492,9 +3499,9 @@ class RequestDataOperationSerializer(serializers.Serializer): def to_representation(self, rq_job: RQJob) -> dict[str, Any]: parsed_rq_id: RQId = rq_job.parsed_rq_id - rq_job_meta = RQMeta.from_job(rq_job) - return { + base_rq_job_meta = BaseRQMeta.from_job(rq_job) + representation = { "type": ":".join( [ parsed_rq_id.action, @@ -3502,12 +3509,16 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: ] ), "target": parsed_rq_id.target, - "project_id": rq_job_meta.project_id, - "task_id": rq_job_meta.task_id, - "job_id": rq_job_meta.job_id, - "format": parsed_rq_id.format, - "function_id": rq_job_meta.function_id, + "project_id": 
base_rq_job_meta.project_id, + "task_id": base_rq_job_meta.task_id, + "job_id": base_rq_job_meta.job_id, } + if parsed_rq_id.action == RequestAction.AUTOANNOTATE: + representation["function_id"] = LambdaRQMeta.from_job(rq_job).function_id + elif parsed_rq_id.action in (RequestAction.IMPORT, RequestAction.EXPORT): + representation["format"] = parsed_rq_id.format + + return representation class RequestSerializer(serializers.Serializer): # SerializerMethodField is not used here to mark "status" field as required and fix schema generation. @@ -3534,7 +3545,7 @@ class RequestSerializer(serializers.Serializer): @extend_schema_field(UserIdentifiersSerializer()) def get_owner(self, rq_job: RQJob) -> dict[str, Any]: # TODO: define parsed meta once - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = BaseRQMeta.from_job(rq_job) return UserIdentifiersSerializer(rq_job_meta.user.to_dict()).data @extend_schema_field( @@ -3542,7 +3553,7 @@ def get_owner(self, rq_job: RQJob) -> dict[str, Any]: ) def get_progress(self, rq_job: RQJob) -> Decimal: # TODO: define parsed meta once - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = ImportRQMeta.from_job(rq_job) # progress of task creation is stored in "task_progress" field # progress of project import is stored in "progress" field return Decimal(rq_job_meta.progress or rq_job_meta.task_progress or 0.) @@ -3564,7 +3575,7 @@ def get_expiry_date(self, rq_job: RQJob) -> Optional[str]: @extend_schema_field(serializers.CharField(allow_blank=True)) def get_message(self, rq_job: RQJob) -> str: # TODO: define parsed meta once - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = ImportRQMeta.from_job(rq_job) rq_job_status = rq_job.get_status() message = '' @@ -3578,16 +3589,13 @@ def get_message(self, rq_job: RQJob) -> str: def to_representation(self, rq_job: RQJob) -> dict[str, Any]: representation = super().to_representation(rq_job) - # TODO: define parsed meta once - rq_job_meta = RQMeta.from_job(rq_job) - # FUTURE-TODO: support such statuses on UI if representation["status"] in (RQJobStatus.DEFERRED, RQJobStatus.SCHEDULED): representation["status"] = RQJobStatus.QUEUED - if representation["status"] == RQJobStatus.FINISHED: - if result_url := rq_job_meta.get_export_result_url(): - representation["result_url"] = result_url + if representation["status"] == RQJobStatus.FINISHED: + if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: + representation["result_url"] = ExportRQMeta.from_job(rq_job).result.url if ( rq_job.parsed_rq_id.action == models.RequestAction.IMPORT diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 7eeab55acbed..9533807ac1ac 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -9,12 +9,13 @@ import os import re import shutil -from collections.abc import Iterator, Sequence +from collections.abc import Iterable, Iterator, Sequence from contextlib import closing from copy import deepcopy from datetime import datetime, timezone +from functools import partial from pathlib import Path -from typing import Any, NamedTuple, Optional, Union +from typing import Any, Callable, NamedTuple, Optional, Union from urllib import parse as urlparse from urllib import request as urlrequest @@ -47,7 +48,7 @@ sort, ) from cvat.apps.engine.models import RequestAction, RequestTarget -from cvat.apps.engine.rq_job_handler import RQId, RQMeta +from cvat.apps.engine.rq_job_handler import ImportRQMeta, RQId from cvat.apps.engine.task_validation import HoneypotFrameSelector from cvat.apps.engine.utils import ( 
av_scan_paths, @@ -82,7 +83,7 @@ def create( func=_create_thread, args=(db_task.pk, data), job_id=rq_id, - meta=RQMeta.build_base(request=request, db_obj=db_task), + meta=ImportRQMeta.build(request=request, db_obj=db_task), depends_on=define_dependent_job(q, user_id), failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds(), ) @@ -107,13 +108,12 @@ class SegmentsParams(NamedTuple): def _copy_data_from_share_point( server_files: list[str], *, + update_status_callback: Callable[[str], None], upload_dir: str, - server_dir: Optional[str] = None, - server_files_exclude: Optional[list[str]] = None, - rq_job_meta: RQMeta, + server_dir: str | None = None, + server_files_exclude: list[str] | None = None, ): - rq_job_meta.status = 'Data are being copied from source..' - rq_job_meta.save() + update_status_callback('Data are being copied from source..') filtered_server_files = server_files.copy() @@ -198,12 +198,10 @@ def _segments(): def _create_segments_and_jobs( db_task: models.Task, *, + update_status_callback: Callable[[str], None], job_file_mapping: Optional[JobFileMapping] = None, ): - rq_job = rq.get_current_job() - rq_job_meta = RQMeta.from_job(rq_job) - rq_job_meta.status = 'Task is being saved in database' - rq_job_meta.save() + update_status_callback('Task is being saved in database') segments, segment_size, overlap = _generate_segment_params( db_task=db_task, job_file_mapping=job_file_mapping, @@ -442,8 +440,12 @@ def _validate_scheme(url): if parsed_url.scheme not in ALLOWED_SCHEMES: raise ValueError('Unsupported URL scheme: {}. Only http and https are supported'.format(parsed_url.scheme)) -def _download_data(urls, upload_dir, *, rq_job_meta: RQMeta): - job = rq.get_current_job() +def _download_data( + urls: Iterable[str], + upload_dir: str, + *, + update_status_callback: Callable[[str], None], +): local_files = {} with make_requests_session() as session: @@ -453,8 +455,7 @@ def _download_data(urls, upload_dir, *, rq_job_meta: RQMeta): raise Exception("filename collision: {}".format(name)) _validate_scheme(url) slogger.glob.info("Downloading: {}".format(url)) - rq_job_meta.status = '{} is being downloaded..'.format(url) - rq_job_meta.save() + update_status_callback('{} is being downloaded..'.format(url)) response = session.get(url, stream=True, proxies=PROXIES_FOR_UNTRUSTED_URLS) if response.status_code == 200: @@ -592,12 +593,14 @@ def _create_thread( slogger.glob.info("create task #{}".format(db_task.id)) job = rq.get_current_job() - rq_job_meta = RQMeta.from_job(job) + rq_job_meta = ImportRQMeta.from_job(job) - def _update_status(msg: str) -> None: + def _update_status(rq_job_meta: ImportRQMeta, msg: str) -> None: rq_job_meta.status = msg rq_job_meta.save() + update_status = partial(_update_status, rq_job_meta) + job_file_mapping = _validate_job_file_mapping(db_task, data) validation_params = _validate_validation_params( @@ -609,7 +612,7 @@ def _update_status(msg: str) -> None: is_data_in_cloud = db_data.storage == models.StorageChoice.CLOUD_STORAGE if data['remote_files'] and not is_dataset_import: - data['remote_files'] = _download_data(data['remote_files'], upload_dir, rq_job_meta=rq_job_meta) + data['remote_files'] = _download_data(data['remote_files'], upload_dir, update_status_callback=update_status) # find and validate manifest file manifest_files = _find_manifest_files(data) @@ -753,7 +756,7 @@ def _update_status(msg: str) -> None: # Packed media must be downloaded for task creation any(v for k, v in media.items() if k != 'image') ): - _update_status("Downloading input 
media") + update_status("Downloading input media") filtered_data = [] for files in (i for i in media.values() if i): @@ -790,7 +793,7 @@ def _update_status(msg: str) -> None: upload_dir=upload_dir, server_dir=data.get('server_files_path'), server_files_exclude=data.get('server_files_exclude'), - rq_job_meta=rq_job_meta, + update_status_callback=update_status, ) manifest_root = upload_dir elif is_data_in_cloud: @@ -813,7 +816,7 @@ def _update_status(msg: str) -> None: av_scan_paths(upload_dir) - _update_status('Media files are being extracted...') + update_status('Media files are being extracted...') # If upload from server_files image and directories # need to update images list by all found images in directories @@ -1037,7 +1040,7 @@ def _update_status(msg: str) -> None: if task_mode == MEDIA_TYPES['video']['mode']: if manifest_file: try: - _update_status('Validating the input manifest file') + update_status('Validating the input manifest file') manifest = VideoManifestValidator( source_path=os.path.join(upload_dir, media_files[0]), @@ -1059,13 +1062,13 @@ def _update_status(msg: str) -> None: base_msg = "Failed to parse the uploaded manifest file" slogger.glob.warning(ex, exc_info=True) - _update_status(base_msg) + update_status(base_msg) else: manifest = None if not manifest: try: - _update_status('Preparing a manifest file') + update_status('Preparing a manifest file') # TODO: maybe generate manifest in a temp directory manifest = VideoManifestManager(db_data.get_manifest_path()) @@ -1077,7 +1080,7 @@ def _update_status(msg: str) -> None: ) manifest.create() - _update_status('A manifest has been created') + update_status('A manifest has been created') except Exception as ex: manifest.remove() @@ -1089,7 +1092,7 @@ def _update_status(msg: str) -> None: base_msg = "" slogger.glob.warning(ex, exc_info=True) - _update_status( + update_status( f"Failed to create manifest for the uploaded video{base_msg}. " "A manifest will not be used in this task" ) @@ -1402,7 +1405,7 @@ def _update_status(msg: str) -> None: slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id)) - _create_segments_and_jobs(db_task, job_file_mapping=job_file_mapping) + _create_segments_and_jobs(db_task, job_file_mapping=job_file_mapping, update_status_callback=update_status) if validation_params and validation_params['mode'] == models.ValidationMode.GT: # The RNG backend must not change to yield reproducible frame picks, @@ -1552,7 +1555,7 @@ def update_progress(self, progress: float): status_message, progress_animation[self._call_counter] ) - rq_job_meta = RQMeta.from_job(self._rq_job) + rq_job_meta = ImportRQMeta.from_job(self._rq_job) rq_job_meta.status = status_message rq_job_meta.task_progress = progress or 0. 
rq_job_meta.save() diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index 067b521baeb1..b59dc1e5c999 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -29,7 +29,6 @@ from django.conf import settings from django.core.exceptions import ValidationError from django.http.request import HttpRequest -from django.utils import timezone from django.utils.http import urlencode from django_rq.queues import DjangoRQ from django_sendfile import sendfile as _sendfile @@ -220,46 +219,6 @@ def get_rq_lock_for_job(queue: DjangoRQ, rq_id: str, *, timeout: int = 60, block blocking_timeout=blocking_timeout, ) -# TODO: delete -def get_rq_job_meta( - request: HttpRequest, - db_obj: Any, - *, - result_url: Optional[str] = None, -): - # to prevent circular import - from cvat.apps.events.handlers import job_id, organization_slug, task_id - from cvat.apps.webhooks.signals import organization_id, project_id - - oid = organization_id(db_obj) - oslug = organization_slug(db_obj) - pid = project_id(db_obj) - tid = task_id(db_obj) - jid = job_id(db_obj) - - meta = { - 'user': { - 'id': getattr(request.user, "id", None), - 'username': getattr(request.user, "username", None), - 'email': getattr(request.user, "email", None), - }, - 'request': { - "uuid": request.uuid, - "timestamp": timezone.localtime(), - }, - 'org_id': oid, - 'org_slug': oslug, - 'project_id': pid, - 'task_id': tid, - 'job_id': jid, - } - - - if result_url: - meta['result_url'] = result_url - - return meta - def reverse(viewname, *, args=None, kwargs=None, query_params: Optional[dict[str, str]] = None, request: Optional[HttpRequest] = None, diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index beb4413d3ee3..6822453ec7a9 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -128,7 +128,12 @@ get_cloud_storage_for_import_or_export, get_iam_context, ) -from cvat.apps.engine.rq_job_handler import RQId, RQMeta, is_rq_job_owner +from cvat.apps.engine.rq_job_handler import ( + ImportRQMeta, + RQId, + RQMetaWithFailureInfo, + is_rq_job_owner, +) from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, @@ -172,7 +177,6 @@ from cvat.apps.engine.utils import ( av_scan_paths, define_dependent_job, - get_rq_job_meta, get_rq_lock_by_user, import_resource_with_clean_up_after, process_failed_job, @@ -3113,7 +3117,7 @@ def perform_destroy(self, instance): target.touch() def rq_exception_handler(rq_job: RQJob, exc_type: Type[Exception], exc_value, tb): - rq_job_meta = RQMeta.from_job(rq_job) + rq_job_meta = RQMetaWithFailureInfo.from_job(rq_job) rq_job_meta.formatted_exception = "".join( traceback.format_exception_only(exc_type, exc_value)) if rq_job.origin == settings.CVAT_QUEUES.CHUNKS.value: @@ -3203,16 +3207,13 @@ def _import_annotations(request, rq_id_factory, rq_func, db_obj, format_name, user_id = request.user.id with get_rq_lock_by_user(queue, user_id): + meta = ImportRQMeta.build(request=request, db_obj=db_obj, tmp_file=filename) rq_job = queue.enqueue_call( func=func, args=func_args, job_id=rq_id, depends_on=define_dependent_job(queue, user_id, rq_id=rq_id), - # TODO: - meta={ - 'tmp_file': filename, - **get_rq_job_meta(request=request, db_obj=db_obj), - }, + meta=meta, result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(), failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds() ) @@ -3308,15 +3309,12 @@ def _import_project_dataset( user_id = request.user.id with get_rq_lock_by_user(queue, user_id): + meta = ImportRQMeta.build(request=request, 
db_obj=db_obj, tmp_file=filename)
         rq_job = queue.enqueue_call(
             func=func,
             args=func_args,
             job_id=rq_id,
-            # TODO:
-            meta={
-                'tmp_file': filename,
-                **get_rq_job_meta(request=request, db_obj=db_obj),
-            },
+            meta=meta,
             depends_on=define_dependent_job(queue, user_id, rq_id=rq_id),
             result_ttl=settings.IMPORT_CACHE_SUCCESS_TTL.total_seconds(),
             failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds()
diff --git a/cvat/apps/events/export.py b/cvat/apps/events/export.py
index 5f610e478add..9d3f5e184092 100644
--- a/cvat/apps/events/export.py
+++ b/cvat/apps/events/export.py
@@ -17,7 +17,7 @@
 
 from cvat.apps.dataset_manager.views import log_exception
 from cvat.apps.engine.log import ServerLogManager
-from cvat.apps.engine.rq_job_handler import RQMeta
+from cvat.apps.engine.rq_job_handler import RQMetaWithFailureInfo
 from cvat.apps.engine.utils import sendfile
 
 slogger = ServerLogManager(__name__)
@@ -152,7 +152,7 @@ def export(request, filter_query, queue_name):
         if os.path.exists(file_path):
             return Response(status=status.HTTP_201_CREATED)
     elif rq_job.is_failed:
-        rq_job_meta = RQMeta.from_job(rq_job)
+        rq_job_meta = RQMetaWithFailureInfo.from_job(rq_job)
         exc_info = rq_job_meta.formatted_exception or str(rq_job.exc_info)
         rq_job.delete()
         return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
diff --git a/cvat/apps/events/handlers.py b/cvat/apps/events/handlers.py
index f571b9c2a9b9..28078e39744a 100644
--- a/cvat/apps/events/handlers.py
+++ b/cvat/apps/events/handlers.py
@@ -22,7 +22,7 @@
     Task,
     User,
 )
-from cvat.apps.engine.rq_job_handler import RQMeta
+from cvat.apps.engine.rq_job_handler import BaseRQMeta
 from cvat.apps.engine.serializers import (
     BasicUserSerializer,
     CloudStorageReadSerializer,
@@ -97,7 +97,14 @@ def job_id(instance):
     return None
 
 
-def get_user(instance=None):
+def get_user(instance=None) -> User | dict | None:
+    def _get_user_from_rq_job(rq_job: rq.job.Job) -> dict | None:
+        # RQ jobs created in the chunks queue have no user info
+        try:
+            return BaseRQMeta.from_job(rq_job).user.to_dict()
+        except AttributeError:
+            return None
+
     # Try to get current user from request
     user = get_current_user()
     if user is not None:
@@ -105,11 +112,11 @@ def get_user(instance=None):
 
     # Try to get user from rq_job
     if isinstance(instance, rq.job.Job):
-        return RQMeta.from_job(instance).user
+        return _get_user_from_rq_job(instance)
     else:
         rq_job = rq.get_current_job()
         if rq_job:
-            return RQMeta.from_job(rq_job).user
+            return _get_user_from_rq_job(rq_job)
 
     if isinstance(instance, User):
         return instance
@@ -118,16 +125,23 @@ def get_user(instance=None):
 
 
 def get_request(instance=None):
+    def _get_request_from_rq_job(rq_job: rq.job.Job) -> dict | None:
+        # RQ jobs created in the chunks queue have no request info
+        try:
+            return BaseRQMeta.from_job(rq_job).request.to_dict()
+        except AttributeError:
+            return None
+
     request = get_current_request()
     if request is not None:
         return request
 
     if isinstance(instance, rq.job.Job):
-        return RQMeta.from_job(instance).request
+        return _get_request_from_rq_job(instance)
     else:
         rq_job = rq.get_current_job()
         if rq_job:
-            return RQMeta.from_job(rq_job).request
+            return _get_request_from_rq_job(rq_job)
 
     return None
 
@@ -569,7 +583,7 @@ def handle_function_call(
 
 
 def handle_rq_exception(rq_job, exc_type, exc_value, tb):
-    rq_job_meta = RQMeta.from_job(rq_job)
+    rq_job_meta = BaseRQMeta.from_job(rq_job)
     oid = rq_job_meta.org_id
     oslug = rq_job_meta.org_slug
     pid = rq_job_meta.project_id
diff --git a/cvat/apps/lambda_manager/views.py 
b/cvat/apps/lambda_manager/views.py index a1eb7600ead3..bf2308173db9 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -46,7 +46,7 @@ SourceType, Task, ) -from cvat.apps.engine.rq_job_handler import RQId, RQMeta +from cvat.apps.engine.rq_job_handler import LambdaRQMeta, RQId from cvat.apps.engine.serializers import LabeledDataSerializer from cvat.apps.engine.utils import define_dependent_job, get_rq_lock_by_user from cvat.apps.events.handlers import handle_function_call @@ -640,11 +640,11 @@ def enqueue( user_id = request.user.id with get_rq_lock_by_user(queue, user_id): - meta = RQMeta.build_base( + meta = LambdaRQMeta.build( request=request, db_obj=Job.objects.get(pk=job) if job else Task.objects.get(pk=task), + function_id=lambda_func.id, ) - RQMeta.update_lambda_info(meta, function_id=lambda_func.id) rq_job = queue.create_job( LambdaJob(None), @@ -672,7 +672,7 @@ def enqueue( def fetch_job(self, pk): queue = self._get_queue() rq_job = queue.fetch_job(pk) - if rq_job is None or not RQMeta.from_job(rq_job).lambda_: + if rq_job is None or not LambdaRQMeta.from_job(rq_job).lambda_: raise ValidationError( "{} lambda job is not found".format(pk), code=status.HTTP_404_NOT_FOUND ) @@ -915,7 +915,7 @@ def _map(sublabel_body): # progress is in [0, 1] range def _update_progress(progress): job = rq.get_current_job() - rq_job_meta = RQMeta.from_job(job) + rq_job_meta = LambdaRQMeta.from_job(job) # If the job has been deleted, get_status will return None. Thus it will # exist the loop. rq_job_meta.progress = int(progress * 100) diff --git a/cvat/apps/quality_control/quality_reports.py b/cvat/apps/quality_control/quality_reports.py index 2f3de7235fcd..b3f50b90bb8c 100644 --- a/cvat/apps/quality_control/quality_reports.py +++ b/cvat/apps/quality_control/quality_reports.py @@ -54,7 +54,7 @@ User, ValidationMode, ) -from cvat.apps.engine.rq_job_handler import RQMeta +from cvat.apps.engine.rq_job_handler import BaseRQMeta from cvat.apps.engine.utils import define_dependent_job, get_rq_lock_by_user from cvat.apps.profiler import silk_profile from cvat.apps.quality_control import models @@ -2283,7 +2283,7 @@ def schedule_custom_quality_check_job( self._check_task_quality, task_id=task.id, job_id=rq_id, - meta=RQMeta.build_base(request=request, db_obj=task), + meta=BaseRQMeta.build(request=request, db_obj=task), result_ttl=self._JOB_RESULT_TTL, failure_ttl=self._JOB_RESULT_TTL, depends_on=dependency, diff --git a/cvat/apps/quality_control/views.py b/cvat/apps/quality_control/views.py index 7c72742fe468..01c5176be85e 100644 --- a/cvat/apps/quality_control/views.py +++ b/cvat/apps/quality_control/views.py @@ -21,7 +21,7 @@ from cvat.apps.engine.mixins import PartialUpdateModelMixin from cvat.apps.engine.models import Task -from cvat.apps.engine.rq_job_handler import RQMeta +from cvat.apps.engine.rq_job_handler import BaseRQMeta from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import get_server_url from cvat.apps.quality_control import quality_reports as qc @@ -294,7 +294,7 @@ def create(self, request, *args, **kwargs): if ( not rq_job or not QualityReportPermission.create_scope_check_status( - request, job_owner_id=RQMeta.from_job(rq_job).user.id + request, job_owner_id=BaseRQMeta.from_job(rq_job).user.id ) .check_access() .allow From 19a322a4c797a52e0ed08a5f04deda8a5213b6ae Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Sun, 9 Feb 2025 18:03:13 +0100 Subject: [PATCH 05/14] Black code --- cvat/apps/engine/rq_job_handler.py | 
148 ++++++++++++++++------------- 1 file changed, 81 insertions(+), 67 deletions(-) diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 217a421a2d73..a1684b477430 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -41,6 +41,7 @@ class UserInfo: def to_dict(self) -> dict[str, Any]: return asdict(self) + @attrs.frozen class RequestInfo: uuid: str = attrs.field(validator=[str_validator]) @@ -50,6 +51,7 @@ class RequestInfo: def to_dict(self) -> dict[str, Any]: return asdict(self) + @attrs.frozen class ExportResultInfo: url: str | None = attrs.field(validator=[optional_str_validator]) @@ -59,7 +61,6 @@ def to_dict(self) -> dict[str, Any]: return asdict(self) - @attrs.define class AbstractRQMeta(metaclass=ABCMeta): _job: RQJob | None = attrs.field(init=False, default=None) @@ -81,16 +82,13 @@ def save(self) -> None: @staticmethod @abstractmethod - def get_resettable_fields() -> list[RQJobMetaField]: + def _get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" - def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: - resettable_fields = self.get_resettable_fields() + resettable_fields = self._get_resettable_fields() - return { - k: v for k, v in self._job.meta.items() if k not in resettable_fields - } + return {k: v for k, v in self._job.meta.items() if k not in resettable_fields} @attrs.define(kw_only=True) @@ -101,7 +99,7 @@ class RQMetaWithFailureInfo(AbstractRQMeta): exc_args: Iterable | None = attrs.field(default=None) @staticmethod - def get_resettable_fields() -> list[RQJobMetaField]: + def _get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" return [ RQJobMetaField.FORMATTED_EXCEPTION, @@ -109,13 +107,12 @@ def get_resettable_fields() -> list[RQJobMetaField]: RQJobMetaField.EXCEPTION_ARGS, ] + @attrs.define(kw_only=True) class BaseRQMeta(RQMetaWithFailureInfo): # immutable and required fields user: UserInfo = attrs.field( - validator=[ - attrs.validators.instance_of(UserInfo) - ], + validator=[attrs.validators.instance_of(UserInfo)], converter=lambda d: UserInfo(**d), on_setattr=attrs.setters.frozen, ) @@ -126,19 +123,29 @@ class BaseRQMeta(RQMetaWithFailureInfo): ) # immutable and optional fields - org_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) - org_slug: str | None = attrs.field(validator=[optional_str_validator], default=None, on_setattr=attrs.setters.frozen) - project_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) - task_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) - job_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) + org_id: int | None = attrs.field( + validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen + ) + org_slug: str | None = attrs.field( + validator=[optional_str_validator], default=None, on_setattr=attrs.setters.frozen + ) + project_id: int | None = attrs.field( + validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen + ) + task_id: int | None = attrs.field( + validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen + ) + job_id: int | None = attrs.field( + validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen + ) 
# import && lambda progress: float | None = attrs.field(validator=[optional_float_validator], default=None) @staticmethod - def get_resettable_fields() -> list[RQJobMetaField]: + def _get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" - return RQMetaWithFailureInfo.get_resettable_fields() + [RQJobMetaField.PROGRESS] + return RQMetaWithFailureInfo._get_resettable_fields() + [RQJobMetaField.PROGRESS] @classmethod def build( @@ -176,14 +183,15 @@ def build( job_id=jid, ).to_dict() + @attrs.define(kw_only=True) class ExportRQMeta(BaseRQMeta): - result: ExportResultInfo= attrs.field(converter=lambda d: ExportResultInfo(**d)) + result: ExportResultInfo = attrs.field(converter=lambda d: ExportResultInfo(**d)) @staticmethod - def get_resettable_fields() -> list[RQJobMetaField]: + def _get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" - base_fields = BaseRQMeta.get_resettable_fields() + base_fields = BaseRQMeta._get_resettable_fields() return base_fields + [RQJobMetaField.RESULT] @classmethod @@ -205,25 +213,30 @@ def build( ).to_dict(), ).to_dict() + @attrs.define(kw_only=True) class ImportRQMeta(BaseRQMeta): # immutable && optional fields - tmp_file: str | None = attrs.field(validator=[optional_str_validator], default=None, on_setattr=attrs.setters.frozen) + tmp_file: str | None = attrs.field( + validator=[optional_str_validator], default=None, on_setattr=attrs.setters.frozen + ) # mutable fields # TODO: move into base? - status: str = attrs.field(validator=[optional_str_validator], default="", on_setattr=_update_value) - task_progress: float | None = attrs.field(validator=[optional_float_validator],default=None) + status: str = attrs.field( + validator=[optional_str_validator], default="", on_setattr=_update_value + ) + task_progress: float | None = attrs.field(validator=[optional_float_validator], default=None) @staticmethod - def get_resettable_fields() -> list[RQJobMetaField]: + def _get_resettable_fields() -> list[RQJobMetaField]: """Return a list of fields that must be reset on retry""" - base_fields = BaseRQMeta.get_resettable_fields() + base_fields = BaseRQMeta._get_resettable_fields() return base_fields + [ RQJobMetaField.PROGRESS, RQJobMetaField.TASK_PROGRESS, - RQJobMetaField.STATUS + RQJobMetaField.STATUS, ] @classmethod @@ -239,14 +252,21 @@ def build( return cls( **base_meta, tmp_file=tmp_file, - ).to_dict() + @attrs.define(kw_only=True) class LambdaRQMeta(BaseRQMeta): # immutable fields - function_id: int | None = attrs.field(validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen) - lambda_: bool | None = attrs.field(validator=[optional_bool_validator], init=False, default=True, on_setattr=attrs.setters.frozen) + function_id: int | None = attrs.field( + validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen + ) + lambda_: bool | None = attrs.field( + validator=[optional_bool_validator], + init=False, + default=True, + on_setattr=attrs.setters.frozen, + ) def to_dict(self) -> dict: d = asdict(self) @@ -270,55 +290,51 @@ def build( ).to_dict() -# TODO: check that RQJobMetaField is used only in this module class RQJobMetaField: # common fields FORMATTED_EXCEPTION = "formatted_exception" - REQUEST = 'request' - USER = 'user' - PROJECT_ID = 'project_id' - TASK_ID = 'task_id' - JOB_ID = 'job_id' - LAMBDA = 'lambda' - ORG_ID = 'org_id' - ORG_SLUG = 'org_slug' - STATUS = 'status' - PROGRESS = 'progress' - TASK_PROGRESS = 
'task_progress' + REQUEST = "request" + USER = "user" + PROJECT_ID = "project_id" + TASK_ID = "task_id" + JOB_ID = "job_id" + LAMBDA = "lambda" + ORG_ID = "org_id" + ORG_SLUG = "org_slug" + STATUS = "status" + PROGRESS = "progress" + TASK_PROGRESS = "task_progress" # export specific fields - RESULT_URL = 'result_url' - RESULT = 'result' - FUNCTION_ID = 'function_id' - EXCEPTION_TYPE = 'exc_type' - EXCEPTION_ARGS = 'exc_args' + RESULT_URL = "result_url" + RESULT = "result" + FUNCTION_ID = "function_id" + EXCEPTION_TYPE = "exc_type" + EXCEPTION_ARGS = "exc_args" + def is_rq_job_owner(rq_job: RQJob, user_id: int) -> bool: - return rq_job.meta.get(RQJobMetaField.USER, {}).get('id') == user_id + return rq_job.meta.get(RQJobMetaField.USER, {}).get("id") == user_id + @attrs.frozen() class RQId: - action: RequestAction = attrs.field( - validator=attrs.validators.instance_of(RequestAction) - ) - target: RequestTarget = attrs.field( - validator=attrs.validators.instance_of(RequestTarget) - ) - identifier: Union[int, UUID] = attrs.field( - validator=attrs.validators.instance_of((int, UUID)) - ) + action: RequestAction = attrs.field(validator=attrs.validators.instance_of(RequestAction)) + target: RequestTarget = attrs.field(validator=attrs.validators.instance_of(RequestTarget)) + identifier: Union[int, UUID] = attrs.field(validator=attrs.validators.instance_of((int, UUID))) subresource: Optional[RequestSubresource] = attrs.field( - validator=attrs.validators.optional( - attrs.validators.instance_of(RequestSubresource) - ), - kw_only=True, default=None, + validator=attrs.validators.optional(attrs.validators.instance_of(RequestSubresource)), + kw_only=True, + default=None, ) user_id: Optional[int] = attrs.field( validator=attrs.validators.optional(attrs.validators.instance_of(int)), - kw_only=True, default=None, + kw_only=True, + default=None, ) format: Optional[str] = attrs.field( validator=attrs.validators.optional(attrs.validators.instance_of(str)), - kw_only=True, default=None, + kw_only=True, + default=None, ) _OPTIONAL_FIELD_REQUIREMENTS = { @@ -353,9 +369,7 @@ def render( return f"{common_prefix}-{self.subresource}" elif RequestAction.EXPORT == self.action: if self.format is None: - return ( - f"{common_prefix}-{self.subresource}-by-{self.user_id}" - ) + return f"{common_prefix}-{self.subresource}-by-{self.user_id}" format_to_be_used_in_urls = self.format.replace(" ", "_").replace(".", "@") return f"{common_prefix}-{self.subresource}-in-{format_to_be_used_in_urls}-format-by-{self.user_id}" @@ -382,7 +396,7 @@ def parse(rq_id: str) -> RQId: elif RequestAction.IMPORT == action: identifier, subresource_str = unparsed.rsplit("-", maxsplit=1) subresource = RequestSubresource(subresource_str) - else: # action == export + else: # action == export identifier, subresource_str, unparsed = unparsed.split("-", maxsplit=2) subresource = RequestSubresource(subresource_str) From f535d3288c4a1a4708c6f91746ad43e15f34a731 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Sun, 9 Feb 2025 18:10:25 +0100 Subject: [PATCH 06/14] f --- cvat/apps/engine/rq_job_handler.py | 35 ++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index a1684b477430..7efc63b49065 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -32,7 +32,7 @@ def _update_value(self: AbstractRQMeta, attribute: attrs.Attribute, value: Any): self._job.meta[attribute.name] = value -@attrs.frozen 
+@attrs.frozen(kw_only=True) class UserInfo: id: int = attrs.field(validator=[int_validator]) username: str = attrs.field(validator=[str_validator]) @@ -42,7 +42,7 @@ def to_dict(self) -> dict[str, Any]: return asdict(self) -@attrs.frozen +@attrs.frozen(kw_only=True) class RequestInfo: uuid: str = attrs.field(validator=[str_validator]) # TODO: it is not timestamp @@ -52,7 +52,7 @@ def to_dict(self) -> dict[str, Any]: return asdict(self) -@attrs.frozen +@attrs.frozen(kw_only=True) class ExportResultInfo: url: str | None = attrs.field(validator=[optional_str_validator]) filename: str = attrs.field(validator=[str_validator]) @@ -94,9 +94,17 @@ def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: @attrs.define(kw_only=True) class RQMetaWithFailureInfo(AbstractRQMeta): # immutable and optional fields - formatted_exception: str | None = attrs.field(validator=[optional_str_validator], default=None) - exc_type: str | None = attrs.field(validator=[optional_str_validator], default=None) - exc_args: Iterable | None = attrs.field(default=None) + formatted_exception: str | None = attrs.field( + validator=[optional_str_validator], + default=None, + on_setattr=_update_value, + ) + exc_type: str | None = attrs.field( + validator=[optional_str_validator], + default=None, + on_setattr=_update_value, + ) + exc_args: Iterable | None = attrs.field(default=None, on_setattr=_update_value) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: @@ -140,7 +148,11 @@ class BaseRQMeta(RQMetaWithFailureInfo): ) # import && lambda - progress: float | None = attrs.field(validator=[optional_float_validator], default=None) + progress: float | None = attrs.field( + validator=[optional_float_validator], + default=None, + on_setattr=_update_value, + ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: @@ -186,7 +198,10 @@ def build( @attrs.define(kw_only=True) class ExportRQMeta(BaseRQMeta): - result: ExportResultInfo = attrs.field(converter=lambda d: ExportResultInfo(**d)) + result: ExportResultInfo = attrs.field( + converter=lambda d: ExportResultInfo(**d), + on_setattr=attrs.setters.frozen, + ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: @@ -226,7 +241,9 @@ class ImportRQMeta(BaseRQMeta): status: str = attrs.field( validator=[optional_str_validator], default="", on_setattr=_update_value ) - task_progress: float | None = attrs.field(validator=[optional_float_validator], default=None) + task_progress: float | None = attrs.field( + validator=[optional_float_validator], default=None, on_setattr=_update_value + ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: From 3c9895bd2afa0e2023e7a45728413ca62ed4ad0f Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Sun, 9 Feb 2025 18:23:25 +0100 Subject: [PATCH 07/14] Fix typo --- cvat/apps/dataset_manager/bindings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index e3757b6e33e5..740fe65ccf30 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -2448,7 +2448,7 @@ def load_dataset_data(project_annotation, dataset: dm.Dataset, project_data): job = rq.get_current_job() job_meta = ImportRQMeta.from_job(job) job_meta.status = 'Task from dataset is being created...' - job_meta.progress = (subset_id + job_meta.task_progress or 0.) 
/ len(dataset.subsets().keys()) + job_meta.progress = (subset_id + (job_meta.task_progress or 0.)) / len(dataset.subsets().keys()) job_meta.save() task_fields = { From 1762178c553c451671070f8fb86e6de404e73a48 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 10 Feb 2025 13:00:39 +0100 Subject: [PATCH 08/14] Revert some changes --- cvat/apps/engine/background.py | 579 +++++++++++++++++-------- cvat/apps/engine/mixins.py | 86 ++-- cvat/apps/engine/permissions.py | 96 +--- cvat/apps/engine/rq_job_handler.py | 39 +- cvat/apps/engine/serializers.py | 14 +- cvat/apps/engine/utils.py | 14 +- cvat/apps/engine/views.py | 540 ++++++++++++++++------- tests/python/rest_api/test_jobs.py | 49 ++- tests/python/rest_api/test_projects.py | 150 +++++-- tests/python/rest_api/test_requests.py | 23 +- tests/python/rest_api/test_tasks.py | 81 +++- tests/python/rest_api/utils.py | 191 +++++++- 12 files changed, 1279 insertions(+), 583 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 669636e3ffe3..d7a8121c6bd6 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -7,7 +7,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime -from typing import Any, Callable, ClassVar, Optional, Union +from typing import Any, Callable, Optional, Union import django_rq from attrs.converters import to_bool @@ -23,8 +23,7 @@ from rq.job import JobStatus as RQJobStatus import cvat.apps.dataset_manager as dm -from cvat.apps.dataset_manager.util import get_export_cache_lock -from cvat.apps.dataset_manager.views import get_export_cache_ttl +from cvat.apps.dataset_manager.util import extend_export_file_lifetime from cvat.apps.engine import models from cvat.apps.engine.backup import ProjectExporter, TaskExporter, create_backup from cvat.apps.engine.cloud_provider import export_resource_to_cloud_storage @@ -39,7 +38,7 @@ Task, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq_job_handler import ExportRQMeta, RQId +from cvat.apps.engine.rq_job_handler import RQId, ExportRQMeta from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import ( build_annotations_file_name, @@ -59,63 +58,43 @@ LOCK_ACQUIRE_TIMEOUT = LOCK_TTL - 5 -class ResourceExportManager(ABC): +class _ResourceExportManager(ABC): QUEUE_NAME = settings.CVAT_QUEUES.EXPORT_DATA.value - SUPPORTED_RESOURCES: ClassVar[set[RequestSubresource]] - SUPPORTEd_SUBRESOURCES: ClassVar[set[RequestSubresource]] def __init__( self, + version: int, db_instance: Union[models.Project, models.Task, models.Job], - request: Request, + *, + export_callback: Callable, ) -> None: """ Args: + version (int): API endpoint version to use. 
Possible options: 1 or 2 db_instance (Union[models.Project, models.Task, models.Job]): Model instance export_callback (Callable): Main function that will be executed in the background """ + self.version = version self.db_instance = db_instance - self.request = request self.resource = db_instance.__class__.__name__.lower() if self.resource not in self.SUPPORTED_RESOURCES: raise ValueError("Unexpected type of db_instance: {}".format(type(db_instance))) - def initialize_export_args(self, *, export_callback: Callable[..., str]) -> None: self.export_callback = export_callback @abstractmethod - def validate_export_args(self) -> Response | None: - pass - def export(self) -> Response: - assert hasattr(self, "export_callback") - assert hasattr(self, "export_args") - - if invalid_response := self.validate_export_args(): - return invalid_response - - queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = self.build_rq_id() - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - if response := self.handle_rq_job(rq_job, queue): - return response - self.setup_background_job(queue, rq_id) - - self.send_events() - - serializer = RqIdSerializer(data={"rq_id": rq_id}) - serializer.is_valid(raise_exception=True) - - return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + pass @abstractmethod def setup_background_job(self, queue: DjangoRQ, rq_id: str) -> None: pass - def handle_rq_job(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Response]: + @abstractmethod + def _handle_rq_job_v1(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Response]: + pass + + def _handle_rq_job_v2(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Response]: if not rq_job: return None @@ -139,16 +118,27 @@ def handle_rq_job(self, rq_job: Optional[RQJob], queue: DjangoRQ) -> Optional[Re rq_job.delete() return None + def handle_rq_job(self, rq_job: RQJob | None, queue: DjangoRQ) -> Optional[Response]: + if self.version == 1: + return self._handle_rq_job_v1(rq_job, queue) + elif self.version == 2: + return self._handle_rq_job_v2(rq_job, queue) + + raise ValueError("Unsupported version") + @abstractmethod - def get_download_api_endpoint_view_name(self) -> str: ... + def get_v1_endpoint_view_name(self) -> str: + pass - def make_result_url(self, *, rq_id: str) -> str: - view_name = self.get_download_api_endpoint_view_name() + def make_result_url(self) -> str: + view_name = self.get_v1_endpoint_view_name() result_url = reverse(view_name, args=[self.db_instance.pk], request=self.request) + query_dict = self.request.query_params.copy() + query_dict["action"] = "download" + result_url += "?" + query_dict.urlencode() - return result_url + f"?rq_id={rq_id}" + return result_url - # TODO: move method to the model class (or remove it and use just instance.updated_date) def get_instance_update_time(self) -> datetime: instance_update_time = timezone.localtime(self.db_instance.updated_date) if isinstance(self.db_instance, Project): @@ -161,83 +151,9 @@ def get_instance_update_time(self) -> datetime: instance_update_time = max(tasks_update + [instance_update_time]) return instance_update_time - # TODO: move into a model class def get_timestamp(self, time_: datetime) -> str: return datetime.strftime(time_, "%Y_%m_%d_%H_%M_%S") - @abstractmethod - def get_result_filename(self) -> str: ... 
- - def validate_rq_id(self, *, rq_id: str | None) -> HttpResponseBadRequest | None: - if not rq_id: - return HttpResponseBadRequest("Missing request id in query parameters") - - parsed_rq_id = RQId.parse(rq_id) - assert parsed_rq_id.action == RequestAction.EXPORT - assert parsed_rq_id.target == RequestTarget(self.resource) - assert parsed_rq_id.identifier == self.db_instance.pk - assert parsed_rq_id.subresource in self.SUPPORTEd_SUBRESOURCES - - @abstractmethod - def build_rq_id(self) -> str: ... - - @abstractmethod - def send_events(self) -> None: ... - - def download_file(self) -> Response: - queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) - rq_id = self.request.query_params.get("rq_id") - - if invalid_response := self.validate_rq_id(rq_id=rq_id): - return invalid_response - - # ensure that there is no race condition when processing parallel requests - with get_rq_lock_for_job(queue, rq_id): - rq_job = queue.fetch_job(rq_id) - - if not rq_job: - return HttpResponseBadRequest("Unknown export request id") - - # define status once to avoid refreshing it on each check - # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases - rq_job_status = rq_job.get_status(refresh=False) - - # handle cases where the status is None for some reason - if rq_job_status != RQJobStatus.FINISHED: - return Response(status=status.HTTP_204_NO_CONTENT) - - rq_job_meta = ExportRQMeta.from_job(rq_job) - file_path = rq_job.return_value() - - if not file_path: - return ( - Response( - "A result for exporting job was not found for finished RQ job", - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - if rq_job_meta.result.url - else Response(status=status.HTTP_204_NO_CONTENT) - ) - - with get_export_cache_lock( - file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT - ): - if not osp.exists(file_path): - return Response( - "The exported file has expired, please retry exporting", - status=status.HTTP_404_NOT_FOUND, - ) - - # TODO: write redis migration - filename = rq_job_meta.result.filename + osp.splitext(file_path)[1] - - return sendfile( - self.request, - file_path, - attachment=True, - attachment_filename=filename, - ) - def cancel_and_delete(rq_job: RQJob) -> None: # In the case the server is configured with ONE_RUNNING_JOB_IN_QUEUE_PER_USER @@ -246,9 +162,8 @@ def cancel_and_delete(rq_job: RQJob) -> None: rq_job.delete() -class DatasetExportManager(ResourceExportManager): - SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK, RequestTarget.JOB} - SUPPORTEd_SUBRESOURCES = {RequestSubresource.DATASET, RequestSubresource.ANNOTATIONS} +class DatasetExportManager(_ResourceExportManager): + SUPPORTED_RESOURCES = {"project", "task", "job"} @dataclass class ExportArgs: @@ -261,27 +176,32 @@ class ExportArgs: def location(self) -> Location: return self.location_config["location"] - def initialize_export_args( + def __init__( self, + db_instance: Union[models.Project, models.Task, models.Job], + request: Request, + export_callback: Callable, + save_images: Optional[bool] = None, *, - export_callback: Callable | None = None, - save_images: bool | None = None, + version: int = 2, ) -> None: - super().initialize_export_args(export_callback=export_callback) - format_name = self.request.query_params.get("format", "") - filename = self.request.query_params.get("filename", "") + super().__init__(version, db_instance, export_callback=export_callback) + self.request = request + + format_name = request.query_params.get("format", "") + 
filename = request.query_params.get("filename", "") # can be passed directly when it is initialized based on API request, not query param save_images = ( save_images if save_images is not None - else to_bool(self.request.query_params.get("save_images", False)) + else to_bool(request.query_params.get("save_images", False)) ) try: location_config = get_location_configuration( - db_instance=self.db_instance, - query_params=self.request.query_params, + db_instance=db_instance, + query_params=request.query_params, field_name=StorageType.TARGET, ) except ValueError as ex: @@ -301,7 +221,162 @@ def initialize_export_args( location_config=location_config, ) - def validate_export_args(self): + def _handle_rq_job_v1( + self, + rq_job: Optional[RQJob], + queue: DjangoRQ, + ) -> Optional[Response]: + + def is_result_outdated() -> bool: + return ExportRQMeta.from_job(rq_job).request.timestamp < instance_update_time + + def handle_local_download() -> Response: + with dm.util.get_export_cache_lock( + file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT + ): + if not osp.exists(file_path): + return Response( + "The exported file has expired, please retry exporting", + status=status.HTTP_404_NOT_FOUND, + ) + + filename = self.export_args.filename or build_annotations_file_name( + class_name=self.resource, + identifier=( + self.db_instance.name + if isinstance(self.db_instance, (Task, Project)) + else self.db_instance.id + ), + timestamp=instance_timestamp, + format_name=self.export_args.format, + is_annotation_file=not self.export_args.save_images, + extension=osp.splitext(file_path)[1], + ) + + rq_job.delete() + return sendfile( + self.request, + file_path, + attachment=True, + attachment_filename=filename, + ) + + action = self.request.query_params.get("action") + + if action not in {None, "download"}: + raise serializers.ValidationError( + f"Unexpected action {action!r} specified for the request" + ) + + msg_no_such_job_when_downloading = ( + "Unknown export request id. " + "Please request export first by sending a request without the action=download parameter." 
+ ) + if not rq_job: + return ( + None + if action != "download" + else HttpResponseBadRequest(msg_no_such_job_when_downloading) + ) + + # define status once to avoid refreshing it on each check + # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of returning None in one of the next releases + rq_job_status = rq_job.get_status(refresh=False) + + # handle cases where the status is None for some reason + if not rq_job_status: + rq_job.delete() + return ( + None + if action != "download" + else HttpResponseBadRequest(msg_no_such_job_when_downloading) + ) + + if action == "download": + if self.export_args.location != Location.LOCAL: + return HttpResponseBadRequest( + 'Action "download" is only supported for a local dataset location' + ) + if rq_job_status not in { + RQJobStatus.FINISHED, + RQJobStatus.FAILED, + RQJobStatus.CANCELED, + RQJobStatus.STOPPED, + }: + return HttpResponseBadRequest("Dataset export has not been finished yet") + + instance_update_time = self.get_instance_update_time() + instance_timestamp = self.get_timestamp(instance_update_time) + + if rq_job_status == RQJobStatus.FINISHED: + if self.export_args.location == Location.CLOUD_STORAGE: + rq_job.delete() + return Response(status=status.HTTP_200_OK) + elif self.export_args.location == Location.LOCAL: + file_path = rq_job.return_value() + + if not file_path: + return Response( + "A result for exporting job was not found for finished RQ job", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + if action == "download": + return handle_local_download() + else: + with dm.util.get_export_cache_lock( + file_path, + ttl=LOCK_TTL, + acquire_timeout=LOCK_ACQUIRE_TIMEOUT, + ): + if osp.exists(file_path) and not is_result_outdated(): + extend_export_file_lifetime(file_path) + + return Response(status=status.HTTP_201_CREATED) + + cancel_and_delete(rq_job) + return None + else: + raise NotImplementedError( + f"Export to {self.export_args.location} location is not implemented yet" + ) + elif rq_job_status == RQJobStatus.FAILED: + exc_info = ExportRQMeta.from_job(rq_job).formatted_exception or str(rq_job.exc_info) + rq_job.delete() + return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + elif ( + rq_job_status == RQJobStatus.DEFERRED + and rq_job.id not in queue.deferred_job_registry.get_job_ids() + ): + # Sometimes jobs can depend on outdated jobs in the deferred jobs registry. + # They can be fetched by their specific ids, but are not listed by get_job_ids(). + # Supposedly, this can happen because of the server restarts + # (potentially, because the redis used for the queue is in memory). + # Another potential reason is canceling without enqueueing dependents. + # Such dependencies are never removed or finished, + # as there is no TTL for deferred jobs, + # so the current job can be blocked indefinitely. 
+ cancel_and_delete(rq_job) + return None + + elif rq_job_status in {RQJobStatus.CANCELED, RQJobStatus.STOPPED}: + rq_job.delete() + return ( + None + if action != "download" + else Response( + "Export was cancelled, please request it one more time", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + ) + + if is_result_outdated(): + cancel_and_delete(rq_job) + return None + + return Response(RqIdSerializer({"rq_id": rq_job.id}).data, status=status.HTTP_202_ACCEPTED) + + def export(self) -> Response: format_desc = {f.DISPLAY_NAME: f for f in dm.views.get_export_formats()}.get( self.export_args.format ) @@ -310,8 +385,8 @@ def validate_export_args(self): elif not format_desc.ENABLED: return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED) - def build_rq_id(self): - return RQId( + queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) + rq_id = RQId( RequestAction.EXPORT, RequestTarget(self.resource), self.db_instance.pk, @@ -324,7 +399,13 @@ def build_rq_id(self): user_id=self.request.user.id, ).render() - def send_events(self): + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(queue, rq_id): + rq_job = queue.fetch_job(rq_id) + if response := self.handle_rq_job(rq_job, queue): + return response + self.setup_background_job(queue, rq_id) + handle_dataset_export( self.db_instance, format_name=self.export_args.format, @@ -332,6 +413,11 @@ def send_events(self): save_images=self.export_args.save_images, ) + serializer = RqIdSerializer(data={"rq_id": rq_id}) + serializer.is_valid(raise_exception=True) + + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + def setup_background_job( self, queue: DjangoRQ, @@ -344,7 +430,7 @@ def setup_background_job( except Exception: server_address = None - cache_ttl = get_export_cache_ttl(self.db_instance) + cache_ttl = dm.views.get_export_cache_ttl(self.db_instance) user_id = self.request.user.id @@ -366,11 +452,8 @@ def setup_background_job( request=self.request, is_default=self.export_args.location_config["is_default"], ) - ###----------------------------------------### instance_update_time = self.get_instance_update_time() instance_timestamp = self.get_timestamp(instance_update_time) - # todo: think how improve it - # TODO: check that there is no filename.zip.zip in case when filename is specified filename_pattern = build_annotations_file_name( class_name=self.db_instance.__class__.__name__, identifier=( @@ -382,7 +465,6 @@ def setup_background_job( format_name=self.export_args.format, is_annotation_file=not self.export_args.save_images, ) - ###----------------------------------------### func = export_resource_to_cloud_storage func_args = ( db_storage, @@ -392,14 +474,13 @@ def setup_background_job( ) + func_args else: db_storage = None - result_url = self.make_result_url(rq_id=rq_id) + result_url = self.make_result_url() with get_rq_lock_by_user(queue, user_id): - result_filename = self.get_result_filename() meta = ExportRQMeta.build( request=self.request, db_obj=self.db_instance, - result_filename=result_filename, + # result_filename=result_filename, result_url=result_url, ) queue.enqueue_call( @@ -415,31 +496,25 @@ def setup_background_job( failure_ttl=cache_ttl.total_seconds(), ) - def get_result_filename(self) -> str: - filename = self.export_args.filename - - if filename: - return osp.splitext(filename)[0] - - instance_update_time = self.get_instance_update_time() - instance_timestamp = self.get_timestamp(instance_update_time) - filename = build_annotations_file_name( - 
class_name=self.resource, - identifier=self.db_instance.id, - timestamp=instance_timestamp, - format_name=self.export_args.format, - is_annotation_file=not self.export_args.save_images, - ) + def get_v1_endpoint_view_name(self) -> str: + """ + Get view name of the endpoint for the first API version - return filename + Possible view names: + - project-dataset + - task|job-dataset-export + - project|task|job-annotations + """ + if self.export_args.save_images: + template = "{}-dataset" + ("-export" if self.resource != "project" else "") + else: + template = "{}-annotations" - def get_download_api_endpoint_view_name(self) -> str: - return f"{self.resource}-download-dataset" + return template.format(self.resource) -class BackupExportManager(ResourceExportManager): - SUPPORTED_RESOURCES = {RequestTarget.PROJECT, RequestTarget.TASK} - SUPPORTEd_SUBRESOURCES = {RequestSubresource.BACKUP} +class BackupExportManager(_ResourceExportManager): + SUPPORTED_RESOURCES = {"project", "task"} @dataclass class ExportArgs: @@ -450,9 +525,17 @@ class ExportArgs: def location(self) -> Location: return self.location_config["location"] - def initialize_export_args(self) -> None: - super().initialize_export_args(export_callback=create_backup) - filename = self.request.query_params.get("filename", "") + def __init__( + self, + db_instance: Union[models.Project, models.Task], + request: Request, + *, + version: int = 2, + ) -> None: + super().__init__(version, db_instance, export_callback=create_backup) + self.request = request + + filename = request.query_params.get("filename", "") location_config = get_location_configuration( db_instance=self.db_instance, @@ -461,28 +544,143 @@ def initialize_export_args(self) -> None: ) self.export_args = self.ExportArgs(filename, location_config) - def validate_export_args(self): - return + def _handle_rq_job_v1( + self, + rq_job: Optional[RQJob], + queue: DjangoRQ, + ) -> Optional[Response]: - def get_result_filename(self) -> str: - filename = self.export_args.filename + def is_result_outdated() -> bool: + return ExportRQMeta.from_job(rq_job).request.timestamp < last_instance_update_time - if filename: - return osp.splitext(filename)[0] + last_instance_update_time = timezone.localtime(self.db_instance.updated_date) + timestamp = self.get_timestamp(last_instance_update_time) - instance_update_time = self.get_instance_update_time() - instance_timestamp = self.get_timestamp(instance_update_time) + action = self.request.query_params.get("action") + if action not in (None, "download"): + raise serializers.ValidationError( + f"Unexpected action {action!r} specified for the request" + ) - filename = build_backup_file_name( - class_name=self.resource, - identifier=self.db_instance.name, - timestamp=instance_timestamp, + msg_no_such_job_when_downloading = ( + "Unknown export request id. " + "Please request export first by sending a request without the action=download parameter." 
) + if not rq_job: + return ( + None + if action != "download" + else HttpResponseBadRequest(msg_no_such_job_when_downloading) + ) + + # define status once to avoid refreshing it on each check + # FUTURE-TODO: get_status will raise InvalidJobOperation exception instead of None in one of the next releases + rq_job_status = rq_job.get_status(refresh=False) + + # handle cases where the status is None for some reason + if not rq_job_status: + rq_job.delete() + return ( + None + if action != "download" + else HttpResponseBadRequest(msg_no_such_job_when_downloading) + ) + + if action == "download": + if self.export_args.location != Location.LOCAL: + return HttpResponseBadRequest( + 'Action "download" is only supported for a local backup location' + ) + if rq_job_status not in { + RQJobStatus.FINISHED, + RQJobStatus.FAILED, + RQJobStatus.CANCELED, + RQJobStatus.STOPPED, + }: + return HttpResponseBadRequest("Backup export has not been finished yet") + + if rq_job_status == RQJobStatus.FINISHED: + if self.export_args.location == Location.CLOUD_STORAGE: + rq_job.delete() + return Response(status=status.HTTP_200_OK) + elif self.export_args.location == Location.LOCAL: + file_path = rq_job.return_value() + + if not file_path: + return Response( + "Export is completed, but has no results", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + if action == "download": + with dm.util.get_export_cache_lock( + file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT + ): + if not os.path.exists(file_path): + return Response( + "The backup file has been expired, please retry backing up", + status=status.HTTP_404_NOT_FOUND, + ) + + filename = self.export_args.filename or build_backup_file_name( + class_name=self.resource, + identifier=self.db_instance.name, + timestamp=timestamp, + extension=os.path.splitext(file_path)[1], + ) + + rq_job.delete() + return sendfile( + self.request, file_path, attachment=True, attachment_filename=filename + ) + with dm.util.get_export_cache_lock( + file_path, ttl=LOCK_TTL, acquire_timeout=LOCK_ACQUIRE_TIMEOUT + ): + if osp.exists(file_path) and not is_result_outdated(): + extend_export_file_lifetime(file_path) + return Response(status=status.HTTP_201_CREATED) + + cancel_and_delete(rq_job) + return None + else: + raise NotImplementedError( + f"Export to {self.export_args.location} location is not implemented yet" + ) + elif rq_job_status == RQJobStatus.FAILED: + exc_info = ExportRQMeta.from_job(rq_job).formatted_exception or str(rq_job.exc_info) + rq_job.delete() + return Response(exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + elif ( + rq_job_status == RQJobStatus.DEFERRED + and rq_job.id not in queue.deferred_job_registry.get_job_ids() + ): + # Sometimes jobs can depend on outdated jobs in the deferred jobs registry. + # They can be fetched by their specific ids, but are not listed by get_job_ids(). + # Supposedly, this can happen because of the server restarts + # (potentially, because the redis used for the queue is in memory). + # Another potential reason is canceling without enqueueing dependents. + # Such dependencies are never removed or finished, + # as there is no TTL for deferred jobs, + # so the current job can be blocked indefinitely. 
+ cancel_and_delete(rq_job) + return None + + elif rq_job_status in {RQJobStatus.CANCELED, RQJobStatus.STOPPED}: + rq_job.delete() + return ( + None + if action != "download" + else Response( + "Export was cancelled, please request it one more time", + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + ) - return filename + return Response(RqIdSerializer({"rq_id": rq_job.id}).data, status=status.HTTP_202_ACCEPTED) - def build_rq_id(self): - return RQId( + def export(self) -> Response: + queue: DjangoRQ = django_rq.get_queue(self.QUEUE_NAME) + rq_id = RQId( RequestAction.EXPORT, RequestTarget(self.resource), self.db_instance.pk, @@ -490,6 +688,18 @@ def build_rq_id(self): user_id=self.request.user.id, ).render() + # ensure that there is no race condition when processing parallel requests + with get_rq_lock_for_job(queue, rq_id): + rq_job = queue.fetch_job(rq_id) + if response := self.handle_rq_job(rq_job, queue): + return response + self.setup_background_job(queue, rq_id) + + serializer = RqIdSerializer(data={"rq_id": rq_id}) + serializer.is_valid(raise_exception=True) + + return Response(serializer.data, status=status.HTTP_202_ACCEPTED) + def setup_background_job( self, queue: DjangoRQ, @@ -544,19 +754,17 @@ def setup_background_job( self.export_callback, ) + func_args else: - result_url = self.make_result_url(rq_id=rq_id) + result_url = self.make_result_url() user_id = self.request.user.id with get_rq_lock_by_user(queue, user_id): - result_filename = self.get_result_filename() meta = ExportRQMeta.build( request=self.request, db_obj=self.db_instance, - result_filename=result_filename, + # result_filename=result_filename, result_url=result_url, ) - queue.enqueue_call( func=func, args=func_args, @@ -567,8 +775,7 @@ def setup_background_job( failure_ttl=cache_ttl.total_seconds(), ) - def get_download_api_endpoint_view_name(self) -> str: - return f"{self.resource}-download-backup" + def get_v1_endpoint_view_name(self) -> str: + """Get view name of the endpoint for the first API version""" - def send_events(self): - pass + return f"{self.resource}-export-backup" diff --git a/cvat/apps/engine/mixins.py b/cvat/apps/engine/mixins.py index 7a881a9629be..6f600bd4f5dd 100644 --- a/cvat/apps/engine/mixins.py +++ b/cvat/apps/engine/mixins.py @@ -13,13 +13,14 @@ from pathlib import Path from tempfile import NamedTemporaryFile from textwrap import dedent -from typing import Callable +from typing import Any, Callable, Optional from unittest import mock from urllib.parse import urljoin import django_rq from attr.converters import to_bool from django.conf import settings +from django.http import HttpRequest from drf_spectacular.types import OpenApiTypes from drf_spectacular.utils import OpenApiParameter, OpenApiResponse, extend_schema from rest_framework import mixins, status @@ -32,7 +33,6 @@ from cvat.apps.engine.handlers import clear_import_cache from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.log import ServerLogManager -from cvat.apps.engine.middleware import PatchedRequest from cvat.apps.engine.models import Location, RequestAction, RequestSubresource, RequestTarget from cvat.apps.engine.rq_job_handler import RQId from cvat.apps.engine.serializers import DataSerializer, RqIdSerializer @@ -416,7 +416,27 @@ def partial_update(self, request, *args, **kwargs): with mock.patch.object(self, 'update', new=self._update, create=True): return mixins.UpdateModelMixin.partial_update(self, request=request, *args, **kwargs) + class DatasetMixin: + def 
export_dataset_v1( + self, + request, + save_images: bool, + *, + get_data: Optional[Callable[[int], dict[str, Any]]] = None, + ) -> Response: + if request.query_params.get("format"): + callback = self.get_export_callback(save_images) + + dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, version=1) + return dataset_export_manager.export() + + if not get_data: + return Response("Format is not specified", status=status.HTTP_400_BAD_REQUEST) + + data = get_data(self._object.pk) + return Response(data) + @extend_schema( summary='Initialize process to export resource as a dataset in a specific format', description=dedent("""\ @@ -446,33 +466,14 @@ class DatasetMixin: }, ) @action(detail=True, methods=['POST'], serializer_class=None, url_path='dataset/export') - def initiate_dataset_export(self, request: PatchedRequest, pk: int): + def export_dataset_v2(self, request: HttpRequest, pk: int): self._object = self.get_object() # force call of check_object_permissions() save_images = is_dataset_export(request) callback = self.get_export_callback(save_images) - export_manager = DatasetExportManager(self._object, request) - export_manager.initialize_export_args(export_callback=callback, save_images=save_images) - - return export_manager.export() - - @extend_schema(summary='Download a prepared dataset file', - parameters=[ - OpenApiParameter('rq_id', description='Request ID', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), - ], - responses={ - '200': OpenApiResponse(description='Download of file started'), - '204': OpenApiResponse(description='No prepared dataset file related with provider request ID'), - }, - exclude=True, # private API endpoint that should be used only as result_url - ) - @action(methods=['GET'], detail=True, url_path='dataset/download') - def download_dataset(self, request: PatchedRequest, pk: int): - obj = self.get_object() # force to call check_object_permissions - export_manager = DatasetExportManager(obj, request) - return export_manager.download_file() + dataset_export_manager = DatasetExportManager(self._object, request, callback, save_images=save_images, version=2) + return dataset_export_manager.export() # FUTURE-TODO: migrate to new API def import_annotations(self, request, db_obj, import_func, rq_func, rq_id_factory): @@ -507,8 +508,19 @@ def import_annotations(self, request, db_obj, import_func, rq_func, rq_id_factor class BackupMixin: + def export_backup_v1(self, request: HttpRequest) -> Response: + db_object = self.get_object() # force to call check_object_permissions + + export_backup_manager = BackupExportManager(db_object, request, version=1) + response = export_backup_manager.export() + + if request.query_params.get('action') != 'download': + response.headers['Deprecated'] = True + + return response + # FUTURE-TODO: migrate to new API - def import_backup_v1(self, request: PatchedRequest, import_func: Callable) -> Response: + def import_backup_v1(self, request: HttpRequest, import_func: Callable) -> Response: location = request.query_params.get("location", Location.LOCAL) if location == Location.CLOUD_STORAGE: file_name = request.query_params.get("filename", "") @@ -542,29 +554,11 @@ def import_backup_v1(self, request: PatchedRequest, import_func: Callable) -> Re }, ) @action(detail=True, methods=['POST'], serializer_class=None, url_path='backup/export') - def initiate_backup_export(self, request: PatchedRequest, pk: int): + def export_backup_v2(self, request: HttpRequest, pk: int): 
db_object = self.get_object() # force to call check_object_permissions - export_manager = BackupExportManager(db_object, request) - export_manager.initialize_export_args() - return export_manager.export() - - @extend_schema(summary='Download a prepared backup file', - parameters=[ - OpenApiParameter('rq_id', description='Request ID', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), - ], - responses={ - '200': OpenApiResponse(description='Download of file started'), - '204': OpenApiResponse(description='No prepared backup file related with provider request ID'), - }, - exclude=True, # private API endpoint that should be used only as result_url - ) - @action(methods=['GET'], detail=True, url_path='backup/download') - def download_backup(self, request: PatchedRequest, pk: int): - obj = self.get_object() # force to call check_object_permissions - export_manager = BackupExportManager(obj, request) - return export_manager.download_file() + export_backup_manager = BackupExportManager(db_object, request, version=2) + return export_backup_manager.export() class CsrfWorkaroundMixin(APIView): diff --git a/cvat/apps/engine/permissions.py b/cvat/apps/engine/permissions.py index 056aad044ac3..d26a700f0da7 100644 --- a/cvat/apps/engine/permissions.py +++ b/cvat/apps/engine/permissions.py @@ -12,8 +12,7 @@ from rest_framework.exceptions import PermissionDenied, ValidationError from rq.job import Job as RQJob -from cvat.apps.engine.middleware import PatchedRequest -from cvat.apps.engine.rq_job_handler import RQId, is_rq_job_owner +from cvat.apps.engine.rq_job_handler import is_rq_job_owner from cvat.apps.engine.utils import is_dataset_export from cvat.apps.iam.permissions import ( OpenPolicyAgentPermission, @@ -23,19 +22,7 @@ ) from cvat.apps.organizations.models import Organization -from .models import ( - AnnotationGuide, - CloudStorage, - Comment, - Issue, - Job, - Label, - Project, - RequestAction, - RequestTarget, - Task, - User, -) +from .models import AnnotationGuide, CloudStorage, Comment, Issue, Job, Label, Project, Task, User def _get_key(d: dict[str, Any], key_path: Union[str, Sequence[str]]) -> Optional[Any]: @@ -240,10 +227,9 @@ class Scopes(StrEnum): EXPORT_DATASET = 'export:dataset' EXPORT_BACKUP = 'export:backup' IMPORT_BACKUP = 'import:backup' - DOWNLOAD_EXPORTED_FILE = 'retrieve:exported_file' @classmethod - def create(cls, request: PatchedRequest, view, obj, iam_context): + def create(cls, request, view, obj, iam_context): permissions = [] if view.basename == 'project': assignee_id = request.data.get('assignee_id') or request.data.get('assignee') @@ -252,21 +238,6 @@ def create(cls, request: PatchedRequest, view, obj, iam_context): assignee_id=assignee_id) permissions.append(self) - - if scope == cls.Scopes.DOWNLOAD_EXPORTED_FILE: - # check that a user still has rights to export project dataset|backup - rq_id = request.query_params.get('rq_id') - assert rq_id - parsed_rq_id = RQId.parse(rq_id) - if ( - # TODO: move these checks to view class - parsed_rq_id.action != RequestAction.EXPORT - or parsed_rq_id.target != RequestTarget.PROJECT - or parsed_rq_id.identifier != obj.id - or parsed_rq_id.user_id != iam_context.get('user_id', request.user.id) - ): - raise PermissionDenied('You don\'t have permission to perform this action') - if view.action == 'tasks': perm = TaskPermission.create_scope_list(request, iam_context) permissions.append(perm) @@ -312,19 +283,15 @@ def get_scopes(request, view, obj): ('dataset', 'POST'): Scopes.IMPORT_DATASET, 
('append_dataset_chunk', 'HEAD'): Scopes.IMPORT_DATASET, ('append_dataset_chunk', 'PATCH'): Scopes.IMPORT_DATASET, - ('initiate_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, - ('initiate_backup_export', 'POST'): Scopes.EXPORT_BACKUP, + ('annotations', 'GET'): Scopes.EXPORT_ANNOTATIONS, + ('dataset', 'GET'): Scopes.IMPORT_DATASET if request.query_params.get('action') == 'import_status' else Scopes.EXPORT_DATASET, + ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, + ('export_backup_v2', 'POST'): Scopes.EXPORT_BACKUP, ('import_backup', 'POST'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'PATCH'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'HEAD'): Scopes.IMPORT_BACKUP, ('preview', 'GET'): Scopes.VIEW, - ('download_dataset', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, - ('download_backup', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, - # FUTURE-TODO: delete this after dropping support for deprecated API - ('annotations', 'GET'): Scopes.EXPORT_ANNOTATIONS, - ('dataset', 'GET'): Scopes.IMPORT_DATASET if request.query_params.get('action') == 'import_status' else Scopes.EXPORT_DATASET, - ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, - }[(view.action, request.method)] scopes = [] @@ -432,7 +399,6 @@ class Scopes(StrEnum): EXPORT_BACKUP = 'export:backup' VIEW_VALIDATION_LAYOUT = 'view:validation_layout' UPDATE_VALIDATION_LAYOUT = 'update:validation_layout' - DOWNLOAD_EXPORTED_FILE = 'retrieve:exported_file' @classmethod def create(cls, request, view, obj, iam_context): @@ -455,20 +421,6 @@ def create(cls, request, view, obj, iam_context): elif scope == __class__.Scopes.UPDATE_OWNER: params['owner_id'] = owner - elif scope == cls.Scopes.DOWNLOAD_EXPORTED_FILE: - # check that a user still has rights to export task dataset|backup - rq_id = request.query_params.get('rq_id') - assert rq_id - parsed_rq_id = RQId.parse(rq_id) - if ( - # TODO: move these checks to view class - parsed_rq_id.action != RequestAction.EXPORT - or parsed_rq_id.target != RequestTarget.TASK - or parsed_rq_id.identifier != obj.id - or parsed_rq_id.user_id != iam_context.get('user_id', request.user.id) - ): - raise PermissionDenied('You don\'t have permission to perform this action') - self = cls.create_base_perm(request, view, scope, iam_context, obj, **params) permissions.append(self) @@ -541,7 +493,8 @@ def get_scopes(request, view, obj) -> list[Scopes]: ('annotations', 'POST'): Scopes.IMPORT_ANNOTATIONS, ('append_annotations_chunk', 'PATCH'): Scopes.UPDATE_ANNOTATIONS, ('append_annotations_chunk', 'HEAD'): Scopes.UPDATE_ANNOTATIONS, - ('initiate_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, + ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('metadata', 'GET'): Scopes.VIEW_METADATA, ('metadata', 'PATCH'): Scopes.UPDATE_METADATA, ('data', 'GET'): Scopes.VIEW_DATA, @@ -552,15 +505,11 @@ def get_scopes(request, view, obj) -> list[Scopes]: ('import_backup', 'POST'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'PATCH'): Scopes.IMPORT_BACKUP, ('append_backup_chunk', 'HEAD'): Scopes.IMPORT_BACKUP, - ('initiate_backup_export', 'POST'): Scopes.EXPORT_BACKUP, + ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, + ('export_backup_v2', 'POST'): Scopes.EXPORT_BACKUP, ('preview', 'GET'): 
Scopes.VIEW, ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, - ('download_dataset', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, - ('download_backup', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, - # FUTURE-TODO: deprecated API - ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, - ('export_backup', 'GET'): Scopes.EXPORT_BACKUP, }[(view.action, request.method)] scopes = [] @@ -680,7 +629,6 @@ class Scopes(StrEnum): UPDATE_METADATA = 'update:metadata' VIEW_VALIDATION_LAYOUT = 'view:validation_layout' UPDATE_VALIDATION_LAYOUT = 'update:validation_layout' - DOWNLOAD_EXPORTED_FILE = 'retrieve:exported_file' @classmethod def create(cls, request, view, obj, iam_context): @@ -704,20 +652,6 @@ def create(cls, request, view, obj, iam_context): request, task, iam_context=iam_context )) - elif scope == cls.Scopes.DOWNLOAD_EXPORTED_FILE: - # check that a user still has rights to export task dataset|backup - rq_id = request.query_params.get('rq_id') - assert rq_id - parsed_rq_id = RQId.parse(rq_id) - if ( - # TODO: move these checks to view class - parsed_rq_id.action != RequestAction.EXPORT - or parsed_rq_id.target != RequestTarget.JOB - or parsed_rq_id.identifier != obj.id - or parsed_rq_id.user_id != iam_context.get('user_id', request.user.id) - ): - raise PermissionDenied('You don\'t have permission to perform this action') - self = cls.create_base_perm(request, view, scope, iam_context, obj, **scope_params) permissions.append(self) @@ -787,13 +721,11 @@ def get_scopes(request, view, obj): ('metadata','GET'): Scopes.VIEW_METADATA, ('metadata','PATCH'): Scopes.UPDATE_METADATA, ('issues', 'GET'): Scopes.VIEW, - ('initiate_dataset_export', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, + ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, + ('export_dataset_v2', 'POST'): Scopes.EXPORT_DATASET if is_dataset_export(request) else Scopes.EXPORT_ANNOTATIONS, ('preview', 'GET'): Scopes.VIEW, ('validation_layout', 'GET'): Scopes.VIEW_VALIDATION_LAYOUT, ('validation_layout', 'PATCH'): Scopes.UPDATE_VALIDATION_LAYOUT, - ('download_dataset', 'GET'): Scopes.DOWNLOAD_EXPORTED_FILE, - # deprecated API - ('dataset_export', 'GET'): Scopes.EXPORT_DATASET, }[(view.action, request.method)] scopes = [] diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 7efc63b49065..9caba569a829 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -51,14 +51,14 @@ class RequestInfo: def to_dict(self) -> dict[str, Any]: return asdict(self) +# FUTURE-TODO: uncomment +# @attrs.frozen(kw_only=True) +# class ExportResultInfo: +# url: str | None = attrs.field(validator=[optional_str_validator]) +# filename: str = attrs.field(validator=[str_validator]) -@attrs.frozen(kw_only=True) -class ExportResultInfo: - url: str | None = attrs.field(validator=[optional_str_validator]) - filename: str = attrs.field(validator=[str_validator]) - - def to_dict(self) -> dict[str, Any]: - return asdict(self) +# def to_dict(self) -> dict[str, Any]: +# return asdict(self) @attrs.define @@ -183,11 +183,11 @@ def build( id=getattr(user, "id", None), username=getattr(user, "username", None), email=getattr(user, "email", None), - ).to_dict(), + ), request=RequestInfo( uuid=request.uuid, timestamp=timezone.localtime(), - ).to_dict(), + ), org_id=oid, org_slug=oslug, project_id=pid, @@ -198,10 +198,12 @@ def build( @attrs.define(kw_only=True) class ExportRQMeta(BaseRQMeta): - result: 
ExportResultInfo = attrs.field( - converter=lambda d: ExportResultInfo(**d), - on_setattr=attrs.setters.frozen, - ) + result_url: str | None = attrs.field(validator=[optional_str_validator]) + # FUTURE-TODO: uncomment + # result: ExportResultInfo = attrs.field( + # converter=lambda d: ExportResultInfo(**d), + # on_setattr=attrs.setters.frozen, + # ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: @@ -216,16 +218,17 @@ def build( request: PatchedRequest, db_obj: Model | None, result_url: str | None, - result_filename: str, + # result_filename: str, ): base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) return cls( **base_meta, - result=ExportResultInfo( - filename=result_filename, - url=result_url, - ).to_dict(), + result_url=result_url, + # result=ExportResultInfo( + # filename=result_filename, + # url=result_url, + # ), ).to_dict() diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index f4508b142e0d..ba4ba079b6a9 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -1811,6 +1811,17 @@ def to_internal_value(self, data): def to_representation(self, instance): return instance.file if instance else instance +class RqStatusSerializer(serializers.Serializer): + state = serializers.ChoiceField(choices=[ + "Queued", "Started", "Finished", "Failed"]) + message = serializers.CharField(allow_blank=True, default="") + progress = serializers.FloatField(max_value=100, default=0) + + def __init__(self, instance=None, data=..., **kwargs): + warnings.warn("RqStatusSerializer is deprecated, " + "use cvat.apps.engine.serializers.RequestSerializer instead", DeprecationWarning) + super().__init__(instance, data, **kwargs) + class RqIdSerializer(serializers.Serializer): rq_id = serializers.CharField(help_text="Request id") @@ -3595,7 +3606,8 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: if representation["status"] == RQJobStatus.FINISHED: if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: - representation["result_url"] = ExportRQMeta.from_job(rq_job).result.url + # representation["result_url"] = ExportRQMeta.from_job(rq_job).result.url + representation["result_url"] = ExportRQMeta.from_job(rq_job).result_url if ( rq_job.parsed_rq_id.action == models.RequestAction.IMPORT diff --git a/cvat/apps/engine/utils.py b/cvat/apps/engine/utils.py index b59dc1e5c999..c6a06d5e0641 100644 --- a/cvat/apps/engine/utils.py +++ b/cvat/apps/engine/utils.py @@ -342,10 +342,11 @@ def build_backup_file_name( class_name: str, identifier: str | int, timestamp: str, + extension: str = "{}", ) -> str: - # "__backup_" - return "{}_{}_backup_{}".format( - class_name, identifier, timestamp, + # "__backup_.zip" + return "{}_{}_backup_{}{}".format( + class_name, identifier, timestamp, extension, ).lower() def build_annotations_file_name( @@ -355,11 +356,12 @@ def build_annotations_file_name( timestamp: str, format_name: str, is_annotation_file: bool = True, + extension: str = "{}", ) -> str: - # "____" - return "{}_{}_{}_{}_{}".format( + # "____.zip" + return "{}_{}_{}_{}_{}{}".format( class_name, identifier, 'annotations' if is_annotation_file else 'dataset', - timestamp, format_name + timestamp, format_name, extension, ).lower() diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 6822453ec7a9..9e06288d3df8 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -21,7 +21,7 @@ from pathlib import Path from tempfile import NamedTemporaryFile from types import SimpleNamespace -from 
typing import Any, Callable, Optional, Type, Union, cast +from typing import Any, Callable, Optional, Union, cast import django_rq from attr.converters import to_bool @@ -31,13 +31,7 @@ from django.db import models as django_models from django.db import transaction from django.db.models.query import Prefetch -from django.http import ( - HttpRequest, - HttpResponse, - HttpResponseBadRequest, - HttpResponseGone, - HttpResponseNotFound, -) +from django.http import HttpRequest, HttpResponse, HttpResponseBadRequest, HttpResponseNotFound from django.utils import timezone from django.utils.decorators import method_decorator from django.views.decorators.cache import never_cache @@ -59,7 +53,6 @@ from rest_framework.parsers import MultiPartParser from rest_framework.permissions import SAFE_METHODS from rest_framework.response import Response -from rest_framework.reverse import reverse from rest_framework.settings import api_settings from rq.job import Job as RQJob from rq.job import JobStatus as RQJobStatus @@ -88,7 +81,6 @@ ) from cvat.apps.engine.location import StorageType, get_location_configuration from cvat.apps.engine.media_extractors import get_mime -from cvat.apps.engine.middleware import PatchedRequest from cvat.apps.engine.mixins import ( BackupMixin, CsrfWorkaroundMixin, @@ -128,12 +120,7 @@ get_cloud_storage_for_import_or_export, get_iam_context, ) -from cvat.apps.engine.rq_job_handler import ( - ImportRQMeta, - RQId, - RQMetaWithFailureInfo, - is_rq_job_owner, -) +from cvat.apps.engine.rq_job_handler import RQId, is_rq_job_owner, ImportRQMeta, RQMetaWithFailureInfo from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, @@ -167,6 +154,7 @@ ProjectWriteSerializer, RequestSerializer, RqIdSerializer, + RqStatusSerializer, TaskFileSerializer, TaskReadSerializer, TaskValidationLayoutReadSerializer, @@ -179,6 +167,7 @@ define_dependent_job, get_rq_lock_by_user, import_resource_with_clean_up_after, + parse_exception_message, process_failed_job, sendfile, ) @@ -403,49 +392,46 @@ def get_export_callback(self, save_images: bool) -> Callable: @extend_schema(methods=['GET'], summary='Export a project as a dataset / Check dataset import status', description=textwrap.dedent(""" - Utilizing this endpoint: - - to export project dataset in a specific format - - to check the status of the process of importing a project dataset from a file - is deprecated. + To check the status of the process of importing a project dataset from a file: + + After initiating the dataset upload, you will receive an rq_id parameter. + Make sure to include this parameter as a query parameter in your subsequent + GET /api/projects/id/dataset requests to track the status of the dataset import. + Also you should specify action parameter: action=import_status. + Deprecation warning: + Utilizing this endpoint to export project dataset in + a specific format will be deprecated in one of the next releases. 
Consider using new API: - POST /api/projects//dataset/export/?save_images=True to initiate export process - - GET /api/requests/ to check process status - - GET \{result_url\} to download a prepared file, - Where: - - `rq_id` can be found in the response on initializing request - - `result_url` can be found in the response on checking status request + - GET /api/requests/ to check process status, + where rq_id is request id returned on initializing request """), parameters=[ OpenApiParameter('format', description='Desired output format name\n' 'You can get the list of supported formats at:\n/server/annotation/formats', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True - ), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True - ), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), OpenApiParameter('action', description='Used to start downloading process locally after annotation file has been created', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['download', 'import_status'], - deprecated=True - ), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['download', 'import_status']), OpenApiParameter('location', description='Where need to save downloaded dataset', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list(), - deprecated=True - ), + enum=Location.list()), OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False, - deprecated=True - ), - OpenApiParameter('rq_id', description='Request ID', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + OpenApiParameter('use_default_location', description='Use the location that was configured in project to import dataset', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + OpenApiParameter('rq_id', description='rq id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), ], - deprecated=True, + # deprecated=True, FUTURE-TODO: uncomment when new API for result downloading will be implemented responses={ - '301': OpenApiResponse(description='Redirects to the new API to check status of import process'), - '410': OpenApiResponse(description='API endpoint no longer supports exporting datasets'), + '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), + '201': OpenApiResponse(description='Output file is ready for downloading'), + '202': OpenApiResponse(description='Exporting has been started'), + '405': OpenApiResponse(description='Format is not available'), }) @extend_schema(methods=['POST'], summary='Import a dataset into a project', @@ -481,27 +467,58 @@ def get_export_callback(self, save_images: bool) -> Callable: }) @action(detail=True, methods=['GET', 'POST', 'OPTIONS'], serializer_class=None, url_path=r'dataset/?$', parser_classes=_UPLOAD_PARSER_CLASSES, - ) - def dataset(self, request: PatchedRequest, pk: int): + csrf_workaround_is_needed=lambda qp: + csrf_workaround_is_needed_for_export(qp) and qp.get("action") != "import_status") + def dataset(self, request, pk): self._object = self.get_object() # force call of 
check_object_permissions() - if request.method == "GET": - if request.query_params.get("action") == "import_status": - if rq_id := request.query_params.get("rq_id"): - return reverse('requests', request=request, args=[rq_id]) - return HttpResponseBadRequest("Missing rq_id") - # we don't redirect to the new API here since this endpoint used not only to check the status - # of exporting process|download a result file, but also to initiate export process - return HttpResponseGone("API endpoint is no longer handles exporting process") - - return self.import_annotations( - request=request, - db_obj=self._object, - import_func=_import_project_dataset, - rq_func=dm.project.import_dataset_as_project, - rq_id_factory=self.IMPORT_RQ_ID_FACTORY, - ) + if request.method in {'POST', 'OPTIONS'}: + return self.import_annotations( + request=request, + db_obj=self._object, + import_func=_import_project_dataset, + rq_func=dm.project.import_dataset_as_project, + rq_id_factory=self.IMPORT_RQ_ID_FACTORY, + ) + else: + action = request.query_params.get("action", "").lower() + if action in ("import_status",): + queue = django_rq.get_queue(settings.CVAT_QUEUES.IMPORT_DATA.value) + rq_id = request.query_params.get('rq_id') + if not rq_id: + return Response( + 'The rq_id param should be specified in the query parameters', + status=status.HTTP_400_BAD_REQUEST, + ) + + rq_job = queue.fetch_job(rq_id) + if rq_job is None: + return Response(status=status.HTTP_404_NOT_FOUND) + # check that the user has access to the current rq_job + elif not is_rq_job_owner(rq_job, request.user.id): + return Response(status=status.HTTP_403_FORBIDDEN) + + if rq_job.is_finished: + rq_job.delete() + return Response(status=status.HTTP_201_CREATED) + elif rq_job.is_failed: + exc_info = process_failed_job(rq_job) + + return Response( + data=str(exc_info), + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + else: + return Response( + data=self._get_rq_response( + settings.CVAT_QUEUES.IMPORT_DATA.value, + rq_id, + ), + status=status.HTTP_202_ACCEPTED, + ) + else: + return self.export_dataset_v1(request=request, save_images=True) @tus_chunk_action(detail=True, suffix_base="dataset") def append_dataset_chunk(self, request, pk, file_id): @@ -551,47 +568,87 @@ def upload_finished(self, request): return Response(data='Unknown upload was finished', status=status.HTTP_400_BAD_REQUEST) - @extend_schema( + @extend_schema(summary='Export project annotations as a dataset', description=textwrap.dedent("""\ + Deprecation warning: + Using this endpoint to initiate export of annotations as a dataset or to check export status is deprecated. 
Consider using new API: - POST /api/projects//dataset/export?save_images=False to initiate exporting process - GET /api/requests/ to check export status, where rq_id is request id returned on initializing request' - - GET \{result_url\} to download a prepared file with annotations, - where result_url can be found in the response on checking status request """), + parameters=[ + OpenApiParameter('format', description='Desired output format name\n' + 'You can get the list of supported formats at:\n/server/annotation/formats', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=True), + OpenApiParameter('filename', description='Desired output file name', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('action', description='Used to start downloading process locally after annotation file has been created', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, enum=['download']), + OpenApiParameter('location', description='Where need to save downloaded dataset', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + enum=Location.list()), + OpenApiParameter('cloud_storage_id', description='Storage id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + OpenApiParameter('use_default_location', description='Use the location that was configured in project to export annotation', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + ], responses={ - '410': OpenApiResponse(description="API endpoint is no longer handles exporting process"), - }, - deprecated=True, - ) + '200': OpenApiResponse(PolymorphicProxySerializer( + component_name='AnnotationsRead', + serializers=[LabeledDataSerializer, OpenApiTypes.BINARY], + resource_type_field_name=None + ), description='Download of file started'), + '201': OpenApiResponse(description='Annotations file is ready to download'), + '202': OpenApiResponse(description='Dump of annotations has been started'), + '401': OpenApiResponse(description='Format is not specified'), + '405': OpenApiResponse(description='Format is not available'), + }) @action(detail=True, methods=['GET'], serializer_class=LabeledDataSerializer, csrf_workaround_is_needed=csrf_workaround_is_needed_for_export) def annotations(self, request, pk): - return HttpResponseGone("API endpoint is no longer handles exporting process") + # FUTURE-TODO: mark exporting dataset using this endpoint as deprecated when new API for result file downloading will be implemented + self._object = self.get_object() # force call of check_object_permissions() + return self.export_dataset_v1(request=request, save_images=False) - # --- Deprecated API endpoint, should be deleted in the next release --- @extend_schema(summary='Back up a project', description=textwrap.dedent("""\ - Consider using new API: - - POST /api/projects//backup/export to initiate backup process - - GET /api/requests/ to check process status, - where rq_id can be found in the response on initializing request - - GET \{result_url\} to download a prepared file, - where result_url can be found in the response on checking status request - """ - ), + Deprecation warning: + + This endpoint will be deprecated in one of the next releases. 
+ Consider using new API: + - POST /api/projects//backup/export to initiate backup process + - GET /api/requests/ to check process status, + where rq_id is request id returned on initializing request + """), + parameters=[ + OpenApiParameter('action', location=OpenApiParameter.QUERY, + description='Used to start downloading process after backup file had been created', + type=OpenApiTypes.STR, required=False, enum=['download']), + OpenApiParameter('filename', description='Backup file name', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('location', description='Where need to save downloaded backup', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + enum=Location.list()), + OpenApiParameter('cloud_storage_id', description='Storage id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + OpenApiParameter('use_default_location', description='Use the location that was configured in project to export backup', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + ], responses={ - '410': OpenApiResponse(description='Deprecated API endpoint'), - }, - deprecated=True, - ) + '200': OpenApiResponse(description='Download of file started'), + '201': OpenApiResponse(description='Output backup file is ready for downloading'), + '202': OpenApiResponse(description='Creating a backup file has been started'), + }) @action(methods=['GET'], detail=True, url_path='backup', csrf_workaround_is_needed=csrf_workaround_is_needed_for_backup) - def export_backup(self, request: PatchedRequest, pk: int): - return HttpResponseGone("API endpoint is no longer handles the project backup process") + def export_backup(self, request, pk=None): + # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented + return self.export_backup_v1(request) @extend_schema(methods=['POST'], summary='Recreate a project from a backup', description=textwrap.dedent(""" @@ -663,6 +720,24 @@ def preview(self, request, pk): return data_getter() + @staticmethod + def _get_rq_response(queue, job_id): + queue = django_rq.get_queue(queue) + job = queue.fetch_job(job_id) + rq_job_meta = ImportRQMeta.from_job(job) + response = {} + if job is None or job.is_finished: + response = { "state": "Finished" } + elif job.is_queued or job.is_deferred: + response = { "state": "Queued" } + elif job.is_failed: + response = { "state": "Failed", "message": job.exc_info } + else: + response = { "state": "Started" } + response['message'] = rq_job_meta.status + response['progress'] = rq_job_meta.progress or 0. + + return response class _DataGetter(metaclass=ABCMeta): def __init__( @@ -981,22 +1056,44 @@ def append_backup_chunk(self, request, file_id): @extend_schema(summary='Back up a task', description=textwrap.dedent("""\ + Deprecation warning: + This endpoint will be deprecated in one of the next releases. 
Consider using new API: - POST /api/tasks//backup/export to initiate backup process - GET /api/requests/ to check process status, - where rq_id can be found in the response on initializing request - - GET \{result_url\} to download a prepared file, - where result_url can be found in the response on checking status request - """ - ), + where rq_id is request id returned on initializing request' + """), + parameters=[ + OpenApiParameter('action', location=OpenApiParameter.QUERY, + description='Used to start downloading process after backup file had been created', + type=OpenApiTypes.STR, required=False, enum=['download']), + OpenApiParameter('filename', description='Backup file name', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('location', description='Where need to save downloaded backup', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + enum=Location.list()), + OpenApiParameter('cloud_storage_id', description='Storage id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export backup', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + ], responses={ - '410': OpenApiResponse(description='Deprecated API endpoint'), - }, - deprecated=True, - ) - @action(methods=['GET'], detail=True, url_path='backup') + '200': OpenApiResponse(description='Download of file started'), + '201': OpenApiResponse(description='Output backup file is ready for downloading'), + '202': OpenApiResponse(description='Creating a backup file has been started'), + '400': OpenApiResponse(description='Backup of a task without data is not allowed'), + }) + @action(methods=['GET'], detail=True, url_path='backup', + csrf_workaround_is_needed=csrf_workaround_is_needed_for_backup) def export_backup(self, request, pk=None): - return HttpResponseGone("API endpoint is no longer handles the task backup process") + # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented + if self.get_object().data is None: + return Response( + data='Backup of a task without data is not allowed', + status=status.HTTP_400_BAD_REQUEST + ) + return self.export_backup_v1(request) @transaction.atomic def perform_update(self, serializer): @@ -1375,50 +1472,47 @@ def append_data_chunk(self, request, pk, file_id): def get_export_callback(self, save_images: bool) -> Callable: return dm.views.export_task_as_dataset if save_images else dm.views.export_task_annotations - @extend_schema(methods=['GET'], summary='Get task annotations', + # TODO: mark this endpoint as deprecated when new endpoint for downloading results will be implemented + @extend_schema(methods=['GET'], summary='Get task annotations or export them as a dataset in a specific format', description=textwrap.dedent("""\ Deprecation warning: - Utilizing this endpoint to export annotations as a dataset in - a specific format is deprecated. + Utilizing this endpoint ot export annotations as a dataset in + a specific format will be deprecated in one of the next releases. 
Consider using new API: - POST /api/tasks//dataset/export?save_images=False to initiate export process - GET /api/requests/ to check process status, where rq_id is request id returned on initializing request - - GET \{result_url\} to download a prepared file, - where result_url can be found in the response on checking status request """), parameters=[ - # --- Deprecated params section --- OpenApiParameter('format', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats", - deprecated=True ), OpenApiParameter('filename', description='Desired output file name', - location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - deprecated=True - ), + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), OpenApiParameter('action', location=OpenApiParameter.QUERY, description='Used to start downloading process locally after annotation file has been created', - type=OpenApiTypes.STR, required=False, enum=['download'], - deprecated=True - ), + type=OpenApiTypes.STR, required=False, enum=['download']), OpenApiParameter('location', description='Where need to save downloaded dataset', location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, - enum=Location.list(), - deprecated=True - ), + enum=Location.list()), OpenApiParameter('cloud_storage_id', description='Storage id', - location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False, - deprecated=True - ), - # --- Deprecated params section --- + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export annotation', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), ], responses={ - '200': OpenApiResponse(LabeledDataSerializer), - '400': OpenApiResponse(description="Exporting without data is not allowed"), - '410': OpenApiResponse(description="API endpoint is no longer handles exporting process"), + '200': OpenApiResponse(PolymorphicProxySerializer( + component_name='AnnotationsRead', + serializers=[LabeledDataSerializer, OpenApiTypes.BINARY], + resource_type_field_name=None + ), description='Download of file started'), + '201': OpenApiResponse(description='Annotations file is ready to download'), + '202': OpenApiResponse(description='Dump of annotations has been started'), + '400': OpenApiResponse(description='Exporting without data is not allowed'), + '405': OpenApiResponse(description='Format is not available'), }) @extend_schema(methods=['PUT'], summary='Replace task annotations / Get annotation import status', description=textwrap.dedent(""" @@ -1494,18 +1588,15 @@ def get_export_callback(self, save_images: bool) -> Callable: def annotations(self, request, pk): self._object = self.get_object() # force call of check_object_permissions() if request.method == 'GET': - if not self._object.data: + if self._object.data: + return self.export_dataset_v1( + request=request, + save_images=False, + get_data=dm.task.get_task_data, + ) + else: return HttpResponseBadRequest("Exporting annotations from a task without data is not allowed") - if ( - {"format", "filename", "action", "location", "cloud_storage_id"} - & request.query_params.keys() - ): - return HttpResponseGone(f"API endpoint no longer handles exporting process") - - data = dm.task.get_task_data(self._object.pk) - return 
Response(data) - elif request.method == 'POST' or request.method == 'OPTIONS': # NOTE: initialization process of annotations import format_name = request.query_params.get('format', '') @@ -1559,7 +1650,53 @@ def append_annotations_chunk(self, request, pk, file_id): self._object = self.get_object() return self.append_tus_chunk(request, file_id) + ### --- DEPRECATED METHOD --- ### + @extend_schema( + summary='Get the creation status of a task', + responses={ + '200': RqStatusSerializer, + }, + deprecated=True, + description="This method is deprecated and will be removed in one of the next releases. " + "To check status of task creation, use new common API " + "for managing background operations: GET /api/requests/?action=create&task_id=", + ) + @action(detail=True, methods=['GET'], serializer_class=RqStatusSerializer) + def status(self, request, pk): + task = self.get_object() # force call of check_object_permissions() + response = self._get_rq_response( + queue=settings.CVAT_QUEUES.IMPORT_DATA.value, + job_id=RQId(RequestAction.CREATE, RequestTarget.TASK, task.id).render() + ) + serializer = RqStatusSerializer(data=response) + serializer.is_valid(raise_exception=True) + return Response(serializer.data, headers={'Deprecation': 'true'}) + + ### --- DEPRECATED METHOD--- ### + @staticmethod + def _get_rq_response(queue, job_id): + queue = django_rq.get_queue(queue) + job = queue.fetch_job(job_id) + rq_job_meta = ImportRQMeta.from_job(job) + response = {} + if job is None or job.is_finished: + response = { "state": "Finished" } + elif job.is_queued or job.is_deferred: + response = { "state": "Queued" } + elif job.is_failed: + # FIXME: It seems that in some cases exc_info can be None. + # It's not really clear how it is possible, but it can + # lead to an error in serializing the response + # https://github.com/cvat-ai/cvat/issues/5215 + response = { "state": "Failed", "message": parse_exception_message(job.exc_info or "Unknown error") } + else: + response = { "state": "Started" } + if rq_job_meta.status: + response['message'] = rq_job_meta.status + response['progress'] = rq_job_meta.progress or 0. + + return response @extend_schema(methods=['GET'], summary='Get metainformation for media files in a task', responses={ @@ -1607,23 +1744,55 @@ def metadata(self, request, pk): @extend_schema(summary='Export task as a dataset in a specific format', description=textwrap.dedent("""\ + Deprecation warning: + Utilizing this endpoint to export task dataset in - a specific format is deprecated. + a specific format will be deprecated in one of the next releases. 
Consider using new API: - POST /api/tasks//dataset/export?save_images=True to initiate export process - GET /api/requests/ to check process status, where rq_id is request id returned on initializing request - - GET \{result_url\} to download a prepared file, - where result_url can be found in the response on checking status request """), + parameters=[ + OpenApiParameter('format', location=OpenApiParameter.QUERY, + description='Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats', + type=OpenApiTypes.STR, required=True), + OpenApiParameter('filename', description='Desired output file name', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('action', location=OpenApiParameter.QUERY, + description='Used to start downloading process locally after annotation file has been created', + type=OpenApiTypes.STR, required=False, enum=['download']), + OpenApiParameter('use_default_location', description='Use the location that was configured in task to export annotations', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + OpenApiParameter('location', description='Where need to save downloaded dataset', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + enum=Location.list()), + OpenApiParameter('cloud_storage_id', description='Storage id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + ], responses={ - '410': OpenApiResponse(description='Deprecated API endpoint'), + '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), + '201': OpenApiResponse(description='Output file is ready for downloading'), + '202': OpenApiResponse(description='Exporting has been started'), + '400': OpenApiResponse(description='Exporting without data is not allowed'), + '405': OpenApiResponse(description='Format is not available'), }, ) - @action(detail=True, methods=['GET'], serializer_class=None, url_path='dataset') + @action(detail=True, methods=['GET'], serializer_class=None, + url_path='dataset', csrf_workaround_is_needed=csrf_workaround_is_needed_for_export) def dataset_export(self, request, pk): - return HttpResponseGone("Deprecated API endpoint") + # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented + self._object = self.get_object() # force call of check_object_permissions() + + if self._object.data: + return self.export_dataset_v1( + request=request, + save_images=True + ) + + return HttpResponseBadRequest("Exporting a dataset from a task without data is not allowed") @extend_schema(summary='Get a preview image for a task', responses={ @@ -1876,22 +2045,48 @@ def upload_finished(self, request): status=status.HTTP_400_BAD_REQUEST) @extend_schema(methods=['GET'], - summary="Get job annotations", + summary="Get job annotations or export job annotations as a dataset in a specific format", description=textwrap.dedent("""\ + If format is specified, a ZIP archive will be returned. Otherwise, + the annotations will be returned as a JSON document. + Deprecation warning: - Utilizing this endpoint to export job dataset in a specific format is deprecated. + Utilizing this endpoint to export annotations as a dataset in + a specific format will be deprecated in one of the next releases. 
Consider using new API: - - POST /api/jobs//dataset/export?save_images=True to initiate export process + - POST /api/jobs//dataset/export?save_images=False to initiate export process - GET /api/requests/ to check process status, where rq_id is request id returned on initializing request - - GET \{result_url\} to download a prepared file, - where result_url can be found in the response on checking status request """), + parameters=[ + OpenApiParameter('format', location=OpenApiParameter.QUERY, + description='Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats', + type=OpenApiTypes.STR, required=False), + OpenApiParameter('filename', description='Desired output file name', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('action', location=OpenApiParameter.QUERY, + description='Used to start downloading process locally after annotation file has been created', + type=OpenApiTypes.STR, required=False, enum=['download']), + OpenApiParameter('location', description='Where need to save downloaded annotation', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + enum=Location.list()), + OpenApiParameter('cloud_storage_id', description='Storage id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export annotation', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + ], responses={ - '200': OpenApiResponse(LabeledDataSerializer), - '410': OpenApiResponse(description="API endpoint no longer handles dataset exporting process"), + '200': OpenApiResponse(PolymorphicProxySerializer( + component_name='AnnotationsRead', + serializers=[LabeledDataSerializer, OpenApiTypes.BINARY], + resource_type_field_name=None + ), description='Download of file started'), + '201': OpenApiResponse(description='Output file is ready for downloading'), + '202': OpenApiResponse(description='Exporting has been started'), + '405': OpenApiResponse(description='Format is not available'), }) @extend_schema(methods=['POST'], summary='Import annotations into a job', @@ -1974,15 +2169,12 @@ def upload_finished(self, request): def annotations(self, request, pk): self._object: models.Job = self.get_object() # force call of check_object_permissions() if request.method == 'GET': - - if ( - {"format", "filename", "location", "action", "cloud_storage_id"} - & request.query_params.keys() - ): - return HttpResponseGone(f"API endpoint no longer handles dataset exporting process") - - annotations = dm.task.get_job_data(self._object.pk) - return Response(annotations) + # FUTURE-TODO: mark as deprecated using this endpoint to export annotations when new API for result file downloading will be implemented + return self.export_dataset_v1( + request=request, + save_images=False, + get_data=dm.task.get_job_data, + ) elif request.method == 'POST' or request.method == 'OPTIONS': format_name = request.query_params.get('format', '') @@ -2041,6 +2233,48 @@ def append_annotations_chunk(self, request, pk, file_id): return self.append_tus_chunk(request, file_id) + @extend_schema(summary='Export job as a dataset in a specific format', + description=textwrap.dedent("""\ + Deprecation warning: + This endpoint will be deprecated in one of the next releases. 
+ Consider using new API: + - POST /api/jobs//dataset/export?save_images=True to initiate export process + - GET /api/requests/ to check process status, + where rq_id is request id returned on initializing request + """), + parameters=[ + OpenApiParameter('format', location=OpenApiParameter.QUERY, + description='Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats', + type=OpenApiTypes.STR, required=True), + OpenApiParameter('filename', description='Desired output file name', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False), + OpenApiParameter('action', location=OpenApiParameter.QUERY, + description='Used to start downloading process locally after annotation file has been created', + type=OpenApiTypes.STR, required=False, enum=['download']), + OpenApiParameter('use_default_location', description='Use the location that was configured in the task to export dataset', + location=OpenApiParameter.QUERY, type=OpenApiTypes.BOOL, required=False, + default=True, deprecated=True), + OpenApiParameter('location', description='Where need to save downloaded dataset', + location=OpenApiParameter.QUERY, type=OpenApiTypes.STR, required=False, + enum=Location.list()), + OpenApiParameter('cloud_storage_id', description='Storage id', + location=OpenApiParameter.QUERY, type=OpenApiTypes.INT, required=False), + ], + responses={ + '200': OpenApiResponse(OpenApiTypes.BINARY, description='Download of file started'), + '201': OpenApiResponse(description='Output file is ready for downloading'), + '202': OpenApiResponse(description='Exporting has been started'), + '405': OpenApiResponse(description='Format is not available'), + }, + ) + @action(detail=True, methods=['GET'], serializer_class=None, + url_path='dataset', csrf_workaround_is_needed=csrf_workaround_is_needed_for_export) + def dataset_export(self, request, pk): + # FUTURE-TODO: mark this endpoint as deprecated when new API for result file downloading will be implemented + self._object = self.get_object() # force call of check_object_permissions() + + return self.export_dataset_v1(request=request, save_images=True) + def get_export_callback(self, save_images: bool) -> Callable: return dm.views.export_job_as_dataset if save_images else dm.views.export_job_annotations @@ -3116,7 +3350,7 @@ def perform_destroy(self, instance): super().perform_destroy(instance) target.touch() -def rq_exception_handler(rq_job: RQJob, exc_type: Type[Exception], exc_value, tb): +def rq_exception_handler(rq_job: RQJob, exc_type: type[Exception], exc_value, tb): rq_job_meta = RQMetaWithFailureInfo.from_job(rq_job) rq_job_meta.formatted_exception = "".join( traceback.format_exception_only(exc_type, exc_value)) diff --git a/tests/python/rest_api/test_jobs.py b/tests/python/rest_api/test_jobs.py index e5085f7e7287..c2e38df748c1 100644 --- a/tests/python/rest_api/test_jobs.py +++ b/tests/python/rest_api/test_jobs.py @@ -1444,10 +1444,11 @@ def _test_export_dataset( username: str, jid: int, *, + api_version: Union[int, tuple[int]], local_download: bool = True, **kwargs, ) -> Optional[bytes]: - dataset = export_job_dataset(username, save_images=True, id=jid, **kwargs) + dataset = export_job_dataset(username, api_version, save_images=True, id=jid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -1457,9 +1458,9 @@ def _test_export_dataset( @staticmethod def _test_export_annotations( - username: str, jid: int, *, local_download: bool = True, **kwargs + username: str, jid: int, *, 
api_version: int, local_download: bool = True, **kwargs ) -> Optional[bytes]: - dataset = export_job_dataset(username, save_images=False, id=jid, **kwargs) + dataset = export_job_dataset(username, api_version, save_images=False, id=jid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -1467,7 +1468,34 @@ def _test_export_annotations( return dataset - def test_non_admin_can_export_dataset(self, users, jobs_with_shapes): + @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) + @pytest.mark.parametrize( + "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) + ) + def test_can_export_dataset_locally_and_to_cloud_with_both_api_versions( + self, + admin_user: str, + jobs_with_shapes: list, + filter_tasks, + api_version: tuple[int], + local_download: bool, + ): + filter_ = "target_storage__location" + if local_download: + filter_ = "exclude_" + filter_ + + task_ids = [t["id"] for t in filter_tasks(**{filter_: "cloud_storage"})] + + job = next(j for j in jobs_with_shapes if j["task_id"] in task_ids) + self._test_export_dataset( + admin_user, + job["id"], + api_version=api_version, + local_download=local_download, + ) + + @pytest.mark.parametrize("api_version", (1, 2)) + def test_non_admin_can_export_dataset(self, users, jobs_with_shapes, api_version: int): job, username = next( ( (job, self.tasks[job["task_id"]]["owner"]["username"]) @@ -1477,9 +1505,10 @@ def test_non_admin_can_export_dataset(self, users, jobs_with_shapes): and self.tasks[job["task_id"]]["organization"] is None ) ) - self._test_export_dataset(username, job["id"]) + self._test_export_dataset(username, job["id"], api_version=api_version) - def test_non_admin_can_export_annotations(self, users, jobs_with_shapes): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_non_admin_can_export_annotations(self, users, jobs_with_shapes, api_version: int): job, username = next( ( (job, self.tasks[job["task_id"]]["owner"]["username"]) @@ -1490,8 +1519,9 @@ def test_non_admin_can_export_annotations(self, users, jobs_with_shapes): ) ) - self._test_export_annotations(username, job["id"]) + self._test_export_annotations(username, job["id"], api_version=api_version) + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("username, jid", [("admin1", 14)]) @pytest.mark.parametrize( "anno_format, anno_file_name, check_func", @@ -1509,6 +1539,7 @@ def test_exported_job_dataset_structure( check_func, jobs, annotations, + api_version: int, ): job_data = jobs[jid] annotations_before = annotations["job"][str(jid)] @@ -1527,6 +1558,7 @@ def test_exported_job_dataset_structure( dataset = self._test_export_dataset( username, jid, + api_version=api_version, format=anno_format, ) @@ -1537,6 +1569,7 @@ def test_exported_job_dataset_structure( content = zip_file.read(anno_file_name) check_func(content, values_to_be_checked) + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("username", ["admin1"]) @pytest.mark.parametrize("jid", [25, 26]) @pytest.mark.parametrize( @@ -1560,6 +1593,7 @@ def test_export_job_among_several_jobs_in_task( check_func, jobs, annotations, + api_version: int, ): job_data = jobs[jid] annotations_before = annotations["job"][str(jid)] @@ -1578,6 +1612,7 @@ def test_export_job_among_several_jobs_in_task( dataset = self._test_export_dataset( username, jid, + api_version=api_version, format=anno_format, ) diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index 
beeb6acf75a6..69d9bde52710 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -207,10 +207,11 @@ def _test_can_get_project_backup( username: str, pid: int, *, + api_version: int, local_download: bool = True, **kwargs, ) -> Optional[bytes]: - backup = export_project_backup(username, id=pid, **kwargs) + backup = export_project_backup(username, id=pid, api_version=api_version, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(backup)) else: @@ -221,17 +222,20 @@ def _test_cannot_get_project_backup( self, username: str, pid: int, + api_version: int, **kwargs, ): with pytest.raises(ForbiddenException): - export_project_backup(username, id=pid, expect_forbidden=True, **kwargs) + export_project_backup(username, api_version, id=pid, expect_forbidden=True, **kwargs) - def test_admin_can_get_project_backup(self): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_admin_can_get_project_backup(self, api_version: int): project = list(self.projects)[0] - self._test_can_get_project_backup("admin1", project["id"]) + self._test_can_get_project_backup("admin1", project["id"], api_version=api_version) # User that not in [project:owner, project:assignee] cannot get project backup. - def test_user_cannot_get_project_backup(self, find_users, is_project_staff): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_user_cannot_get_project_backup(self, find_users, is_project_staff, api_version: int): users = find_users(exclude_privilege="admin") user, project = next( @@ -240,11 +244,14 @@ def test_user_cannot_get_project_backup(self, find_users, is_project_staff): if not is_project_staff(user["id"], project["id"]) ) - self._test_cannot_get_project_backup(user["username"], project["id"]) + self._test_cannot_get_project_backup( + user["username"], project["id"], api_version=api_version + ) # Org worker that not in [project:owner, project:assignee] cannot get project backup. + @pytest.mark.parametrize("api_version", (1, 2)) def test_org_worker_cannot_get_project_backup( - self, find_users, is_project_staff, is_org_member + self, find_users, is_project_staff, is_org_member, api_version: int ): users = find_users(role="worker", exclude_privilege="admin") @@ -256,14 +263,18 @@ def test_org_worker_cannot_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_cannot_get_project_backup(user["username"], project["id"]) + self._test_cannot_get_project_backup( + user["username"], project["id"], api_version=api_version + ) # Org worker that in [project:owner, project:assignee] can get project backup. + @pytest.mark.parametrize("api_version", (1, 2)) def test_org_worker_can_get_project_backup( self, find_users, is_project_staff, is_org_member, + api_version: int, ): users = find_users(role="worker", exclude_privilege="admin") @@ -275,11 +286,12 @@ def test_org_worker_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"]) + self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) # Org supervisor that in [project:owner, project:assignee] can get project backup. 
+ @pytest.mark.parametrize("api_version", (1, 2)) def test_org_supervisor_can_get_project_backup( - self, find_users, is_project_staff, is_org_member + self, find_users, is_project_staff, is_org_member, api_version: int ): users = find_users(role="supervisor", exclude_privilege="admin") @@ -291,14 +303,16 @@ def test_org_supervisor_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"]) + self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) # Org supervisor that not in [project:owner, project:assignee] cannot get project backup. + @pytest.mark.parametrize("api_version", (1, 2)) def test_org_supervisor_cannot_get_project_backup( self, find_users, is_project_staff, is_org_member, + api_version: int, ): users = find_users(exclude_privilege="admin") @@ -310,14 +324,18 @@ def test_org_supervisor_cannot_get_project_backup( and is_org_member(user["id"], project["organization"], role="supervisor") ) - self._test_cannot_get_project_backup(user["username"], project["id"]) + self._test_cannot_get_project_backup( + user["username"], project["id"], api_version=api_version + ) # Org maintainer that not in [project:owner, project:assignee] can get project backup. + @pytest.mark.parametrize("api_version", (1, 2)) def test_org_maintainer_can_get_project_backup( self, find_users, is_project_staff, is_org_member, + api_version: int, ): users = find_users(role="maintainer", exclude_privilege="admin") @@ -329,10 +347,13 @@ def test_org_maintainer_can_get_project_backup( and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"]) + self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) # Org owner that not in [project:owner, project:assignee] can get project backup. 
- def test_org_owner_can_get_project_backup(self, find_users, is_project_staff, is_org_member): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_org_owner_can_get_project_backup( + self, find_users, is_project_staff, is_org_member, api_version: int + ): users = find_users(role="owner", exclude_privilege="admin") user, project = next( @@ -343,9 +364,10 @@ def test_org_owner_can_get_project_backup(self, find_users, is_project_staff, is and is_org_member(user["id"], project["organization"]) ) - self._test_can_get_project_backup(user["username"], project["id"]) + self._test_can_get_project_backup(user["username"], project["id"], api_version=api_version) - def test_can_get_backup_project_when_some_tasks_have_no_data(self): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_get_backup_project_when_some_tasks_have_no_data(self, api_version: int): project = next((p for p in self.projects if 0 < p["tasks"]["count"])) # add empty task to project @@ -354,9 +376,12 @@ def test_can_get_backup_project_when_some_tasks_have_no_data(self): ) assert response.status_code == HTTPStatus.CREATED - self._test_can_get_project_backup("admin1", project["id"]) + self._test_can_get_project_backup("admin1", project["id"], api_version=api_version) - def test_can_get_backup_project_when_all_tasks_have_no_data(self, filter_projects): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_get_backup_project_when_all_tasks_have_no_data( + self, api_version: int, filter_projects + ): project = filter_projects(tasks__count=0)[0] # add empty tasks to empty project @@ -376,15 +401,19 @@ def test_can_get_backup_project_when_all_tasks_have_no_data(self, filter_project ) assert response.status_code == HTTPStatus.CREATED, response.text - self._test_can_get_project_backup("admin1", project["id"]) + self._test_can_get_project_backup("admin1", project["id"], api_version=api_version) - def test_can_get_backup_for_empty_project(self): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_get_backup_for_empty_project(self, api_version: int): empty_project = next((p for p in self.projects if 0 == p["tasks"]["count"])) - self._test_can_get_project_backup("admin1", empty_project["id"]) + self._test_can_get_project_backup("admin1", empty_project["id"], api_version=api_version) - def test_admin_can_get_project_backup_and_create_project_by_backup(self, admin_user: str): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_admin_can_get_project_backup_and_create_project_by_backup( + self, admin_user: str, api_version: int + ): project_id = 5 - backup = self._test_can_get_project_backup(admin_user, project_id) + backup = self._test_can_get_project_backup(admin_user, project_id, api_version=api_version) tmp_file = io.BytesIO(backup) tmp_file.name = "dataset.zip" @@ -602,10 +631,11 @@ def _test_export_dataset( username: str, pid: int, *, + api_version: Union[int, tuple[int]], local_download: bool = True, **kwargs, ) -> Optional[bytes]: - dataset = export_project_dataset(username, save_images=True, id=pid, **kwargs) + dataset = export_project_dataset(username, api_version, save_images=True, id=pid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -615,9 +645,9 @@ def _test_export_dataset( @staticmethod def _test_export_annotations( - username: str, pid: int, *, local_download: bool = True, **kwargs + username: str, pid: int, *, api_version: int, local_download: bool = True, **kwargs ) -> Optional[bytes]: - dataset = export_project_dataset(username, 
save_images=False, id=pid, **kwargs) + dataset = export_project_dataset(username, api_version, save_images=False, id=pid, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -665,6 +695,7 @@ def test_can_import_dataset_in_org(self, admin_user: str): dataset = self._test_export_dataset( admin_user, project_id, + api_version=2, ) tmp_file = io.BytesIO(dataset) @@ -707,6 +738,7 @@ def test_can_export_and_import_dataset_with_skeletons( dataset = self._test_export_dataset( admin_user, project_id, + api_version=2, format=export_format, ) @@ -718,8 +750,9 @@ def test_can_export_and_import_dataset_with_skeletons( self._test_import_project(admin_user, project_id, import_format, import_data) + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("format_name", ("Datumaro 1.0", "ImageNet 1.0", "PASCAL VOC 1.1")) - def test_can_import_export_dataset_with_some_format(self, format_name: str): + def test_can_import_export_dataset_with_some_format(self, format_name: str, api_version: int): # https://github.com/cvat-ai/cvat/issues/4410 # https://github.com/cvat-ai/cvat/issues/4850 # https://github.com/cvat-ai/cvat/issues/4621 @@ -729,6 +762,7 @@ def test_can_import_export_dataset_with_some_format(self, format_name: str): dataset = self._test_export_dataset( username, project_id, + api_version=api_version, format=format_name, ) @@ -741,6 +775,27 @@ def test_can_import_export_dataset_with_some_format(self, format_name: str): self._test_import_project(username, project_id, format_name, import_data) + @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) + @pytest.mark.parametrize( + "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) + ) + def test_can_export_dataset_locally_and_to_cloud_with_both_api_versions( + self, admin_user: str, filter_projects, api_version: tuple[int], local_download: bool + ): + filter_ = "target_storage__location" + if local_download: + filter_ = "exclude_" + filter_ + + pid = filter_projects(**{filter_: "cloud_storage"})[0]["id"] + + self._test_export_dataset( + admin_user, + pid, + api_version=api_version, + local_download=local_download, + ) + + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("username, pid", [("admin1", 8)]) @pytest.mark.parametrize( "anno_format, anno_file_name, check_func", @@ -760,6 +815,7 @@ def test_exported_project_dataset_structure( anno_file_name, check_func, tasks, + api_version: int, ): project = self.projects[pid] @@ -781,6 +837,7 @@ def test_exported_project_dataset_structure( dataset = self._test_export_annotations( username, pid, + api_version=api_version, format=anno_format, ) @@ -788,7 +845,8 @@ def test_exported_project_dataset_structure( content = zip_file.read(anno_file_name) check_func(content, values_to_be_checked) - def test_can_import_export_annotations_with_rotation(self): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_import_export_annotations_with_rotation(self, api_version: int): # https://github.com/cvat-ai/cvat/issues/4378 username = "admin1" project_id = 4 @@ -796,6 +854,7 @@ def test_can_import_export_annotations_with_rotation(self): dataset = self._test_export_dataset( username, project_id, + api_version=api_version, ) tmp_file = io.BytesIO(dataset) @@ -818,7 +877,8 @@ def test_can_import_export_annotations_with_rotation(self): assert task1_rotation == task2_rotation - def test_can_export_dataset_with_skeleton_labels_with_spaces(self): + @pytest.mark.parametrize("api_version", (1, 2)) + 
def test_can_export_dataset_with_skeleton_labels_with_spaces(self, api_version: int): # https://github.com/cvat-ai/cvat/issues/5257 # https://github.com/cvat-ai/cvat/issues/5600 username = "admin1" @@ -827,20 +887,26 @@ def test_can_export_dataset_with_skeleton_labels_with_spaces(self): self._test_export_dataset( username, project_id, + api_version=api_version, format="COCO Keypoints 1.0", ) - def test_can_export_dataset_for_empty_project(self, filter_projects): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_export_dataset_for_empty_project(self, filter_projects, api_version: int): empty_project = filter_projects( tasks__count=0, exclude_target_storage__location="cloud_storage" )[0] self._test_export_dataset( "admin1", empty_project["id"], + api_version=api_version, format="COCO 1.0", ) - def test_can_export_project_dataset_when_some_tasks_have_no_data(self, filter_projects): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_export_project_dataset_when_some_tasks_have_no_data( + self, filter_projects, api_version: int + ): project = filter_projects( exclude_tasks__count=0, exclude_target_storage__location="cloud_storage" )[0] @@ -860,10 +926,14 @@ def test_can_export_project_dataset_when_some_tasks_have_no_data(self, filter_pr self._test_export_dataset( "admin1", project["id"], + api_version=api_version, format="COCO 1.0", ) - def test_can_export_project_dataset_when_all_tasks_have_no_data(self, filter_projects): + @pytest.mark.parametrize("api_version", (1, 2)) + def test_can_export_project_dataset_when_all_tasks_have_no_data( + self, filter_projects, api_version: int + ): project = filter_projects(tasks__count=0, exclude_target_storage__location="cloud_storage")[ 0 ] @@ -888,12 +958,14 @@ def test_can_export_project_dataset_when_all_tasks_have_no_data(self, filter_pro self._test_export_dataset( "admin1", project["id"], + api_version=api_version, format="COCO 1.0", ) + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("cloud_storage_id", [3]) # import/export bucket def test_can_export_and_import_dataset_after_deleting_related_storage( - self, admin_user, cloud_storage_id: int + self, admin_user, cloud_storage_id: int, api_version: int ): project_id = next( p @@ -911,7 +983,7 @@ def test_can_export_and_import_dataset_after_deleting_related_storage( result, response = api_client.projects_api.retrieve(project_id) assert all([not getattr(result, field) for field in ("source_storage", "target_storage")]) - dataset = self._test_export_dataset(admin_user, project_id) + dataset = self._test_export_dataset(admin_user, project_id, api_version=api_version) with io.BytesIO(dataset) as tmp_file: tmp_file.name = "dataset.zip" @@ -938,6 +1010,7 @@ def _export_task(task_id: int, format_name: str) -> io.BytesIO: return io.BytesIO( export_dataset( api_client.tasks_api, + api_version=2, id=task_id, format=format_name, save_images=False, @@ -1011,8 +1084,9 @@ def _export_task(task_id: int, format_name: str) -> io.BytesIO: ("Ultralytics YOLO Pose 1.0", "images/{subset}/"), ], ) + @pytest.mark.parametrize("api_version", (1, 2)) def test_creates_subfolders_for_subsets_on_export( - self, filter_tasks, admin_user, export_format, subset_path_template + self, filter_tasks, admin_user, export_format, subset_path_template, api_version: int ): group_key_func = itemgetter("project_id") subsets = ["Train", "Validation"] @@ -1025,7 +1099,9 @@ def test_creates_subfolders_for_subsets_on_export( ) if sorted(task["subset"] for task in group) == subsets ) - dataset = 
self._test_export_dataset(admin_user, project_id, format=export_format) + dataset = self._test_export_dataset( + admin_user, project_id, api_version=api_version, format=export_format + ) with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: for subset in subsets: folder_prefix = subset_path_template.format(subset=subset) @@ -1066,7 +1142,7 @@ def test_export_project_with_honeypots(self, admin_user: str): create_task(admin_user, spec=task_params, data=data_params) dataset = export_project_dataset( - admin_user, save_images=True, id=project.id, format="COCO 1.0" + admin_user, api_version=2, save_images=True, id=project.id, format="COCO 1.0" ) with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index efadb141b8b5..c6a048db61c7 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -153,7 +153,7 @@ def download_file(resource: str, rid: int, subresource: str): ), }[(resource, subresource)] - data = func(self.user, id=rid, download_result=True) + data = func(self.user, api_version=2, id=rid, download_result=True) assert data, f"Failed to download {resource} {subresource} locally" return data @@ -163,12 +163,15 @@ def download_file(resource: str, rid: int, subresource: str): def fxt_make_export_project_requests(self): def make_requests(project_ids: list[int]): for project_id in project_ids: - export_project_backup(self.user, id=project_id, download_result=False) + export_project_backup( + self.user, api_version=2, id=project_id, download_result=False + ) export_project_dataset( - self.user, save_images=True, id=project_id, download_result=False + self.user, api_version=2, save_images=True, id=project_id, download_result=False ) export_project_dataset( self.user, + api_version=2, save_images=False, id=project_id, download_result=False, @@ -180,9 +183,13 @@ def make_requests(project_ids: list[int]): def fxt_make_export_task_requests(self): def make_requests(task_ids: list[int]): for task_id in task_ids: - export_task_backup(self.user, id=task_id, download_result=False) - export_task_dataset(self.user, save_images=True, id=task_id, download_result=False) - export_task_dataset(self.user, save_images=False, id=task_id, download_result=False) + export_task_backup(self.user, api_version=2, id=task_id, download_result=False) + export_task_dataset( + self.user, api_version=2, save_images=True, id=task_id, download_result=False + ) + export_task_dataset( + self.user, api_version=2, save_images=False, id=task_id, download_result=False + ) return make_requests @@ -192,6 +199,7 @@ def make_requests(job_ids: list[int]): for job_id in job_ids: export_job_dataset( self.user, + api_version=2, save_images=True, id=job_id, format="COCO 1.0", @@ -199,6 +207,7 @@ def make_requests(job_ids: list[int]): ) export_job_dataset( self.user, + api_version=2, save_images=False, id=job_id, format="YOLO 1.1", @@ -275,6 +284,7 @@ def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, p subresource = "dataset" if save_images else "annotations" export_project_dataset( owner["username"], + api_version=2, save_images=save_images, id=project["id"], download_result=False, @@ -316,6 +326,7 @@ def test_non_owner_cannot_retrieve_request(self, find_users, projects, format_na export_project_dataset( owner["username"], + api_version=2, save_images=True, id=project["id"], download_result=False, diff --git a/tests/python/rest_api/test_tasks.py 
b/tests/python/rest_api/test_tasks.py index 3e4ce8a0080e..22d452637c51 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -743,10 +743,11 @@ def _test_can_export_dataset( username: str, task_id: int, *, + api_version: Union[int, tuple[int]], local_download: bool = True, **kwargs, ) -> Optional[bytes]: - dataset = export_task_dataset(username, save_images=True, id=task_id, **kwargs) + dataset = export_task_dataset(username, api_version, save_images=True, id=task_id, **kwargs) if local_download: assert zipfile.is_zipfile(io.BytesIO(dataset)) else: @@ -754,28 +755,50 @@ def _test_can_export_dataset( return dataset + @pytest.mark.usefixtures("restore_db_per_function") + @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) + @pytest.mark.parametrize( + "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) + ) + def test_can_export_task_dataset_locally_and_to_cloud_with_both_api_versions( + self, + admin_user, + tasks_with_shapes, + filter_tasks, + api_version: tuple[int], + local_download: bool, + ): + filter_ = "target_storage__location" + if local_download: + filter_ = "exclude_" + filter_ + filtered_ids = {t["id"] for t in filter_tasks(**{filter_: "cloud_storage"})} + + task_id = next(iter(filtered_ids & {t["id"] for t in tasks_with_shapes})) + self._test_can_export_dataset( + admin_user, + task_id, + api_version=api_version, + local_download=local_download, + ) + + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("tid", [21]) @pytest.mark.parametrize( "format_name", ["CVAT for images 1.1", "CVAT for video 1.1", "COCO Keypoints 1.0"] ) def test_can_export_task_with_several_jobs( - self, - admin_user, - tid, - format_name, + self, admin_user, tid, format_name, api_version: int ): self._test_can_export_dataset( admin_user, tid, format=format_name, + api_version=api_version, ) + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.parametrize("tid", [8]) - def test_can_export_task_to_coco_format( - self, - admin_user: str, - tid: int, - ): + def test_can_export_task_to_coco_format(self, admin_user: str, tid: int, api_version: int): # these annotations contains incorrect frame numbers # in order to check that server handle such cases annotations = { @@ -864,6 +887,7 @@ def test_can_export_task_to_coco_format( admin_user, tid, format="COCO Keypoints 1.0", + api_version=api_version, ) # check that server saved track annotations correctly @@ -875,12 +899,10 @@ def test_can_export_task_to_coco_format( assert annotations["tracks"][0]["shapes"][0]["frame"] == 0 assert annotations["tracks"][0]["elements"][0]["shapes"][0]["frame"] == 0 + @pytest.mark.parametrize("api_version", (1, 2)) @pytest.mark.usefixtures("restore_db_per_function") @pytest.mark.usefixtures("restore_redis_ondisk_per_function") - def test_can_download_task_with_special_chars_in_name( - self, - admin_user: str, - ): + def test_can_download_task_with_special_chars_in_name(self, admin_user: str, api_version: int): # Control characters in filenames may conflict with the Content-Disposition header # value restrictions, as it needs to include the downloaded file name. 
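For reference, the api_version argument threaded through these tests selects which export API the helpers exercise: a single int uses that version both to initiate the export and to poll for the result, while a 2-tuple such as (1, 2) initiates with the first version and polls with the second. A minimal usage sketch, assuming the helpers from tests/python/rest_api/utils.py (added later in this patch) are importable from the test suite and using a made-up task id and format:

    # Illustrative sketch only; export_task_dataset is defined in tests/python/rest_api/utils.py,
    # and the import path, task id, and format below are placeholders.
    from rest_api.utils import export_task_dataset

    # API v2 both to start the export and to poll/download the result
    dataset = export_task_dataset("admin1", 2, save_images=True, id=42, format="COCO 1.0")

    # start via the deprecated v1 endpoint, then poll via the v2 requests API
    dataset = export_task_dataset("admin1", (1, 2), save_images=True, id=42, format="COCO 1.0")
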
@@ -896,14 +918,13 @@ def test_can_download_task_with_special_chars_in_name( task_id, _ = create_task(admin_user, task_spec, task_data) - dataset = self._test_can_export_dataset(admin_user, task_id) + dataset = self._test_can_export_dataset(admin_user, task_id, api_version=api_version) assert zipfile.is_zipfile(io.BytesIO(dataset)) @pytest.mark.usefixtures("restore_db_per_function") + @pytest.mark.parametrize("api_version", (1, 2)) def test_export_dataset_after_deleting_related_cloud_storage( - self, - admin_user: str, - tasks, + self, admin_user: str, tasks, api_version: int ): related_field = "target_storage" @@ -920,7 +941,7 @@ def test_export_dataset_after_deleting_related_cloud_storage( result, response = api_client.tasks_api.retrieve(task_id) assert not result[related_field] - self._test_can_export_dataset(admin_user, task["id"]) + self._test_can_export_dataset(admin_user, task["id"], api_version=api_version) @pytest.mark.parametrize( "export_format, default_subset_name, subset_path_template", @@ -930,6 +951,7 @@ def test_export_dataset_after_deleting_related_cloud_storage( ("Ultralytics YOLO Detection 1.0", "train", "images/{subset}"), ], ) + @pytest.mark.parametrize("api_version", (1, 2)) def test_uses_subset_name( self, admin_user, @@ -937,6 +959,7 @@ def test_uses_subset_name( export_format, default_subset_name, subset_path_template, + api_version: int, ): tasks = filter_tasks(exclude_target_storage__location="cloud_storage") group_key_func = itemgetter("subset") @@ -951,6 +974,7 @@ def test_uses_subset_name( dataset = self._test_can_export_dataset( admin_user, task["id"], + api_version=api_version, format=export_format, ) with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: @@ -973,6 +997,7 @@ def test_datumaro_export_without_annotations_includes_image_info( dataset_file = io.BytesIO( export_dataset( api_client.tasks_api, + api_version=2, id=task["id"], format=DATUMARO_FORMAT_FOR_DIMENSION[dimension], save_images=False, @@ -4089,6 +4114,23 @@ def setup( with make_sdk_client(self.user) as client: self.client = client + @pytest.mark.parametrize("api_version", product((1, 2), repeat=2)) + @pytest.mark.parametrize( + "local_download", (True, pytest.param(False, marks=pytest.mark.with_external_services)) + ) + def test_can_export_backup_with_both_api_versions( + self, filter_tasks, api_version: tuple[int], local_download: bool + ): + task = filter_tasks( + **{("exclude_" if local_download else "") + "target_storage__location": "cloud_storage"} + )[0] + backup = export_task_backup(self.user, api_version, id=task["id"]) + + if local_download: + assert zipfile.is_zipfile(io.BytesIO(backup)) + else: + assert backup is None + def _test_can_export_backup(self, task_id: int): task = self.client.tasks.retrieve(task_id) @@ -5578,6 +5620,7 @@ def test_can_import_datumaro_json(self, admin_user, tasks, dimension): dataset_archive = io.BytesIO( export_dataset( api_client.tasks_api, + api_version=2, id=task["id"], format=DATUMARO_FORMAT_FOR_DIMENSION[dimension], save_images=False, diff --git a/tests/python/rest_api/utils.py b/tests/python/rest_api/utils.py index 460695f8a887..eaa00a8193b2 100644 --- a/tests/python/rest_api/utils.py +++ b/tests/python/rest_api/utils.py @@ -41,6 +41,76 @@ def initialize_export(endpoint: Endpoint, *, expect_forbidden: bool = False, **k return rq_id +def wait_and_download_v1( + endpoint: Endpoint, + *, + max_retries: int = 50, + interval: float = 0.1, + download_result: bool = True, + **kwargs, +) -> Optional[bytes]: + for _ in range(max_retries): + (_, response) = 
endpoint.call_with_http_info(**kwargs, _parse_response=False)
+        if response.status in (HTTPStatus.CREATED, HTTPStatus.OK):
+            break
+        assert response.status == HTTPStatus.ACCEPTED
+        sleep(interval)
+    else:
+        assert (
+            False
+        ), f"Export process was not finished within the allowed time ({interval * max_retries} sec)"
+
+    if not download_result:
+        return None
+
+    if response.status == HTTPStatus.CREATED:
+        (_, response) = endpoint.call_with_http_info(
+            **kwargs, action="download", _parse_response=False
+        )
+        assert response.status == HTTPStatus.OK
+
+    return response.data or None  # return None when export was on cloud storage
+
+
+def export_v1(
+    endpoint: Endpoint,
+    *,
+    max_retries: int = 50,
+    interval: float = 0.1,
+    expect_forbidden: bool = False,
+    wait_result: bool = True,
+    download_result: bool = True,
+    **kwargs,
+) -> Optional[bytes]:
+    """Export datasets|annotations|backups using the first version of the export API
+
+    Args:
+        endpoint (Endpoint): Export endpoint; called to initialize the export process and to check its status
+        max_retries (int, optional): Number of retries when checking process status. Defaults to 50.
+        interval (float, optional): Interval in seconds between retries. Defaults to 0.1.
+        expect_forbidden (bool, optional): Whether the export request is expected to be forbidden. Defaults to False.
+        wait_result (bool, optional): Wait until the export process is finished. Defaults to True.
+        download_result (bool, optional): Download the exported file. Defaults to True.
+
+    Returns:
+        bytes: The content of the file if downloaded locally.
+        None: If `wait_result` or `download_result` is False, or the file was saved to cloud storage.
+    """
+    # initialize the background process and ensure that the first request returns a 403 code if the request should be forbidden
+    initialize_export(endpoint, expect_forbidden=expect_forbidden, **kwargs)
+
+    if not wait_result:
+        return None
+
+    return wait_and_download_v1(
+        endpoint,
+        max_retries=max_retries,
+        interval=interval,
+        download_result=download_result,
+        **kwargs,
+    )
+
+
 def wait_and_download_v2(
     api_client: ApiClient,
     rq_id: str,
@@ -73,7 +143,7 @@ def wait_and_download_v2(
         background_request.result_url,
         auth=(api_client.configuration.username, api_client.configuration.password),
     )
-    assert response.status_code == HTTPStatus.OK, f"Status: {response.status_code}"
+    assert response.status_code == HTTPStatus.OK
 
     return response.content
 
@@ -121,6 +191,9 @@ def export_v2(
 
 def export_dataset(
     api: Union[ProjectsApi, TasksApi, JobsApi],
+    api_version: Union[
+        int, tuple[int]
+    ],  # make this parameter required to be sure that all tests were updated and both API versions are used
     *,
     save_images: bool,
     max_retries: int = 300,
@@ -128,52 +201,126 @@ def export_dataset(
     format: str = "CVAT for images 1.1",  # pylint: disable=redefined-builtin
     **kwargs,
 ) -> Optional[bytes]:
-    return export_v2(
-        api.create_dataset_export_endpoint,
-        max_retries=max_retries,
-        interval=interval,
-        save_images=save_images,
-        format=format,
-        **kwargs,
-    )
+    def _get_endpoint_and_kwargs(version: int) -> tuple[Endpoint, dict]:
+        extra_kwargs = {
+            "format": format,
+        }
+        if version == 1:
+            endpoint = (
+                api.retrieve_dataset_endpoint if save_images else api.retrieve_annotations_endpoint
+            )
+        else:
+            endpoint = api.create_dataset_export_endpoint
+            extra_kwargs["save_images"] = save_images
+        return endpoint, extra_kwargs
+
+    if api_version == 1:
+        endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version)
+        return export_v1(
+            endpoint,
+            max_retries=max_retries,
+            interval=interval,
+            **kwargs,
+            **extra_kwargs,
+        )
+    elif api_version == 2:
+        endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version)
+        return export_v2(
+            endpoint,
+            max_retries=max_retries,
+            interval=interval,
+            **kwargs,
+            **extra_kwargs,
+        )
+    elif isinstance(api_version, tuple):
+        assert len(api_version) == 2, "Expected 2 elements in api_version tuple"
+        initialize_endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version[0])
+        rq_id = initialize_export(initialize_endpoint, **kwargs, **extra_kwargs)
+
+        if api_version[1] == 1:
+            endpoint, extra_kwargs = _get_endpoint_and_kwargs(api_version[1])
+            return wait_and_download_v1(
+                endpoint, max_retries=max_retries, interval=interval, **kwargs, **extra_kwargs
+            )
+        else:
+            return wait_and_download_v2(
+                api.api_client, rq_id, max_retries=max_retries, interval=interval
+            )
+
+    assert False, "Unsupported API version"
 
 
 # FUTURE-TODO: support username: optional, api_client: optional
-# tODO: make func signature more userfrendly
-def export_project_dataset(username: str, *args, **kwargs) -> Optional[bytes]:
+def export_project_dataset(
+    username: str, api_version: Union[int, tuple[int]], *args, **kwargs
+) -> Optional[bytes]:
     with make_api_client(username) as api_client:
-        return export_dataset(api_client.projects_api, *args, **kwargs)
+        return export_dataset(api_client.projects_api, api_version, *args, **kwargs)
 
 
-def export_task_dataset(username: str, *args, **kwargs) -> Optional[bytes]:
+def export_task_dataset(
+    username: str, api_version: Union[int, tuple[int]], *args, **kwargs
+) -> Optional[bytes]:
     with make_api_client(username) as api_client:
-        return export_dataset(api_client.tasks_api, *args, **kwargs)
+        return export_dataset(api_client.tasks_api, api_version, *args, **kwargs)
 
 
-def export_job_dataset(username: str, *args, **kwargs) -> Optional[bytes]:
+def export_job_dataset(
+    username: str, api_version: Union[int, tuple[int]], *args, **kwargs
+) -> Optional[bytes]:
     with make_api_client(username) as api_client:
-        return export_dataset(api_client.jobs_api, *args, **kwargs)
+        return export_dataset(api_client.jobs_api, api_version, *args, **kwargs)
 
 
 def export_backup(
     api: Union[ProjectsApi, TasksApi],
+    api_version: Union[
+        int, tuple[int]
+    ],  # make this parameter required to be sure that all tests were updated and both API versions are used
     *,
     max_retries: int = 50,
     interval: float = 0.1,
     **kwargs,
 ) -> Optional[bytes]:
-    endpoint = api.create_backup_export_endpoint
-    return export_v2(endpoint, max_retries=max_retries, interval=interval, **kwargs)
+    if api_version == 1:
+        endpoint = api.retrieve_backup_endpoint
+        return export_v1(endpoint, max_retries=max_retries, interval=interval, **kwargs)
+    elif api_version == 2:
+        endpoint = api.create_backup_export_endpoint
+        return export_v2(endpoint, max_retries=max_retries, interval=interval, **kwargs)
+    elif isinstance(api_version, tuple):
+        assert len(api_version) == 2, "Expected 2 elements in api_version tuple"
+        initialize_endpoint = (
+            api.retrieve_backup_endpoint
+            if api_version[0] == 1
+            else api.create_backup_export_endpoint
+        )
+        rq_id = initialize_export(initialize_endpoint, **kwargs)
+        if api_version[1] == 1:
+            return wait_and_download_v1(
+                api.retrieve_backup_endpoint, max_retries=max_retries, interval=interval, **kwargs
+            )
+        else:
+            return wait_and_download_v2(
+                api.api_client, rq_id, max_retries=max_retries, interval=interval
+            )
 
-def export_project_backup(username: str, *args, **kwargs) -> Optional[bytes]:
+    assert False, "Unsupported API version"
+
+
+def export_project_backup(
+ username: str, api_version: Union[int, tuple[int]], *args, **kwargs +) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_backup(api_client.projects_api, *args, **kwargs) + return export_backup(api_client.projects_api, api_version, *args, **kwargs) -def export_task_backup(username: str, *args, **kwargs) -> Optional[bytes]: +def export_task_backup( + username: str, api_version: Union[int, tuple[int]], *args, **kwargs +) -> Optional[bytes]: with make_api_client(username) as api_client: - return export_backup(api_client.tasks_api, *args, **kwargs) + return export_backup(api_client.tasks_api, api_version, *args, **kwargs) def import_resource( From 472763cf4e0c9da6eed68a88d959ff938490588e Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 10 Feb 2025 13:01:32 +0100 Subject: [PATCH 09/14] Sort imports --- cvat/apps/engine/background.py | 2 +- cvat/apps/engine/views.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index d7a8121c6bd6..17ba915f3f2a 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -38,7 +38,7 @@ Task, ) from cvat.apps.engine.permissions import get_cloud_storage_for_import_or_export -from cvat.apps.engine.rq_job_handler import RQId, ExportRQMeta +from cvat.apps.engine.rq_job_handler import ExportRQMeta, RQId from cvat.apps.engine.serializers import RqIdSerializer from cvat.apps.engine.utils import ( build_annotations_file_name, diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 9e06288d3df8..8bcc2e8d522e 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -120,7 +120,12 @@ get_cloud_storage_for_import_or_export, get_iam_context, ) -from cvat.apps.engine.rq_job_handler import RQId, is_rq_job_owner, ImportRQMeta, RQMetaWithFailureInfo +from cvat.apps.engine.rq_job_handler import ( + ImportRQMeta, + RQId, + RQMetaWithFailureInfo, + is_rq_job_owner, +) from cvat.apps.engine.serializers import ( AboutSerializer, AnnotationFileSerializer, From d9eeecbe77d1baed1b9226471d336d23fc21ce65 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 10 Feb 2025 13:10:52 +0100 Subject: [PATCH 10/14] Remove commented code --- cvat/apps/engine/background.py | 2 -- cvat/apps/engine/rq_job_handler.py | 21 +-------------------- cvat/apps/engine/serializers.py | 1 - 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 17ba915f3f2a..02628354b2d8 100644 --- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -480,7 +480,6 @@ def setup_background_job( meta = ExportRQMeta.build( request=self.request, db_obj=self.db_instance, - # result_filename=result_filename, result_url=result_url, ) queue.enqueue_call( @@ -762,7 +761,6 @@ def setup_background_job( meta = ExportRQMeta.build( request=self.request, db_obj=self.db_instance, - # result_filename=result_filename, result_url=result_url, ) queue.enqueue_call( diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 9caba569a829..0d6b9c3ac46a 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -45,21 +45,11 @@ def to_dict(self) -> dict[str, Any]: @attrs.frozen(kw_only=True) class RequestInfo: uuid: str = attrs.field(validator=[str_validator]) - # TODO: it is not timestamp timestamp: datetime = attrs.field(validator=[attrs.validators.instance_of(datetime)]) def to_dict(self) -> 
dict[str, Any]: return asdict(self) -# FUTURE-TODO: uncomment -# @attrs.frozen(kw_only=True) -# class ExportResultInfo: -# url: str | None = attrs.field(validator=[optional_str_validator]) -# filename: str = attrs.field(validator=[str_validator]) - -# def to_dict(self) -> dict[str, Any]: -# return asdict(self) - @attrs.define class AbstractRQMeta(metaclass=ABCMeta): @@ -198,12 +188,8 @@ def build( @attrs.define(kw_only=True) class ExportRQMeta(BaseRQMeta): + # will be changed to ExportResultInfo in the next PR result_url: str | None = attrs.field(validator=[optional_str_validator]) - # FUTURE-TODO: uncomment - # result: ExportResultInfo = attrs.field( - # converter=lambda d: ExportResultInfo(**d), - # on_setattr=attrs.setters.frozen, - # ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: @@ -218,17 +204,12 @@ def build( request: PatchedRequest, db_obj: Model | None, result_url: str | None, - # result_filename: str, ): base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) return cls( **base_meta, result_url=result_url, - # result=ExportResultInfo( - # filename=result_filename, - # url=result_url, - # ), ).to_dict() diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index ba4ba079b6a9..056c794088c5 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -3606,7 +3606,6 @@ def to_representation(self, rq_job: RQJob) -> dict[str, Any]: if representation["status"] == RQJobStatus.FINISHED: if rq_job.parsed_rq_id.action == models.RequestAction.EXPORT: - # representation["result_url"] = ExportRQMeta.from_job(rq_job).result.url representation["result_url"] = ExportRQMeta.from_job(rq_job).result_url if ( From b8f8a880afa29f16c7206c6578fca257f8d80fe1 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 10 Feb 2025 13:11:32 +0100 Subject: [PATCH 11/14] Fix var usage --- cvat/apps/events/handlers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cvat/apps/events/handlers.py b/cvat/apps/events/handlers.py index 28078e39744a..06d6624ae35e 100644 --- a/cvat/apps/events/handlers.py +++ b/cvat/apps/events/handlers.py @@ -101,7 +101,7 @@ def get_user(instance=None) -> User | dict | None: def _get_user_from_rq_job(rq_job: rq.job.Job) -> dict | None: # RQ jobs created in the chunks queue have no user info try: - return BaseRQMeta.from_job(instance).user.to_dict() + return BaseRQMeta.from_job(rq_job).user.to_dict() except AttributeError: return None @@ -128,7 +128,7 @@ def get_request(instance=None): def _get_request_from_rq_job(rq_job: rq.job.Job) -> dict | None: # RQ jobs created in the chunks queue have no request info try: - return BaseRQMeta.from_job(instance).request.to_dict() + return BaseRQMeta.from_job(rq_job).request.to_dict() except AttributeError: return None From 6208de4679c485f082d330d0ae1a69c067cb12a0 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 10 Feb 2025 16:58:25 +0100 Subject: [PATCH 12/14] Small fixes --- cvat/apps/engine/background.py | 4 +- cvat/apps/engine/backup.py | 2 +- cvat/apps/engine/rq_job_handler.py | 71 ++++++------------------------ cvat/apps/engine/serializers.py | 20 +++++---- cvat/apps/engine/task.py | 2 +- cvat/apps/engine/views.py | 4 +- cvat/apps/lambda_manager/rq.py | 47 ++++++++++++++++++++ cvat/apps/lambda_manager/views.py | 5 ++- 8 files changed, 80 insertions(+), 75 deletions(-) create mode 100644 cvat/apps/lambda_manager/rq.py diff --git a/cvat/apps/engine/background.py b/cvat/apps/engine/background.py index 02628354b2d8..e3683ec51d8c 100644 
--- a/cvat/apps/engine/background.py +++ b/cvat/apps/engine/background.py @@ -477,7 +477,7 @@ def setup_background_job( result_url = self.make_result_url() with get_rq_lock_by_user(queue, user_id): - meta = ExportRQMeta.build( + meta = ExportRQMeta.build_for( request=self.request, db_obj=self.db_instance, result_url=result_url, @@ -758,7 +758,7 @@ def setup_background_job( user_id = self.request.user.id with get_rq_lock_by_user(queue, user_id): - meta = ExportRQMeta.build( + meta = ExportRQMeta.build_for( request=self.request, db_obj=self.db_instance, result_url=result_url, diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py index 8304c6d287d2..dff061f0c154 100644 --- a/cvat/apps/engine/backup.py +++ b/cvat/apps/engine/backup.py @@ -1197,7 +1197,7 @@ def _import(importer, request: PatchedRequest, queue, rq_id, Serializer, file_fi user_id = request.user.id with get_rq_lock_by_user(queue, user_id): - meta = ImportRQMeta.build( + meta = ImportRQMeta.build_for( request=request, db_obj=None, tmp_file=filename, diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 0d6b9c3ac46a..1046385f7ab5 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -83,7 +83,7 @@ def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: @attrs.define(kw_only=True) class RQMetaWithFailureInfo(AbstractRQMeta): - # immutable and optional fields + # mutable && optional fields formatted_exception: str | None = attrs.field( validator=[optional_str_validator], default=None, @@ -98,7 +98,6 @@ class RQMetaWithFailureInfo(AbstractRQMeta): @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: - """Return a list of fields that must be reset on retry""" return [ RQJobMetaField.FORMATTED_EXCEPTION, RQJobMetaField.EXCEPTION_TYPE, @@ -111,12 +110,12 @@ class BaseRQMeta(RQMetaWithFailureInfo): # immutable and required fields user: UserInfo = attrs.field( validator=[attrs.validators.instance_of(UserInfo)], - converter=lambda d: UserInfo(**d), + converter=lambda x: x if isinstance(x, UserInfo) else UserInfo(**x), on_setattr=attrs.setters.frozen, ) request: RequestInfo = attrs.field( validator=[attrs.validators.instance_of(RequestInfo)], - converter=lambda d: RequestInfo(**d), + converter=lambda x: x if isinstance(x, RequestInfo) else RequestInfo(**x), on_setattr=attrs.setters.frozen, ) @@ -137,17 +136,19 @@ class BaseRQMeta(RQMetaWithFailureInfo): validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen ) - # import && lambda + # mutable fields progress: float | None = attrs.field( validator=[optional_float_validator], default=None, on_setattr=_update_value, ) + status: str = attrs.field( + validator=[str_validator], default="", on_setattr=_update_value + ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: - """Return a list of fields that must be reset on retry""" - return RQMetaWithFailureInfo._get_resettable_fields() + [RQJobMetaField.PROGRESS] + return RQMetaWithFailureInfo._get_resettable_fields() + [RQJobMetaField.PROGRESS, RQJobMetaField.STATUS] @classmethod def build( @@ -189,16 +190,15 @@ def build( @attrs.define(kw_only=True) class ExportRQMeta(BaseRQMeta): # will be changed to ExportResultInfo in the next PR - result_url: str | None = attrs.field(validator=[optional_str_validator]) + result_url: str | None = attrs.field(validator=[optional_str_validator], default=None) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: - """Return a list of fields that must be 
reset on retry""" base_fields = BaseRQMeta._get_resettable_fields() return base_fields + [RQJobMetaField.RESULT] @classmethod - def build( + def build_for( cls, *, request: PatchedRequest, @@ -221,27 +221,18 @@ class ImportRQMeta(BaseRQMeta): ) # mutable fields - # TODO: move into base? - status: str = attrs.field( - validator=[optional_str_validator], default="", on_setattr=_update_value - ) task_progress: float | None = attrs.field( validator=[optional_float_validator], default=None, on_setattr=_update_value - ) + ) # used when importing project dataset @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: - """Return a list of fields that must be reset on retry""" base_fields = BaseRQMeta._get_resettable_fields() - return base_fields + [ - RQJobMetaField.PROGRESS, - RQJobMetaField.TASK_PROGRESS, - RQJobMetaField.STATUS, - ] + return base_fields + [RQJobMetaField.TASK_PROGRESS] @classmethod - def build( + def build_for( cls, *, request: PatchedRequest, @@ -255,42 +246,6 @@ def build( tmp_file=tmp_file, ).to_dict() - -@attrs.define(kw_only=True) -class LambdaRQMeta(BaseRQMeta): - # immutable fields - function_id: int | None = attrs.field( - validator=[optional_int_validator], default=None, on_setattr=attrs.setters.frozen - ) - lambda_: bool | None = attrs.field( - validator=[optional_bool_validator], - init=False, - default=True, - on_setattr=attrs.setters.frozen, - ) - - def to_dict(self) -> dict: - d = asdict(self) - if v := d.pop(RQJobMetaField.LAMBDA + "_", None) is not None: - d[RQJobMetaField.LAMBDA] = v - - return d - - @classmethod - def build( - cls, - *, - request: PatchedRequest, - db_obj: Model, - function_id: int, - ): - base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) - return cls( - **base_meta, - function_id=function_id, - ).to_dict() - - class RQJobMetaField: # common fields FORMATTED_EXCEPTION = "formatted_exception" diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 056c794088c5..fc12af4a9d2f 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -44,7 +44,6 @@ BaseRQMeta, ExportRQMeta, ImportRQMeta, - LambdaRQMeta, RequestAction, RQId, ) @@ -60,6 +59,7 @@ reverse, take_by, ) +from cvat.apps.lambda_manager.rq import LambdaRQMeta from utils.dataset_manifest import ImageManifestManager slogger = ServerLogManager(__name__) @@ -3553,17 +3553,19 @@ class RequestSerializer(serializers.Serializer): result_url = serializers.URLField(required=False, allow_null=True) result_id = serializers.IntegerField(required=False, allow_null=True) + def __init__(self, *args, **kwargs): + self._base_rq_job_meta: BaseRQMeta | None = None + super().__init__(*args, **kwargs) + @extend_schema_field(UserIdentifiersSerializer()) def get_owner(self, rq_job: RQJob) -> dict[str, Any]: - # TODO: define parsed meta once - rq_job_meta = BaseRQMeta.from_job(rq_job) - return UserIdentifiersSerializer(rq_job_meta.user.to_dict()).data + assert self._base_rq_job_meta + return UserIdentifiersSerializer(self._base_rq_job_meta.user.to_dict()).data @extend_schema_field( serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True) ) def get_progress(self, rq_job: RQJob) -> Decimal: - # TODO: define parsed meta once rq_job_meta = ImportRQMeta.from_job(rq_job) # progress of task creation is stored in "task_progress" field # progress of project import is stored in "progress" field @@ -3585,19 +3587,19 @@ def get_expiry_date(self, rq_job: RQJob) -> Optional[str]: 
@extend_schema_field(serializers.CharField(allow_blank=True)) def get_message(self, rq_job: RQJob) -> str: - # TODO: define parsed meta once - rq_job_meta = ImportRQMeta.from_job(rq_job) + assert self._base_rq_job_meta rq_job_status = rq_job.get_status() message = '' if RQJobStatus.STARTED == rq_job_status: - message = rq_job_meta.status + message = self._base_rq_job_meta.status elif RQJobStatus.FAILED == rq_job_status: - message = rq_job_meta.formatted_exception or parse_exception_message(str(rq_job.exc_info or "Unknown error")) + message = self._base_rq_job_meta.formatted_exception or parse_exception_message(str(rq_job.exc_info or "Unknown error")) return message def to_representation(self, rq_job: RQJob) -> dict[str, Any]: + self._base_rq_job_meta = BaseRQMeta.from_job(rq_job) representation = super().to_representation(rq_job) # FUTURE-TODO: support such statuses on UI diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 9533807ac1ac..d5835639a0bf 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -83,7 +83,7 @@ def create( func=_create_thread, args=(db_task.pk, data), job_id=rq_id, - meta=ImportRQMeta.build(request=request, db_obj=db_task), + meta=ImportRQMeta.build_for(request=request, db_obj=db_task), depends_on=define_dependent_job(q, user_id), failure_ttl=settings.IMPORT_CACHE_FAILED_TTL.total_seconds(), ) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 8bcc2e8d522e..086749e9db02 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -3446,7 +3446,7 @@ def _import_annotations(request, rq_id_factory, rq_func, db_obj, format_name, user_id = request.user.id with get_rq_lock_by_user(queue, user_id): - meta = ImportRQMeta.build(request=request, db_obj=db_obj, tmp_file=filename) + meta = ImportRQMeta.build_for(request=request, db_obj=db_obj, tmp_file=filename) rq_job = queue.enqueue_call( func=func, args=func_args, @@ -3548,7 +3548,7 @@ def _import_project_dataset( user_id = request.user.id with get_rq_lock_by_user(queue, user_id): - meta = ImportRQMeta.build(request=request, db_obj=db_obj, tmp_file=filename) + meta = ImportRQMeta.build_for(request=request, db_obj=db_obj, tmp_file=filename) rq_job = queue.enqueue_call( func=func, args=func_args, diff --git a/cvat/apps/lambda_manager/rq.py b/cvat/apps/lambda_manager/rq.py new file mode 100644 index 000000000000..5d6cd6f3f52a --- /dev/null +++ b/cvat/apps/lambda_manager/rq.py @@ -0,0 +1,47 @@ +# Copyright (C) CVAT.ai Corporation +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import attrs +from attrs import asdict +from django.db.models import Model + +from cvat.apps.engine.middleware import PatchedRequest +from cvat.apps.engine.rq_job_handler import BaseRQMeta, RQJobMetaField + + +@attrs.define(kw_only=True) +class LambdaRQMeta(BaseRQMeta): + # immutable fields + function_id: int = attrs.field( + validator=[attrs.validators.instance_of(int)], default=None, on_setattr=attrs.setters.frozen + ) + lambda_: bool = attrs.field( + validator=[attrs.validators.instance_of(bool)], + init=False, + default=True, + on_setattr=attrs.setters.frozen, + ) + + def to_dict(self) -> dict: + d = asdict(self) + if v := d.pop(RQJobMetaField.LAMBDA + "_", None) is not None: + d[RQJobMetaField.LAMBDA] = v + + return d + + @classmethod + def build_for( + cls, + *, + request: PatchedRequest, + db_obj: Model, + function_id: int, + ): + base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) + return cls( + **base_meta, + function_id=function_id, + 
).to_dict() diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index bf2308173db9..54d4574e0bdf 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -46,13 +46,14 @@ SourceType, Task, ) -from cvat.apps.engine.rq_job_handler import LambdaRQMeta, RQId +from cvat.apps.engine.rq_job_handler import RQId from cvat.apps.engine.serializers import LabeledDataSerializer from cvat.apps.engine.utils import define_dependent_job, get_rq_lock_by_user from cvat.apps.events.handlers import handle_function_call from cvat.apps.iam.filters import ORGANIZATION_OPEN_API_PARAMETERS from cvat.apps.lambda_manager.models import FunctionKind from cvat.apps.lambda_manager.permissions import LambdaPermission +from cvat.apps.lambda_manager.rq import LambdaRQMeta from cvat.apps.lambda_manager.serializers import ( FunctionCallRequestSerializer, FunctionCallSerializer, @@ -640,7 +641,7 @@ def enqueue( user_id = request.user.id with get_rq_lock_by_user(queue, user_id): - meta = LambdaRQMeta.build( + meta = LambdaRQMeta.build_for( request=request, db_obj=Job.objects.get(pk=job) if job else Task.objects.get(pk=task), function_id=lambda_func.id, From c9bbe486cdd3f025f074137edc810ba978ee0fc9 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Mon, 10 Feb 2025 18:02:43 +0100 Subject: [PATCH 13/14] Fix types --- cvat/apps/engine/rq_job_handler.py | 9 +++++++-- cvat/apps/engine/views.py | 2 +- cvat/apps/lambda_manager/rq.py | 6 +++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 1046385f7ab5..922e6406951d 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -83,17 +83,22 @@ def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: @attrs.define(kw_only=True) class RQMetaWithFailureInfo(AbstractRQMeta): + # mutable && optional fields formatted_exception: str | None = attrs.field( validator=[optional_str_validator], default=None, on_setattr=_update_value, ) - exc_type: str | None = attrs.field( - validator=[optional_str_validator], + exc_type: type[Exception] | None = attrs.field( default=None, on_setattr=_update_value, ) + @exc_type.validator + def _check_exc_type(self, attribute: attrs.Attribute, value: Any): + if value is not None and not issubclass(value, Exception): + raise ValueError("Wrong exception type") + exc_args: Iterable | None = attrs.field(default=None, on_setattr=_update_value) @staticmethod diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 086749e9db02..5d748eb629e0 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -3355,7 +3355,7 @@ def perform_destroy(self, instance): super().perform_destroy(instance) target.touch() -def rq_exception_handler(rq_job: RQJob, exc_type: type[Exception], exc_value, tb): +def rq_exception_handler(rq_job: RQJob, exc_type: type[Exception], exc_value: Exception, tb): rq_job_meta = RQMetaWithFailureInfo.from_job(rq_job) rq_job_meta.formatted_exception = "".join( traceback.format_exception_only(exc_type, exc_value)) diff --git a/cvat/apps/lambda_manager/rq.py b/cvat/apps/lambda_manager/rq.py index 5d6cd6f3f52a..c75a48e7b9e1 100644 --- a/cvat/apps/lambda_manager/rq.py +++ b/cvat/apps/lambda_manager/rq.py @@ -15,8 +15,8 @@ @attrs.define(kw_only=True) class LambdaRQMeta(BaseRQMeta): # immutable fields - function_id: int = attrs.field( - validator=[attrs.validators.instance_of(int)], default=None, on_setattr=attrs.setters.frozen + function_id: 
str = attrs.field( + validator=[attrs.validators.instance_of(str)], default=None, on_setattr=attrs.setters.frozen ) lambda_: bool = attrs.field( validator=[attrs.validators.instance_of(bool)], @@ -38,7 +38,7 @@ def build_for( *, request: PatchedRequest, db_obj: Model, - function_id: int, + function_id: str, ): base_meta = BaseRQMeta.build(request=request, db_obj=db_obj) return cls( From 3b9aefc116c8f4432ed8e7607b11ee6ee109ab1c Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Wed, 12 Feb 2025 10:55:53 +0100 Subject: [PATCH 14/14] Fix meta update --- cvat/apps/engine/rq_job_handler.py | 27 +++++++++++++----------- cvat/apps/lambda_manager/rq.py | 34 ++++++++++++++++++++++++------ cvat/apps/lambda_manager/views.py | 4 ++-- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/cvat/apps/engine/rq_job_handler.py b/cvat/apps/engine/rq_job_handler.py index 922e6406951d..be7d7969be65 100644 --- a/cvat/apps/engine/rq_job_handler.py +++ b/cvat/apps/engine/rq_job_handler.py @@ -28,9 +28,6 @@ optional_float_validator = attrs.validators.optional(attrs.validators.instance_of(float)) -def _update_value(self: AbstractRQMeta, attribute: attrs.Attribute, value: Any): - self._job.meta[attribute.name] = value - @attrs.frozen(kw_only=True) class UserInfo: @@ -55,12 +52,15 @@ def to_dict(self) -> dict[str, Any]: class AbstractRQMeta(metaclass=ABCMeta): _job: RQJob | None = attrs.field(init=False, default=None) + def update_value(self, attribute: attrs.Attribute, value: Any): + self._job.meta[attribute.name] = value + def to_dict(self) -> dict: - return asdict(self, filter=lambda k, _: k.name != "_job") + return asdict(self, filter=lambda k, _: not k.name.startswith("_")) @classmethod def from_job(cls, rq_job: RQJob): - keys_to_keep = [k.name for k in attrs.fields(cls)] + keys_to_keep = [k.name for k in attrs.fields(cls) if not k.name.startswith("_")] meta = cls(**{k: v for k, v in rq_job.meta.items() if k in keys_to_keep}) meta._job = rq_job @@ -80,26 +80,29 @@ def reset_meta_on_retry(self) -> dict[RQJobMetaField, Any]: return {k: v for k, v in self._job.meta.items() if k not in resettable_fields} +on_setattr = attrs.setters.pipe(attrs.setters.validate, AbstractRQMeta.update_value) @attrs.define(kw_only=True) class RQMetaWithFailureInfo(AbstractRQMeta): - # mutable && optional fields formatted_exception: str | None = attrs.field( validator=[optional_str_validator], default=None, - on_setattr=_update_value, + on_setattr=on_setattr, ) exc_type: type[Exception] | None = attrs.field( default=None, - on_setattr=_update_value, + on_setattr=on_setattr, ) @exc_type.validator def _check_exc_type(self, attribute: attrs.Attribute, value: Any): if value is not None and not issubclass(value, Exception): raise ValueError("Wrong exception type") - exc_args: Iterable | None = attrs.field(default=None, on_setattr=_update_value) + exc_args: Iterable | None = attrs.field( + default=None, + on_setattr=on_setattr + ) @staticmethod def _get_resettable_fields() -> list[RQJobMetaField]: @@ -145,10 +148,10 @@ class BaseRQMeta(RQMetaWithFailureInfo): progress: float | None = attrs.field( validator=[optional_float_validator], default=None, - on_setattr=_update_value, + on_setattr=on_setattr, ) status: str = attrs.field( - validator=[str_validator], default="", on_setattr=_update_value + validator=[str_validator], default="", on_setattr=on_setattr ) @staticmethod @@ -227,7 +230,7 @@ class ImportRQMeta(BaseRQMeta): # mutable fields task_progress: float | None = attrs.field( - validator=[optional_float_validator], 
default=None, on_setattr=_update_value + validator=[optional_float_validator], default=None, on_setattr=on_setattr ) # used when importing project dataset @staticmethod diff --git a/cvat/apps/lambda_manager/rq.py b/cvat/apps/lambda_manager/rq.py index c75a48e7b9e1..e864f9069a01 100644 --- a/cvat/apps/lambda_manager/rq.py +++ b/cvat/apps/lambda_manager/rq.py @@ -5,11 +5,11 @@ from __future__ import annotations import attrs -from attrs import asdict from django.db.models import Model +from rq.job import Job as RQJob from cvat.apps.engine.middleware import PatchedRequest -from cvat.apps.engine.rq_job_handler import BaseRQMeta, RQJobMetaField +from cvat.apps.engine.rq_job_handler import BaseRQMeta, RQJobMetaField, on_setattr @attrs.define(kw_only=True) @@ -20,15 +20,34 @@ class LambdaRQMeta(BaseRQMeta): ) lambda_: bool = attrs.field( validator=[attrs.validators.instance_of(bool)], - init=False, - default=True, + default=False, on_setattr=attrs.setters.frozen, ) + # FUTURE-FIXME: progress should be in [0, 1] range + progress: float | None = attrs.field( + validator=[attrs.validators.optional(attrs.validators.instance_of(int))], + default=None, + on_setattr=on_setattr, + ) + + @classmethod + def from_job(cls, rq_job: RQJob): + keys_to_keep = [k.name for k in attrs.fields(cls) if not k.name.startswith("_")] + params = {} + for k, v in rq_job.meta.items(): + if k in keys_to_keep: + params[k] = v + elif k == RQJobMetaField.LAMBDA: + params[RQJobMetaField.LAMBDA + "_"] = v + meta = cls(**params) + meta._job = rq_job + + return meta + def to_dict(self) -> dict: - d = asdict(self) - if v := d.pop(RQJobMetaField.LAMBDA + "_", None) is not None: - d[RQJobMetaField.LAMBDA] = v + d = super().to_dict() + d[RQJobMetaField.LAMBDA] = d.pop(RQJobMetaField.LAMBDA + "_") return d @@ -44,4 +63,5 @@ def build_for( return cls( **base_meta, function_id=function_id, + lambda_=True, ).to_dict() diff --git a/cvat/apps/lambda_manager/views.py b/cvat/apps/lambda_manager/views.py index 54d4574e0bdf..b4e5a5f19364 100644 --- a/cvat/apps/lambda_manager/views.py +++ b/cvat/apps/lambda_manager/views.py @@ -591,7 +591,7 @@ def get_jobs(self): ) jobs = queue.job_class.fetch_many(job_ids, queue.connection) - return [LambdaJob(job) for job in jobs if job and job.meta.get("lambda")] + return [LambdaJob(job) for job in jobs if job and LambdaRQMeta.from_job(job).lambda_] def enqueue( self, @@ -702,7 +702,7 @@ def to_dict(self): ), }, "status": self.job.get_status(), - "progress": self.job.meta.get("progress", 0), + "progress": LambdaRQMeta.from_job(self.job).progress, "enqueued": self.job.enqueued_at, "started": self.job.started_at, "ended": self.job.ended_at,