From 22256afdc68848b092b532e77c441d27a1ac0937 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 00:02:56 +0100 Subject: [PATCH 01/29] migrated: FOCA v0.6.0 -> v0.10.0 --- cwl_wes/__init__.py | 1 - ...40.workflow_execution_service.swagger.yaml | 18 +- cwl_wes/api/register_openapi.py | 96 --- cwl_wes/app.py | 77 +-- cwl_wes/celery_worker.py | 12 - cwl_wes/config.py | 36 -- cwl_wes/config.yaml | 165 +++++ cwl_wes/config/__init__.py | 0 cwl_wes/config/app_config.py | 57 -- cwl_wes/config/app_config.yaml | 111 ---- cwl_wes/config/log_config.yaml | 33 - cwl_wes/custom_config.py | 121 ++++ cwl_wes/database/__init__.py | 0 cwl_wes/database/register_mongodb.py | 100 --- cwl_wes/errors/__init__.py | 0 cwl_wes/errors/errors.py | 97 --- cwl_wes/exceptions.py | 62 ++ cwl_wes/factories/__init__.py | 0 cwl_wes/factories/celery_app.py | 57 -- cwl_wes/factories/connexion_app.py | 73 --- cwl_wes/ga4gh/wes/endpoints/cancel_run.py | 17 +- cwl_wes/ga4gh/wes/endpoints/get_run_log.py | 8 +- cwl_wes/ga4gh/wes/endpoints/get_run_status.py | 8 +- .../ga4gh/wes/endpoints/get_service_info.py | 2 +- cwl_wes/ga4gh/wes/endpoints/list_runs.py | 13 +- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 66 +- cwl_wes/ga4gh/wes/server.py | 49 +- cwl_wes/ga4gh/wes/service_info.py | 95 +++ cwl_wes/gunicorn.py | 31 + cwl_wes/security/__init__.py | 0 cwl_wes/security/cors.py | 17 - cwl_wes/security/decorators.py | 612 ------------------ cwl_wes/tasks/celery_task_monitor.py | 3 +- cwl_wes/tasks/register_celery.py | 29 +- cwl_wes/tasks/tasks/cancel_run.py | 19 +- cwl_wes/tasks/tasks/run_workflow.py | 2 +- cwl_wes/tasks/utils.py | 2 +- cwl_wes/{api => utils}/__init__.py | 0 cwl_wes/{database => utils}/db_utils.py | 23 - cwl_wes/worker.py | 8 + cwl_wes/wsgi.py | 4 +- 41 files changed, 597 insertions(+), 1527 deletions(-) delete mode 100644 cwl_wes/__init__.py delete mode 100644 cwl_wes/api/register_openapi.py delete mode 100644 cwl_wes/celery_worker.py delete mode 100644 cwl_wes/config.py create mode 
100644 cwl_wes/config.yaml delete mode 100644 cwl_wes/config/__init__.py delete mode 100644 cwl_wes/config/app_config.py delete mode 100644 cwl_wes/config/app_config.yaml delete mode 100644 cwl_wes/config/log_config.yaml create mode 100644 cwl_wes/custom_config.py delete mode 100644 cwl_wes/database/__init__.py delete mode 100644 cwl_wes/database/register_mongodb.py delete mode 100644 cwl_wes/errors/__init__.py delete mode 100644 cwl_wes/errors/errors.py create mode 100644 cwl_wes/exceptions.py delete mode 100644 cwl_wes/factories/__init__.py delete mode 100644 cwl_wes/factories/celery_app.py delete mode 100644 cwl_wes/factories/connexion_app.py create mode 100644 cwl_wes/ga4gh/wes/service_info.py create mode 100644 cwl_wes/gunicorn.py delete mode 100644 cwl_wes/security/__init__.py delete mode 100644 cwl_wes/security/cors.py delete mode 100644 cwl_wes/security/decorators.py rename cwl_wes/{api => utils}/__init__.py (100%) rename cwl_wes/{database => utils}/db_utils.py (75%) create mode 100644 cwl_wes/worker.py diff --git a/cwl_wes/__init__.py b/cwl_wes/__init__.py deleted file mode 100644 index a842d05..0000000 --- a/cwl_wes/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.15.0' diff --git a/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml b/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml index e2b686b..54603f4 100644 --- a/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml +++ b/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml @@ -15,8 +15,7 @@ paths: summary: Get information about Workflow Execution Service. description: |- May include information related (but not limited to) the workflow descriptor formats, versions supported, the WES API versions supported, and information about general service availability. 
- x-swagger-router-controller: ga4gh.wes.server - operationId: GetServiceInfo + operationId: "ga4gh.wes.server.GetServiceInfo" responses: '200': description: '' @@ -51,8 +50,7 @@ paths: contents of the list reflect the workflow list at the moment that the first page is requested. To monitor a specific workflow run, use GetRunStatus or GetRunLog. - x-swagger-router-controller: ga4gh.wes.server - operationId: ListRuns + operationId: "ga4gh.wes.server.ListRuns" responses: '200': description: '' @@ -146,8 +144,7 @@ paths: See the `RunRequest` documentation for details about other fields. - x-swagger-router-controller: ga4gh.wes.server - operationId: RunWorkflow + operationId: "ga4gh.wes.server.RunWorkflow" responses: '200': description: '' @@ -216,8 +213,7 @@ paths: (if available), a log object which allows the stderr and stdout to be retrieved, a log array so stderr/stdout for individual tasks can be retrieved, and the overall state of the workflow run (e.g. RUNNING, see the State section). - x-swagger-router-controller: ga4gh.wes.server - operationId: GetRunLog + operationId: "ga4gh.wes.server.GetRunLog" responses: '200': description: '' @@ -249,8 +245,7 @@ paths: /runs/{run_id}/cancel: post: summary: Cancel a running workflow. - x-swagger-router-controller: ga4gh.wes.server - operationId: CancelRun + operationId: "ga4gh.wes.server.CancelRun" responses: '200': description: '' @@ -286,8 +281,7 @@ paths: This provides an abbreviated (and likely fast depending on implementation) status of the running workflow, returning a simple result with the overall state of the workflow run (e.g. RUNNING, see the State section). 
- x-swagger-router-controller: ga4gh.wes.server - operationId: GetRunStatus + operationId: "ga4gh.wes.server.GetRunStatus" responses: '200': description: '' diff --git a/cwl_wes/api/register_openapi.py b/cwl_wes/api/register_openapi.py deleted file mode 100644 index 0ad8f57..0000000 --- a/cwl_wes/api/register_openapi.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Functions for amending OpenAPI specs and registering them with a Connexion -app instance.""" - -import logging -import os -from shutil import copyfile -from typing import (List, Dict) - -from connexion import App - -from foca.config.config_parser import get_conf - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def register_openapi( - app: App, - specs: List[Dict] = [], - add_security_definitions: bool = True -) -> App: - """Registers OpenAPI specs with Connexion app.""" - # Iterate over list of API specs - for spec in specs: - - # Get _this_ directory - path = os.path.join( - os.path.abspath( - os.path.dirname( - os.path.realpath(__file__) - ) - ), - get_conf(spec, 'path') - ) - - # Add security definitions to copy of specs - if add_security_definitions: - path = __add_security_definitions(in_file=path) - - # Generate API endpoints from OpenAPI spec - try: - app.add_api( - path, - strict_validation=get_conf(spec, 'strict_validation'), - validate_responses=get_conf(spec, 'validate_responses'), - swagger_ui=get_conf(spec, 'swagger_ui'), - swagger_json=get_conf(spec, 'swagger_json'), - ) - - logger.info("API endpoints specified in '{path}' added.".format( - path=path, - )) - - except (FileNotFoundError, PermissionError) as e: - logger.critical( - ( - "API specification file not found or accessible at " - "'{path}'. Execution aborted. 
Original error message: " - "{type}: {msg}" - ).format( - path=path, - type=type(e).__name__, - msg=e, - ) - ) - raise SystemExit(1) - - return(app) - - -def __add_security_definitions( - in_file: str, - ext: str = 'modified.yaml' -) -> str: - """Adds 'securityDefinitions' section to OpenAPI YAML specs.""" - # Set security definitions - amend = ''' - -# Amended by cwl-WES -securityDefinitions: - jwt: - type: apiKey - name: Authorization - in: header -''' - - # Create copy for modification - out_file: str = '.'.join([os.path.splitext(in_file)[0], ext]) - copyfile(in_file, out_file) - - # Append security definitions - with open(out_file, 'a') as mod: - mod.write(amend) - - return out_file diff --git a/cwl_wes/app.py b/cwl_wes/app.py index 76ddd89..9392b4b 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -1,61 +1,38 @@ -"""Entry point to start service.""" - -from cwl_wes.api.register_openapi import register_openapi -from cwl_wes.config.app_config import parse_app_config -from foca.config.config_parser import (get_conf, get_conf_type) -from foca.config.log_config import configure_logging -from cwl_wes.database.register_mongodb import register_mongodb -from cwl_wes.errors.errors import register_error_handlers -from cwl_wes.factories.connexion_app import create_connexion_app -from cwl_wes.tasks.register_celery import register_task_service -from cwl_wes.security.cors import enable_cors - - -def run_server(): - - # Configure logger - configure_logging(config_var='WES_CONFIG_LOG') +"""cwl-WES application entry point.""" - # Parse app configuration - config = parse_app_config(config_var='WES_CONFIG') +from pathlib import Path - # Create Connexion app - connexion_app = create_connexion_app(config) +from connexion import App +from flask import current_app +from foca import Foca - # Register MongoDB - connexion_app.app = register_mongodb(connexion_app.app) - - # Register error handlers - connexion_app = register_error_handlers(connexion_app) +from 
cwl_wes.tasks.register_celery import register_task_service +from cwl_wes.ga4gh.wes.service_info import ServiceInfo +from cwl_wes.exceptions import NotFound - # Create Celery app and register background task monitoring service - register_task_service(connexion_app.app) - # Register OpenAPI specs - connexion_app = register_openapi( - app=connexion_app, - specs=get_conf_type( - config, - 'api', - 'specs', - types=(list), - ), - add_security_definitions=get_conf( - config, - 'security', - 'authorization_required' - ) +def init_app() -> App: + foca = Foca( + config_file="config.yaml", + custom_config_model='cwl_wes.custom_config.CustomConfig', ) + app = foca.create_app() + with app.app.app_context(): + service_info = ServiceInfo() + try: + service_info = service_info.get_service_info() + except NotFound: + service_info.set_service_info( + data=current_app.config.foca.custom.service_info.dict() + ) + register_task_service(app) + return app - # Enable cross-origin resource sharing - enable_cors(connexion_app.app) - return connexion_app, config +def run_app(app: App) -> None: + app.run(port=app.port) if __name__ == '__main__': - connexion_app, config = run_server() - # Run app - connexion_app.run( - use_reloader=get_conf(config, 'server', 'use_reloader') - ) + app = init_app() + run_app(app) diff --git a/cwl_wes/celery_worker.py b/cwl_wes/celery_worker.py deleted file mode 100644 index 2dbf1c3..0000000 --- a/cwl_wes/celery_worker.py +++ /dev/null @@ -1,12 +0,0 @@ -"""Entry point for Celery workers.""" - -from cwl_wes.config.app_config import parse_app_config -from cwl_wes.factories.celery_app import create_celery_app -from cwl_wes.factories.connexion_app import create_connexion_app - - -# Parse app configuration -config = parse_app_config(config_var='WES_CONFIG') - -# Create Celery app -celery = create_celery_app(create_connexion_app(config).app) diff --git a/cwl_wes/config.py b/cwl_wes/config.py deleted file mode 100644 index a66aaba..0000000 --- a/cwl_wes/config.py +++ 
/dev/null @@ -1,36 +0,0 @@ -import os - -from foca.config.config_parser import get_conf -from cwl_wes.config.app_config import parse_app_config - -# Source the WES config for defaults -flask_config = parse_app_config(config_var='WES_CONFIG') - -# Gunicorn number of workers and threads -workers = int(os.environ.get('GUNICORN_PROCESSES', '1')) -threads = int(os.environ.get('GUNICORN_THREADS', '1')) - -forwarded_allow_ips = '*' - -# Gunicorn bind address -bind = '{address}:{port}'.format( - address=get_conf(flask_config, 'server', 'host'), - port=get_conf(flask_config, 'server', 'port'), -) - -# Source the environment variables for the Gunicorn workers -raw_env = [ - "WES_CONFIG=%s" % os.environ.get('WES_CONFIG', ''), - "RABBIT_HOST=%s" % os.environ.get( - 'RABBIT_HOST', get_conf(flask_config, 'celery', 'broker_host')), - "RABBIT_PORT=%s" % os.environ.get( - 'RABBIT_PORT', get_conf(flask_config, 'celery', 'broker_port')), - "MONGO_HOST=%s" % os.environ.get( - 'MONGO_HOST', get_conf(flask_config, 'database', 'host')), - "MONGO_PORT=%s" % os.environ.get( - 'MONGO_PORT', get_conf(flask_config, 'database', 'port')), - "MONGO_DBNAME=%s" % os.environ.get( - 'MONGO_DBNAME', get_conf(flask_config, 'database', 'name')), - "MONGO_USERNAME=%s" % os.environ.get('MONGO_USERNAME', ''), - "MONGO_PASSWORD=%s" % os.environ.get('MONGO_PASSWORD', '') -] diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml new file mode 100644 index 0000000..3707ecb --- /dev/null +++ b/cwl_wes/config.yaml @@ -0,0 +1,165 @@ +# FOCA configuration +# Available in app context as attributes of `current_app.config.foca` +# Automatically validated via FOCA +# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html + +# Server configuration +# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ServerConfig +server: + host: '0.0.0.0' + port: 8080 + debug: True + environment: development + testing: False + use_reloader: True + +# Security configuration +# Cf. 
https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.SecurityConfig +security: + auth: + required: False + add_key_to_claims: True + algorithms: + - RS256 + allow_expired: False + audience: null + validation_methods: + - userinfo + - public_key + validation_checks: all + +# Database configuration +# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.DBConfig +db: + host: mongodb + port: 27017 + dbs: + cwl-wes-db: + collections: + runs: + indexes: + - keys: + run_id: 1 + task_id: 1 + options: + 'unique': True + 'sparse': True + service_info: + indexes: + - keys: + id: 1 + options: + 'unique': True + +# API configuration +# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.APIConfig +api: + specs: + - path: + - api/20181010.be85140.workflow_execution_service.swagger.yaml + add_operation_fields: + x-openapi-router-controller: cwl_wes.ga4gh.wes.controllers + add_security_fields: + x-bearerInfoFunc: app.validate_token + disable_auth: True + connexion: + strict_validation: True + validate_responses: False + options: + swagger_ui: True + serve_spec: True + +# Logging configuration +# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.LogConfig +log: + version: 1 + disable_existing_loggers: False + formatters: + standard: + class: logging.Formatter + style: "{" + format: "[{asctime}: {levelname:<8}] {message} [{name}]" + long: + class: logging.Formatter + style: "{" + format: "[{asctime}: {levelname:<8}] {message} [{name}]" + handlers: + console: + class: logging.StreamHandler + level: 20 + formatter: standard + stream: ext://sys.stderr + root: + level: 10 + handlers: [console] + +# Background job configuration +# Cf. 
https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.JobsConfig +jobs: + host: rabbitmq + port: 5672 + backend: 'rpc://' + include: + - cwl_wes.tasks.tasks.run_workflow + - cwl_wes.tasks.tasks.cancel_run + +# Exception configuration +# Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ExceptionConfig +exceptions: + required_members: [['message'], ['code']] + status_member: ['code'] + exceptions: cwl_wes.exceptions.exceptions + +# Custom configuration +# Available in app context as attributes of `current_app.config.foca` +custom: + storage: + permanent_dir: '/data/output' + tmp_dir: '/data/tmp' + remote_storage_url: 'ftp://ftp-private.ebi.ac.uk/upload/foivos' + celery: + monitor: + timeout: 0.1 + message_maxsize: 16777216 + endpoint_params: + default_page_size: 5 + timeout_cancel_run: 60 + timeout_run_workflow: null + service_info: + contact_info: 'https://github.com/elixir-cloud-aai/cwl-WES' + auth_instructions_url: 'https://www.elixir-europe.org/services/compute/aai' + supported_filesystem_protocols: + - ftp + - https + - local + supported_wes_versions: + - 1.0.0 + workflow_type_versions: + CWL: + workflow_type_version: + - v1.0 + workflow_engine_versions: + cwl-tes: 0.2.0 + default_workflow_engine_parameters: + - type: string + default_value: some_string + - type: int + default_value: '5' + tags: + known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' + app_version: 0.15.0 + tes_server: + url: 'https://csc-tesk.c03.k8s-popup.csc.fi/' + timeout: 5 + status_query_params: 'FULL' + drs_server: + port: null # use this port for resolving DRS URIs; set to `null` to use default (443) + base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) + use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) + file_types: # 
extensions of files to scan for DRS URI resolution + - cwl + - yaml + - yml + runs_id: + length: 6 + charset: string.ascii_uppercase + string.digits diff --git a/cwl_wes/config/__init__.py b/cwl_wes/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/config/app_config.py b/cwl_wes/config/app_config.py deleted file mode 100644 index bf23c51..0000000 --- a/cwl_wes/config/app_config.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Function for configuring a Connection app instance.""" - -import logging -import os -from typing import Optional - -from foca.config.config_parser import YAMLConfigParser - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def parse_app_config( - config_var: Optional[str] = None, - default_path: str = os.path.abspath( - os.path.join( - os.path.dirname( - os.path.realpath(__file__) - ), - 'app_config.yaml' - ) - ) -) -> YAMLConfigParser: - """Parses configuration files and adds configuration to Connexion app.""" - # Create parser instance - config = YAMLConfigParser() - - # Parse config - try: - paths = config.update_from_yaml( - config_paths=[default_path], - config_vars=[config_var], - ) - - # Abort if a config file was not found/accessible - except (FileNotFoundError, PermissionError) as e: - logger.exception( - ( - 'Config file not found. Ensure that default config file is ' - "available and accessible at '{default_path}'. If " - "'{config_var}' is set, further ensure that the file or files " - 'it points are available and accessible. Execution aborted. 
' - "Original error message: {type}: {msg}" - ).format( - default_path=default_path, - config_var=config_var, - type=type(e).__name__, - msg=e, - ) - ) - raise SystemExit(1) - - else: - logger.info("App config loaded from '{paths}'.".format(paths=paths)) - - return config diff --git a/cwl_wes/config/app_config.yaml b/cwl_wes/config/app_config.yaml deleted file mode 100644 index 6deec8d..0000000 --- a/cwl_wes/config/app_config.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# General server/service settings -# -# Any change in this file will be detected by gunicorn and the configuration will be reloaded. -# -server: - host: '0.0.0.0' - port: 8080 - debug: True - environment: development - testing: False - use_reloader: True - -# Security settings -security: - authorization_required: False - jwt: - add_key_to_claims: True - algorithms: - - RS256 - allow_expired: False - audience: null # list of allowed audiences or 'null' (do not validate audience) - claim_identity: sub - claim_issuer: iss - claim_key_id: kid - header_name: Authorization - token_prefix: Bearer - validation_methods: - - userinfo - - public_key - validation_checks: all # 'any' or 'all' - -# Database settings -database: - host: mongodb - port: 27017 - name: cwl-wes-db - run_id: - length: 6 - charset: string.ascii_uppercase + string.digits - -# Storage -storage: - permanent_dir: '/data/output' - tmp_dir: '/data/tmp' - remote_storage_url: 'ftp://ftp-private.ebi.ac.uk/upload/foivos' - -# Celery task queue -celery: - broker_host: rabbitmq - broker_port: 5672 - result_backend: 'rpc://' - include: - - cwl_wes.tasks.tasks.run_workflow - - cwl_wes.tasks.tasks.cancel_run - monitor: - timeout: 0.1 - message_maxsize: 16777216 - -# OpenAPI specs -api: - specs: - - path: '20181010.be85140.workflow_execution_service.swagger.yaml' - strict_validation: True - validate_responses: True - swagger_ui: True - swagger_json: True - endpoint_params: - default_page_size: 5 - timeout_cancel_run: 60 - timeout_run_workflow: Null - -# WES 
service info settings -service_info: - contact_info: 'https://github.com/elixir-cloud-aai/cwl-WES' - auth_instructions_url: 'https://www.elixir-europe.org/services/compute/aai' - supported_filesystem_protocols: - - ftp - - https - - local - supported_wes_versions: - - 1.0.0 - workflow_type_versions: - CWL: - workflow_type_version: - - v1.0 - workflow_engine_versions: - cwl-tes: 0.2.0 - default_workflow_engine_parameters: - - type: string - default_value: some_string - - type: int - default_value: '5' - tags: - known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' - app_version: 0.15.0 - -# TES server -tes: - url: 'https://csc-tesk.c03.k8s-popup.csc.fi/' - timeout: 5 - status_query_params: 'FULL' - -# DRS integration -drs: - port: Null # use this port for resolving DRS URIs; set to `Null` to use default (443) - base_path: Null # use this base path for resolving DRS URIs; set to `Null` to use default (`ga4gh/drs/v1`) - use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) - file_types: # extensions of files to scan for DRS URI resolution - - cwl - - yaml - - yml diff --git a/cwl_wes/config/log_config.yaml b/cwl_wes/config/log_config.yaml deleted file mode 100644 index b85dc13..0000000 --- a/cwl_wes/config/log_config.yaml +++ /dev/null @@ -1,33 +0,0 @@ -version: 1 - -disable_existing_loggers: False - -formatters: - standard: - class: logging.Formatter - style: "{" - format: "[{asctime}: {levelname:<8} {module:<18}] {message}" - - long: - class: logging.Formatter - style: "{" - format: "[{asctime}: {levelname:<8}] {message} [{name}]" - - # OTHER FORMATS - #format: "{message}" - #format: "[{asctime}] [{levelname:^8}] {message} ({name})" - #format: "{asctime}-{levelno:^2}-{name}-{module}-{funcName}: {message}" - #format: "[{asctime}: {levelname:}/{name:<36}] {message}" - #format: "[{asctime}] [{levelname:^8}] [{name}] {message} 
({pathname}:{funcName})" - #datefmt: "%y-%m-%d %H:%M:%S" - -handlers: - console: - class: logging.StreamHandler - level: DEBUG - formatter: long - stream: ext://sys.stderr - -root: - level: INFO - handlers: [console] \ No newline at end of file diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py new file mode 100644 index 0000000..1358add --- /dev/null +++ b/cwl_wes/custom_config.py @@ -0,0 +1,121 @@ +"""Custom app config models.""" +import string +from typing import Dict, List, Optional +from foca.models.config import FOCABaseConfig + +class StorageConfig(FOCABaseConfig): + permanent_dir: str = '/data/output' + tmp_dir: str = '/data/tmp' + remote_storage_url: str = 'ftp://ftp-private.ebi.ac.uk/upload/foivos' + + +class MonitorConfig(FOCABaseConfig): + timeout: float = 0.1 + + +class CeleryConfig(FOCABaseConfig): + monitor: MonitorConfig = MonitorConfig() + message_maxsize: int = 16777216 + + +class EndpointConfig(FOCABaseConfig): + default_page_size: int = 5 + timeout_cancel_run: int = 60 + timeout_run_workflow: Optional[int] = None + + +class WorkflowTypeVersionConfig(FOCABaseConfig): + """Workflow type versions supported by this service. + Args: + workflow_type_version: List of one or more acceptable versions for the + workflow type. + """ + workflow_type_version: Optional[List[str]] = [] + + +class DefaultWorkflowEngineParameterConfig(FOCABaseConfig): + """Model for default workflow engine parameters. + Args: + name: Parameter name. + type: Parameter type. + default_value: Stringified version of default parameter. + Attributes: + name: Parameter name. + type: Parameter type. + default_value: Stringified version of default parameter. 
+ """ + name: Optional[str] + type: Optional[str] + default_value: Optional[str] + + +class TagsConfig(FOCABaseConfig): + known_tes_endpoints: str = 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' + app_version: str = '0.15.0' + + +class ServiceInfoConfig(FOCABaseConfig): + contact_info: str = 'https://github.com/elixir-cloud-aai/cwl-WES' + auth_instructions_url: str = 'https://www.elixir-europe.org/services/compute/aai' + supported_filesystem_protocols: List[str] = ['ftp', 'https', 'local'] + supported_wes_versions: List[str] = ['1.0.0'] + workflow_type_versions: Dict[str, WorkflowTypeVersionConfig] = { + 'CWL': WorkflowTypeVersionConfig(workflow_type_version=['v1.0']), + } + workflow_engine_versions: Dict[str, str] = {} + default_workflow_engine_parameters: List[ + DefaultWorkflowEngineParameterConfig + ] = [] + tags: TagsConfig = TagsConfig() + + +class TesServerConfig(FOCABaseConfig): + url: str = 'https://csc-tesk.c03.k8s-popup.csc.fi/' + timeout: int = 5 + status_query_params: str = 'FULL' + + +class DRSServerConfig(FOCABaseConfig): + port: Optional[int] = None + base_path: Optional[str] = None + use_http: bool = False + file_types: List[str] = ['cwl', 'yaml', 'yml'] + + +class IdConfig(FOCABaseConfig): + """Model for defining unique identifier for services on cloud registry. + + Args: + charset: A string of allowed characters or an expression evaluating to + a string of allowed characters. + length: Length of returned string. + + Attributes: + charset: A string of allowed characters or an expression evaluating to + a string of allowed characters. + length: Length of returned string. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> IdConfig( + ... charset='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', + ... length=6 + ... 
) + IdConfig(charset='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', length=6) + """ + length: int = 6 + charset: str = string.ascii_uppercase + string.digits + + +class CustomConfig(FOCABaseConfig): + storage: StorageConfig = StorageConfig() + celery: CeleryConfig = CeleryConfig() + endpoint_params: EndpointConfig = EndpointConfig() + service_info: ServiceInfoConfig = ServiceInfoConfig() + tes_server: TesServerConfig = TesServerConfig() + drs_server: DRSServerConfig = DRSServerConfig() + runs_id: IdConfig = IdConfig() + \ No newline at end of file diff --git a/cwl_wes/database/__init__.py b/cwl_wes/database/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/database/register_mongodb.py b/cwl_wes/database/register_mongodb.py deleted file mode 100644 index f667181..0000000 --- a/cwl_wes/database/register_mongodb.py +++ /dev/null @@ -1,100 +0,0 @@ -"""Function for Registering MongoDB with a Flask app instance.""" - -import os - -import logging -from typing import Dict - -from flask import Flask -from flask_pymongo import ASCENDING, PyMongo - -from foca.config.config_parser import get_conf -from cwl_wes.ga4gh.wes.endpoints.get_service_info import get_service_info - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def register_mongodb(app: Flask) -> Flask: - """Instantiates database and initializes collections.""" - config = app.config - - # Instantiante PyMongo client - mongo = create_mongo_client( - app=app, - config=config, - ) - - # Add database - db = mongo.db[os.environ.get( - 'MONGO_DBNAME', get_conf(config, 'database', 'name'))] - - # Add database collection for '/service-info' - collection_service_info = mongo.db['service-info'] - logger.debug("Added database collection 'service_info'.") - - # Add database collection for '/runs' - collection_runs = mongo.db['runs'] - collection_runs.create_index([ - ('run_id', ASCENDING), - ('task_id', ASCENDING), - ], - unique=True, - sparse=True - ) - logger.debug("Added database 
collection 'runs'.") - - # Add database and collections to app config - config['database']['database'] = db - config['database']['collections'] = dict() - config['database']['collections']['runs'] = collection_runs - config['database']['collections']['service_info'] = collection_service_info - app.config = config - - # Initialize service info - logger.debug('Initializing service info...') - get_service_info(config, silent=True) - - return app - - -def create_mongo_client( - app: Flask, - config: Dict, -): - """Register MongoDB uri and credentials.""" - if os.environ.get('MONGO_USERNAME') != '': - auth = '{username}:{password}@'.format( - username=os.environ.get('MONGO_USERNAME'), - password=os.environ.get('MONGO_PASSWORD'), - ) - else: - auth = '' - - app.config['MONGO_URI'] = 'mongodb://{auth}{host}:{port}/{dbname}'.format( - host=os.environ.get('MONGO_HOST', get_conf( - config, 'database', 'host')), - port=os.environ.get('MONGO_PORT', get_conf( - config, 'database', 'port')), - dbname=os.environ.get('MONGO_DBNAME', get_conf( - config, 'database', 'name')), - auth=auth - ) - - """Instantiate MongoDB client.""" - mongo = PyMongo(app) - logger.info( - ( - "Registered database '{name}' at URI '{uri}':'{port}' with Flask " - 'application.' 
- ).format( - name=os.environ.get('MONGO_DBNAME', get_conf( - config, 'database', 'name')), - uri=os.environ.get('MONGO_HOST', get_conf( - config, 'database', 'host')), - port=os.environ.get('MONGO_PORT', get_conf( - config, 'database', 'port')) - ) - ) - return mongo diff --git a/cwl_wes/errors/__init__.py b/cwl_wes/errors/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/errors/errors.py b/cwl_wes/errors/errors.py deleted file mode 100644 index ce6e7c6..0000000 --- a/cwl_wes/errors/errors.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Custom errors, error handler functions and function to register error -handlers with a Connexion app instance.""" - -import logging - -from connexion import App, ProblemException -from connexion.exceptions import ( - ExtraParameterProblem, - Forbidden, - Unauthorized -) -from flask import Response -from json import dumps -from werkzeug.exceptions import (BadRequest, InternalServerError, NotFound) - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def register_error_handlers(app: App) -> App: - """Adds custom handlers for exceptions to Connexion app instance.""" - # Add error handlers - app.add_error_handler(BadRequest, handle_bad_request) - app.add_error_handler(ExtraParameterProblem, handle_bad_request) - app.add_error_handler(Forbidden, __handle_forbidden) - app.add_error_handler(InternalServerError, __handle_internal_server_error) - app.add_error_handler(Unauthorized, __handle_unauthorized) - app.add_error_handler(WorkflowNotFound, __handle_workflow_not_found) - logger.info('Registered custom error handlers with Connexion app.') - - # Return Connexion app instance - return app - - -# CUSTOM ERRORS -class WorkflowNotFound(ProblemException, NotFound): - """WorkflowNotFound(404) error compatible with Connexion.""" - - def __init__(self, title=None, **kwargs): - super(WorkflowNotFound, self).__init__(title=title, **kwargs) - - -# CUSTOM ERROR HANDLERS -def handle_bad_request(exception: 
Exception) -> Response: - return Response( - response=dumps({ - 'msg': 'The request is malformed.', - 'status_code': '400' - }), - status=400, - mimetype="application/problem+json" - ) - - -def __handle_unauthorized(exception: Exception) -> Response: - return Response( - response=dumps({ - 'msg': 'The request is unauthorized.', - 'status_code': '401' - }), - status=401, - mimetype="application/problem+json" - ) - - -def __handle_forbidden(exception: Exception) -> Response: - return Response( - response=dumps({ - 'msg': 'The requester is not authorized to perform this action.', - 'status_code': '403' - }), - status=403, - mimetype="application/problem+json" - ) - - -def __handle_workflow_not_found(exception: Exception) -> Response: - return Response( - response=dumps({ - 'msg': 'The requested workflow run wasn\'t found.', - 'status_code': '404' - }), - status=404, - mimetype="application/problem+json" - ) - - -def __handle_internal_server_error(exception: Exception) -> Response: - return Response( - response=dumps({ - 'msg': 'An unexpected error occurred.', - 'status_code': '500' - }), - status=500, - mimetype="application/problem+json" - ) diff --git a/cwl_wes/exceptions.py b/cwl_wes/exceptions.py new file mode 100644 index 0000000..0dd724b --- /dev/null +++ b/cwl_wes/exceptions.py @@ -0,0 +1,62 @@ +from connexion.exceptions import ( + BadRequestProblem, + ExtraParameterProblem, + Forbidden, + Unauthorized, + ProblemException +) +from pydantic import ValidationError +from werkzeug.exceptions import ( + BadRequest, + InternalServerError, + NotFound +) + + +class WorkflowNotFound(ProblemException, NotFound): + """WorkflowNotFound(404) error compatible with Connexion.""" + pass + + +exceptions = { + Exception: { + "message": "An unexpected error occurred.", + "code": '500', + }, + BadRequest: { + "message": "The request is malformed.", + "code": '400', + }, + BadRequestProblem: { + "message": "The request is malformed.", + "code": '400', + }, + ExtraParameterProblem: 
{ + "message": "The request is malformed.", + "code": '400', + }, + ValidationError: { + "message": "The request is malformed.", + "code": '400', + }, + Unauthorized: { + "message": " The request is unauthorized.", + "code": '401', + }, + Forbidden: { + "message": "The requester is not authorized to perform this action.", + "code": '403', + }, + NotFound: { + "message": "The requested resource wasn't found.", + "code": '404', + }, + InternalServerError: { + "message": "An unexpected error occurred.", + "code": '500', + }, + WorkflowNotFound: { + "message": "The requested workflow run wasn\'t found.", + "code": '404', + }, +} \ No newline at end of file diff --git a/cwl_wes/factories/__init__.py b/cwl_wes/factories/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/factories/celery_app.py b/cwl_wes/factories/celery_app.py deleted file mode 100644 index 53f2363..0000000 --- a/cwl_wes/factories/celery_app.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Factory for creating Celery app instances based on Flask apps.""" - -import os - -from inspect import stack -import logging - -from flask import Flask -from celery import Celery - -from foca.config.config_parser import (get_conf, get_conf_type) - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def create_celery_app(app: Flask) -> Celery: - """Creates Celery application and configures it from Flask app.""" - broker = 'pyamqp://{host}:{port}//'.format( - host=os.environ.get('RABBIT_HOST', get_conf(app.config, 'celery', 'broker_host')), - port=os.environ.get('RABBIT_PORT', get_conf(app.config, 'celery', 'broker_port')), - ) - backend = get_conf(app.config, 'celery', 'result_backend') - include = get_conf_type(app.config, 'celery', 'include', types=(list)) - maxsize = get_conf(app.config, 'celery', 'message_maxsize') - - # Instantiate Celery app - celery = Celery( - app=__name__, - broker=broker, - backend=backend, - include=include, - ) - logger.info("Celery app created from 
'{calling_module}'.".format( - calling_module=':'.join([stack()[1].filename, stack()[1].function]) - )) - - # Set Celery options - celery.Task.resultrepr_maxsize = maxsize - celery.amqp.argsrepr_maxsize = maxsize - celery.amqp.kwargsrepr_maxsize = maxsize - - # Update Celery app configuration with Flask app configuration - celery.conf.update(app.config) - logger.info('Celery app configured.') - - class ContextTask(celery.Task): # type: ignore - # https://github.com/python/mypy/issues/4284) - def __call__(self, *args, **kwargs): - with app.app_context(): - return self.run(*args, **kwargs) - - celery.Task = ContextTask - logger.debug("App context added to 'celery.Task' class.") - - return celery diff --git a/cwl_wes/factories/connexion_app.py b/cwl_wes/factories/connexion_app.py deleted file mode 100644 index a59e669..0000000 --- a/cwl_wes/factories/connexion_app.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Factory for creating and configuring Connexion app instances.""" - -from inspect import stack -import logging -from typing import (Mapping, Optional) - -from connexion import App - -from cwl_wes.errors.errors import handle_bad_request -from foca.config.config_parser import get_conf - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def create_connexion_app(config: Optional[Mapping] = None) -> App: - """Creates and configure Connexion app.""" - # Instantiate Connexion app - app = App(__name__) - logger.info("Connexion app created from '{calling_module}'.".format( - calling_module=':'.join([stack()[1].filename, stack()[1].function]) - )) - - # Workaround for adding a custom handler for `connexion.problem` responses - # Responses from request and paramater validators are not raised and - # cannot be intercepted by `add_error_handler`; see here: - # https://github.com/zalando/connexion/issues/138 - @app.app.after_request - def _rewrite_bad_request(response): - if ( - response.status_code == 400 and - response.data.decode('utf-8').find('"title":') is not 
None - ): - response = handle_bad_request(400) - return response - - # Configure Connexion app - if config is not None: - app = __add_config_to_connexion_app( - app=app, - config=config, - ) - - return app - - -def __add_config_to_connexion_app( - app: App, - config: Mapping -) -> App: - """Adds configuration to Flask app and replaces default Connexion and Flask - settings.""" - # Replace Connexion app settings - app.host = get_conf(config, 'server', 'host') - app.port = get_conf(config, 'server', 'port') - app.debug = get_conf(config, 'server', 'debug') - - # Replace Flask app settings - app.app.config['DEBUG'] = app.debug - app.app.config['ENV'] = get_conf(config, 'server', 'environment') - app.app.config['TESTING'] = get_conf(config, 'server', 'testing') - - # Log Flask config - logger.debug('Flask app settings:') - for (key, value) in app.app.config.items(): - logger.debug('* {}: {}'.format(key, value)) - - # Add user configuration to Flask app config - app.app.config.update(config) - - logger.info('Connexion app configured.') - return app diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py index a94d4c0..aaafbb4 100644 --- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py +++ b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py @@ -6,8 +6,9 @@ from celery import (Celery, uuid) from connexion.exceptions import Forbidden -from foca.config.config_parser import get_conf -from cwl_wes.errors.errors import WorkflowNotFound +from flask import Config + +from cwl_wes.exceptions import WorkflowNotFound from cwl_wes.ga4gh.wes.states import States from cwl_wes.tasks.tasks.cancel_run import task__cancel_run @@ -18,14 +19,15 @@ # Utility function for endpoint POST /runs//delete def cancel_run( - config: Dict, + config: Config, celery_app: Celery, run_id: str, *args, **kwargs ) -> Dict: """Cancels running workflow.""" - collection_runs = get_conf(config, 'database', 'collections', 'runs') + foca_config = config.foca + collection_runs = 
foca_config.db.dbs['cwl-wes-db'].collections['runs'] document = collection_runs.find_one( filter={'run_id': run_id}, projection={ @@ -59,12 +61,7 @@ def cancel_run( if document['api']['state'] in States.CANCELABLE: # Get timeout duration - timeout_duration = get_conf( - config, - 'api', - 'endpoint_params', - 'timeout_cancel_run', - ) + timeout_duration = foca_config.custom.endpoint_params.timeout_cancel_run # Execute cancelation task in background task_id = uuid() diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py index e618773..2732ee0 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py @@ -5,8 +5,8 @@ from typing import Dict -from foca.config.config_parser import get_conf -from cwl_wes.errors.errors import WorkflowNotFound +from cwl_wes.exceptions import WorkflowNotFound +from flask import Config # Get logger instance @@ -15,13 +15,13 @@ # Utility function for endpoint GET /runs/ def get_run_log( - config: Dict, + config: Config, run_id: str, *args, **kwargs ) -> Dict: """Gets detailed log information for specific run.""" - collection_runs = get_conf(config, 'database', 'collections', 'runs') + collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] document = collection_runs.find_one( filter={'run_id': run_id}, projection={ diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py index dc67a0c..c1dd9b1 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py @@ -5,8 +5,8 @@ from typing import Dict -from foca.config.config_parser import get_conf -from cwl_wes.errors.errors import WorkflowNotFound +from flask import Config +from cwl_wes.exceptions import WorkflowNotFound # Get logger instance @@ -15,13 +15,13 @@ # Utility function for endpoint GET /runs//status def get_run_status( - config: Dict, + config: Config, run_id: str, *args, **kwargs ) 
-> Dict: """Gets status information for specific run.""" - collection_runs = get_conf(config, 'database', 'collections', 'runs') + collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] document = collection_runs.find_one( filter={'run_id': run_id}, projection={ diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py index ee94746..0fca70e 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py @@ -7,7 +7,7 @@ from pymongo import collection as Collection -import cwl_wes.database.db_utils as db_utils +import cwl_wes.utils.db_utils as db_utils from cwl_wes.ga4gh.wes.states import States diff --git a/cwl_wes/ga4gh/wes/endpoints/list_runs.py b/cwl_wes/ga4gh/wes/endpoints/list_runs.py index c45ce50..2bc6c10 100644 --- a/cwl_wes/ga4gh/wes/endpoints/list_runs.py +++ b/cwl_wes/ga4gh/wes/endpoints/list_runs.py @@ -4,7 +4,7 @@ from bson.objectid import ObjectId -from foca.config.config_parser import get_conf +from flask import Config # Get logger instance @@ -13,23 +13,18 @@ # Utility function for endpoint GET /runs def list_runs( - config: Dict, + config: Config, *args, **kwargs ) -> Dict: """Lists IDs and status for all workflow runs.""" - collection_runs = get_conf(config, 'database', 'collections', 'runs') + collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] # Fall back to default page size if not provided by user if 'page_size' in kwargs: page_size = kwargs['page_size'] else: - page_size = ( - config - ['api'] - ['endpoint_params'] - ['default_page_size'] - ) + page_size = config.foca.custom.endpoint_params.default_page_size # Extract/set page token if 'page_token' in kwargs: diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index 3c23c9b..4130bc3 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -17,10 +17,9 @@ 
from werkzeug.datastructures import ImmutableMultiDict from werkzeug.utils import secure_filename -from flask import request +from flask import Config, request -from foca.config.config_parser import (get_conf, get_conf_type) -from cwl_wes.errors.errors import BadRequest +from cwl_wes.exceptions import BadRequest from cwl_wes.tasks.tasks.run_workflow import task__run_workflow from cwl_wes.ga4gh.wes.endpoints.utils.drs import translate_drs_uris @@ -31,7 +30,7 @@ # Utility function for endpoint POST /runs def run_workflow( - config: Dict, + config: Config, form_data: ImmutableMultiDict, *args, **kwargs @@ -179,17 +178,17 @@ def __init_run_document(data: Dict) -> Dict: def __create_run_environment( - config: Dict, + config: Config, document: Dict, **kwargs ) -> Dict: """Creates unique run identifier and permanent and temporary storage directories for current run.""" - collection_runs = get_conf(config, 'database', 'collections', 'runs') - out_dir = get_conf(config, 'storage', 'permanent_dir') - tmp_dir = get_conf(config, 'storage', 'tmp_dir') - run_id_charset = eval(get_conf(config, 'database', 'run_id', 'charset')) - run_id_length = get_conf(config, 'database', 'run_id', 'length') + collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + out_dir = config.foca.custom.storage.permanent_dir + tmp_dir = config.foca.custom.storage.tmp_dir + run_id_charset = eval(config.foca.custom.run_id.charset) + run_id_length = config.foca.custom.run_id.length # Keep on trying until a unique run id was found and inserted # TODO: If no more possible IDs => inf loop; fix (raise custom error; 500 @@ -256,33 +255,13 @@ def __create_run_environment( break # translate DRS URIs to access URLs - file_types: List[str] = get_conf_type( - current_app.config, - 'drs', - 'file_types', - types=(list), - ) - supported_access_methods: List[str] = get_conf_type( - current_app.config, - 'service_info', - 'supported_filesystem_protocols', - types=(list), - ) - port: Optional[int] = 
get_conf( - current_app.config, - 'drs', - 'port', - ) - base_path: Optional[str] = get_conf( - current_app.config, - 'drs', - 'base_path', - ) - use_http: bool = get_conf( - current_app.config, - 'drs', - 'use_http', - ) + drs_server_conf = current_app.config.foca.custom.drs_server + service_info_conf = current_app.config.foca.custom.service_info + file_types: List[str] = drs_server_conf.file_types + supported_access_methods: List[str] = service_info_conf.supported_filesystem_protocols + port: Optional[int] = drs_server_conf.port + base_path: Optional[str] = drs_server_conf.base_path + use_http: bool = drs_server_conf.use_http translate_drs_uris( path=document['internal']['workflow_files'], file_types=file_types, @@ -517,13 +496,13 @@ def __process_workflow_attachments(data: Dict) -> Dict: def __run_workflow( - config: Dict, + config: Config, document: Dict, **kwargs ) -> None: """Helper function `run_workflow()`.""" - tes_url = get_conf(config, 'tes', 'url') - remote_storage_url = get_conf(config, 'storage', 'remote_storage_url') + tes_url = config.custom.tes_server.url + remote_storage_url = config.custom.storage.remote_storage_url run_id = document['run_id'] task_id = document['task_id'] tmp_dir = document['internal']['tmp_dir'] @@ -566,12 +545,7 @@ def __run_workflow( # ] # Get timeout duration - timeout_duration = get_conf( - config, - 'api', - 'endpoint_params', - 'timeout_run_workflow', - ) + timeout_duration = config.custom.endpoint_params.timeout_run_workflow # Execute command as background task logger.info( diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index d275b93..c217f3e 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -6,13 +6,14 @@ from connexion import request from flask import current_app +from foca.utils.logging import log_traffic + import cwl_wes.ga4gh.wes.endpoints.cancel_run as cancel_run import cwl_wes.ga4gh.wes.endpoints.get_run_log as get_run_log import 
cwl_wes.ga4gh.wes.endpoints.get_run_status as get_run_status import cwl_wes.ga4gh.wes.endpoints.list_runs as list_runs import cwl_wes.ga4gh.wes.endpoints.run_workflow as run_workflow import cwl_wes.ga4gh.wes.endpoints.get_service_info as get_service_info -from cwl_wes.security.decorators import auth_token_optional # Get logger instance @@ -20,8 +21,8 @@ # GET /runs/ -@auth_token_optional -def GetRunLog(run_id, *args, **kwargs): +@log_traffic +def GetRunLog(run_id, *args, **kwargs) -> dict: """Returns detailed run info.""" response = get_run_log.get_run_log( config=current_app.config, @@ -29,13 +30,12 @@ def GetRunLog(run_id, *args, **kwargs): *args, **kwargs ) - log_request(request, response) return response # POST /runs//cancel -@auth_token_optional -def CancelRun(run_id, *args, **kwargs): +@log_traffic +def CancelRun(run_id, *args, **kwargs) -> dict: """Cancels unfinished workflow run.""" response = cancel_run.cancel_run( config=current_app.config, @@ -44,13 +44,12 @@ def CancelRun(run_id, *args, **kwargs): *args, **kwargs ) - log_request(request, response) return response # GET /runs//status -@auth_token_optional -def GetRunStatus(run_id, *args, **kwargs): +@log_traffic +def GetRunStatus(run_id, *args, **kwargs) -> dict: """Returns run status.""" response = get_run_status.get_run_status( config=current_app.config, @@ -58,38 +57,36 @@ def GetRunStatus(run_id, *args, **kwargs): *args, **kwargs ) - log_request(request, response) return response # GET /service-info -def GetServiceInfo(*args, **kwargs): +@log_traffic +def GetServiceInfo(*args, **kwargs) -> dict: """Returns service info.""" response = get_service_info.get_service_info( config=current_app.config, *args, **kwargs ) - log_request(request, response) return response # GET /runs -@auth_token_optional -def ListRuns(*args, **kwargs): +@log_traffic +def ListRuns(*args, **kwargs) -> dict: """Lists IDs and status of all workflow runs.""" response = list_runs.list_runs( config=current_app.config, *args, **kwargs 
) - log_request(request, response) return response # POST /runs -@auth_token_optional -def RunWorkflow(*args, **kwargs): +@log_traffic +def RunWorkflow(*args, **kwargs) -> dict: """Executes workflow.""" response = run_workflow.run_workflow( config=current_app.config, @@ -97,22 +94,4 @@ def RunWorkflow(*args, **kwargs): *args, **kwargs ) - log_request(request, response) return response - - -def log_request(request, response): - """Writes request and response to log.""" - # TODO: write decorator for request logging - logger.debug( - ( - "Response to request \"{method} {path} {protocol}\" from " - "{remote_addr}: {response}" - ).format( - method=request.environ['REQUEST_METHOD'], - path=request.environ['PATH_INFO'], - protocol=request.environ['SERVER_PROTOCOL'], - remote_addr=request.environ['REMOTE_ADDR'], - response=response, - ) - ) diff --git a/cwl_wes/ga4gh/wes/service_info.py b/cwl_wes/ga4gh/wes/service_info.py new file mode 100644 index 0000000..61e9b94 --- /dev/null +++ b/cwl_wes/ga4gh/wes/service_info.py @@ -0,0 +1,95 @@ +"""Controllers for the `/service-info route.""" + +import logging +from typing import Dict + +from bson.objectid import ObjectId +from foca.models.config import Config +from flask import current_app +from pymongo.collection import Collection + +from cwl_wes.exceptions import ( + NotFound, +) +from cwl_wes.ga4gh.wes.states import States + +logger = logging.getLogger(__name__) + + +class ServiceInfo: + + def __init__(self) -> None: + """Class for WES API service info server-side controller methods. + + Creates service info upon first request, if it does not exist. + + Attributes: + config: App configuration. + foca_config: FOCA configuration. + db_client_service_info: Database collection storing service info + objects. + db_client_runs: Database collection storing workflow run objects. + object_id: Database identifier for service info. 
+ """ + self.config: Dict = current_app.config + self.foca_config: Config = self.config.foca + self.db_client_service_info: Collection = ( + self.foca_config.db.dbs['cwl-wes-db'] + .collections['service_info'].client + ) + self.db_client_runs: Collection = ( + self.foca_config.db.dbs['cwl-wes-db'].collections['runs'].client + ) + self.object_id: str = "000000000000000000000000" + + def get_service_info(self, get_counts: bool = True) -> Dict: + """Get latest service info from database. + + Args: + get_counts: Whether system state counts should be returned. + + Returns: + Latest service info details. + + Raises: + NotFound: Service info was not found. + """ + service_info = self.db_client_service_info.find_one( + {'_id': ObjectId(self.object_id)}, + {'_id': False}, + ) + if service_info is None: + raise NotFound + if get_counts: + service_info['system_state_counts'] = self._get_state_counts() + return service_info + + def set_service_info( + self, + data: Dict, + ) -> None: + """Create or update service info. + + Arguments: + data: Dictionary of service info values. Cf. 
+ """ + self.db_client_service_info.replace_one( + filter={'_id': ObjectId(self.object_id)}, + replacement=data, + upsert=True, + ) + logger.info("Service info set.") + + def _get_state_counts(self) -> Dict[str, int]: + """Gets current system state counts.""" + current_counts = {state: 0 for state in States.ALL} + cursor = self.db_client_runs.find( + filter={}, + projection={ + 'run_log.state': True, + '_id': False, + } + ) + for record in cursor: + current_counts[record['run_log']['state']] += 1 + return current_counts diff --git a/cwl_wes/gunicorn.py b/cwl_wes/gunicorn.py new file mode 100644 index 0000000..007c074 --- /dev/null +++ b/cwl_wes/gunicorn.py @@ -0,0 +1,31 @@ +import os + +from cwl_wes.app import init_app + +# Source application configuration +app_config = init_app().app.config.foca + +# Set Gunicorn number of workers and threads +workers = int(os.environ.get('GUNICORN_PROCESSES', '1')) +threads = int(os.environ.get('GUNICORN_THREADS', '1')) + +# Set allowed IPs +forwarded_allow_ips = '*' + +# Set Gunicorn bind address +bind = '{address}:{port}'.format( + address=app_config.server.host, + port=app_config.server.port, +) + +# Source the environment variables for the Gunicorn workers +raw_env = [ + "WES_CONFIG=%s" % os.environ.get('WES_CONFIG', ''), + "RABBIT_HOST=%s" % os.environ.get('RABBIT_HOST', app_config.jobs.host), + "RABBIT_PORT=%s" % os.environ.get('RABBIT_PORT', app_config.jobs.port), + "MONGO_HOST=%s" % os.environ.get('MONGO_HOST', app_config.db.host), + "MONGO_PORT=%s" % os.environ.get('MONGO_PORT', app_config.db.port), + "MONGO_DBNAME=%s" % os.environ.get('MONGO_DBNAME', 'cwl-wes-db'), + "MONGO_USERNAME=%s" % os.environ.get('MONGO_USERNAME', ''), + "MONGO_PASSWORD=%s" % os.environ.get('MONGO_PASSWORD', '') +] diff --git a/cwl_wes/security/__init__.py b/cwl_wes/security/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/security/cors.py b/cwl_wes/security/cors.py deleted file mode 100644 index 4d55b83..0000000 
--- a/cwl_wes/security/cors.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Function enabling cross-origin resource sharing for a Flask app -instance.""" - -import logging -from flask import Flask - -from flask_cors import CORS - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def enable_cors(app: Flask) -> None: - """Enables cross-origin resource sharing for Flask app.""" - CORS(app) - logger.info('Enabled CORS for Flask app.') diff --git a/cwl_wes/security/decorators.py b/cwl_wes/security/decorators.py deleted file mode 100644 index 5b729c2..0000000 --- a/cwl_wes/security/decorators.py +++ /dev/null @@ -1,612 +0,0 @@ -"""Decorator and utility functions for protecting access to endpoints.""" - -from connexion.exceptions import Unauthorized -from connexion import request -from flask import current_app -from functools import wraps -import logging -from typing import (Callable, Iterable, Mapping, Optional) - -from cryptography.hazmat.primitives import serialization -import jwt -import requests -import json - -from foca.config.config_parser import get_conf, get_conf_type - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def auth_token_optional(fn: Callable) -> Callable: - """ - **The decorator protects an endpoint from being called without a valid - authorization token. 
- """ - @wraps(fn) - def wrapper(*args, **kwargs): - - # Check if authentication is enabled - if get_conf( - current_app.config, - 'security', - 'authorization_required', - ): - - # Get config parameters - validation_methods = get_conf_type( - current_app.config, - 'security', - 'jwt', - 'validation_methods', - types=(list), - ) - validation_checks = get_conf( - current_app.config, - 'security', - 'jwt', - 'validation_checks', - ) - algorithms = get_conf_type( - current_app.config, - 'security', - 'jwt', - 'algorithms', - types=(list), - ) - expected_prefix = get_conf( - current_app.config, - 'security', - 'jwt', - 'token_prefix', - ) - header_name = get_conf( - current_app.config, - 'security', - 'jwt', - 'header_name', - ) - claim_key_id = get_conf( - current_app.config, - 'security', - 'jwt', - 'claim_key_id', - ) - claim_issuer = get_conf( - current_app.config, - 'security', - 'jwt', - 'claim_issuer', - ) - claim_identity = get_conf( - current_app.config, - 'security', - 'jwt', - 'claim_identity', - ) - add_key_to_claims = get_conf( - current_app.config, - 'security', - 'jwt', - 'add_key_to_claims', - ) - audience = get_conf_type( - current_app.config, - 'security', - 'jwt', - 'audience', - types=(list, type(None)), - ) - allow_expired = get_conf( - current_app.config, - 'security', - 'jwt', - 'allow_expired', - ) - - # Ensure that at least one validation method was configured - if not len(validation_methods): - logger.error("No JWT validation methods configured.") - raise Unauthorized - - # Ensure that a valid validation checks argument was configured - if validation_checks not in ['all', 'any']: - logger.error( - ( - "Illegal argument '{validation_checks} passed to " - "configuration paramater 'validation_checks'. 
Allowed " - "values: 'any', 'all'" - ) - ) - raise Unauthorized - - # Parse JWT token from HTTP header - token = parse_jwt_from_header( - header_name=header_name, - expected_prefix=expected_prefix, - ) - - # Initialize claims - claims = {} - - # Validate JWT via /userinfo endpoint - if 'userinfo' in validation_methods: - if not (claims and validation_checks == 'any'): - logger.debug( - ( - "Validating JWT via identity provider's " - "'/userinfo' endpoint..." - ) - ) - claims = validate_jwt_via_userinfo_endpoint( - token=token, - algorithms=algorithms, - claim_issuer=claim_issuer, - ) - if not claims and validation_checks == 'all': - logger.error( - ( - "Insufficient number of JWT validation checks " - "passed." - ) - ) - raise Unauthorized - - # Validate JWT via public key - if 'public_key' in validation_methods: - if not (claims and validation_checks == 'any'): - logger.debug( - ( - "Validating JWT via identity provider's public " - "key..." - ) - ) - claims = validate_jwt_via_public_key( - token=token, - algorithms=algorithms, - claim_key_id=claim_key_id, - claim_issuer=claim_issuer, - add_key_to_claims=add_key_to_claims, - audience=audience, - allow_expired=allow_expired, - ) - if not claims and validation_checks == 'all': - logger.error( - ( - "Insufficient number of JWT validation checks " - "passed." - ) - ) - raise Unauthorized - - # Check whether enough validation checks passed - if not claims: - logger.error( - ( - "No JWT validation checks passed." - ) - ) - raise Unauthorized - - # Ensure that specified identity claim is available - if not validate_jwt_claims( - claim_identity, - claims=claims, - ): - raise Unauthorized - - # Log result - logger.debug( - "Access granted." 
- ) - - # Return wrapped function with token data - return fn( - jwt=token, - claims=claims, - user_id=claims[claim_identity], - *args, - **kwargs - ) - - # Return wrapped function without token data - else: - return fn(*args, **kwargs) - - return wrapper - - -def parse_jwt_from_header( - header_name: str = 'Authorization', - expected_prefix: str = 'Bearer', -) -> str: - """Parses authorization token from HTTP header.""" - # TODO: Add custom errors - # Ensure that authorization header is present - auth_header = request.headers.get(header_name, None) - if not auth_header: - logger.error("No HTTP header with name '{header_name}' found.".format( - header_name=header_name, - )) - raise Unauthorized - - # Ensure that authorization header is formatted correctly - try: - (prefix, token) = auth_header.split() - except ValueError as e: - logger.error( - ( - "Authentication header is malformed. Original error message: " - "{type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - raise Unauthorized - - if prefix != expected_prefix: - logger.error( - ( - "Expected token prefix in authentication header is " - "'{expected_prefix}', but '{prefix}' was found." - ).format( - expected_prefix=expected_prefix, - prefix=prefix, - ) - ) - raise Unauthorized - - return token - - -def validate_jwt_via_userinfo_endpoint( - token: str, - algorithms: Iterable[str] = ['RS256'], - claim_issuer: str = 'iss', - service_document_field: str = 'userinfo_endpoint', -) -> Mapping: - - # Decode JWT - try: - claims = jwt.decode( - jwt=token, - verify=False, - algorithms=algorithms, - ) - except Exception as e: - logger.warning( - ( - "JWT could not be decoded. 
Original error message: " - "{type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Verify existence of issuer claim - if not validate_jwt_claims( - claim_issuer, - claims=claims, - ): - return {} - - # Get /userinfo endpoint URL - url = get_entry_from_idp_service_discovery_endpoint( - issuer=claims[claim_issuer], - entry=service_document_field, - ) - - # Validate JWT via /userinfo endpoint - if url: - logger.debug(f"Issuer's '/userinfo' endpoint URL: {url}") - try: - validate_jwt_via_endpoint( - url=url, - token=token, - ) - except Exception: - return {} - else: - return {} - - # Log success and return claims - logger.debug( - f"Claims decoded: {claims}" - ) - return claims - - -def validate_jwt_via_public_key( - token: str, - algorithms: Iterable[str] = ['RS256'], - claim_key_id: str = 'kid', - claim_issuer: str = 'iss', - service_document_field: str = 'jwks_uri', - add_key_to_claims: bool = True, - audience: Optional[Iterable[str]] = None, - allow_expired: bool = False, -) -> Mapping: - - # Extract JWT claims - try: - claims = jwt.decode( - jwt=token, - verify=False, - algorithms=algorithms, - ) - except Exception as e: - logger.error( - ( - "JWT could not be decoded. Original error message: {type}: " - "{msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Extract JWT header claims - try: - header_claims = jwt.get_unverified_header(token) - except Exception as e: - logger.error( - ( - "Could not extract JWT header claims. 
Original error message: " - "{type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Get JWK set endpoint URL - url = get_entry_from_idp_service_discovery_endpoint( - issuer=claims[claim_issuer], - entry=service_document_field, - ) - - # Obtain identity provider's public keys - if url: - logger.debug(f"Issuer's JWK set endpoint URL: {url}") - public_keys = get_public_keys( - url=url, - claim_key_id=claim_key_id, - ) - else: - return {} - - # If currently used public key is specified, verify that it exists and - # remove all other keys - if claim_key_id in header_claims: - if header_claims[claim_key_id] in public_keys: - public_keys = { - header_claims[claim_key_id]: - public_keys[header_claims[claim_key_id]] - } - else: - logger.error( - "JWT key ID not found among issuer's JWKs." - ) - return {} - else: - logger.debug( - "JWT key ID not specified. Trying all available JWKs..." - ) - - # Set validations - validation_options = {} - if audience is None: - validation_options['verify_aud'] = False - if allow_expired: - validation_options['verify_exp'] = False - - # Try public keys one after the other - pem = '' - for key in public_keys.values(): - - # Get PEM representation of key - pem = key.public_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PublicFormat.SubjectPublicKeyInfo, - ).decode('utf-8').encode('unicode_escape').decode('utf-8') - - # Decode JWT and validate via public key - try: - claims = jwt.decode( - jwt=token, - verify=True, - key=key, - algorithms=algorithms, - audience=audience, - options=validation_options, - ) - # Wrong or faulty key was used; try next one - except ( - jwt.exceptions.InvalidSignatureError, - jwt.exceptions.InvalidKeyError - ) as e: - logger.debug( - "JWT could not be decoded with current JWK:\n" - f"{pem}\n" - f"Original error message: {type(e).__name__}: {e}" - ) - # Key seems okay but token seems invalid - except Exception as e: - logger.error( - "JWT could not be validated. 
Original error message: " - f"{type(e).__name__}: {e}" - ) - return {} - - # Do not try other keys if token was decoded - if claims: - break - - # Verify that token was decoded - if not claims: - logger.error( - "JWT could not be validated with any of the issuer's JWKs." - ) - return {} - - # Add public key to claims - if add_key_to_claims: - claims['public_key'] = pem - - # Log success and return claims - logger.debug( - f"Claims decoded: {claims}" - ) - return claims - - -def validate_jwt_claims( - *args: str, - claims: Mapping, -) -> bool: - """ - Validates the existence of JWT claims. Returns False if any are missing, - otherwise returns True. - """ - # Check for existence of required claims - for claim in args: - if claim not in claims: - logger.warning( - ( - "Required claim '{claim}' not found in JWT." - ).format( - claim=claim, - ) - ) - return False - else: - return True - - -def get_entry_from_idp_service_discovery_endpoint( - issuer: str, - entry: str, -) -> Optional[str]: - """ - Access the identity provider's service discovery endpoint to retrieve the - value of the specified entry. - """ - # Build endpoint URL - base_url = issuer.rstrip("/") - url = "{base_url}/.well-known/openid-configuration".format( - base_url=base_url - ) - - # Send GET request to service discovery endpoint - try: - response = requests.get(url) - response.raise_for_status() - except Exception as e: - logger.warning( - ( - "Could not connect to endpoint '{url}'. Original error " - "message: {type}: {msg}" - ).format( - url=url, - type=type(e).__name__, - msg=e, - ) - ) - return None - - # Return entry or None - if entry not in response.json(): - logger.warning( - ( - "Required entry '{entry}' not found in identity provider's " - "documentation accessed at endpoint '{endpoint}'." 
- ).format( - entry=entry, - url=url, - ) - ) - return None - else: - return response.json()[entry] - - -def validate_jwt_via_endpoint( - url: str, - token: str, - header_name: str = 'Authorization', - prefix: str = 'Bearer' -) -> None: - """ - Returns True if a JWT-headed request to a specified URL yields the - specified status code. - """ - headers = { - "{header_name}".format( - header_name=header_name - ): "{prefix} {token}".format( - header_name=header_name, - prefix=prefix, - token=token, - ) - } - try: - response = requests.get( - url, - headers=headers, - ) - response.raise_for_status() - except Exception as e: - logger.warning( - ( - "Could not connect to endpoint '{url}'. Original error " - "message: {type}: {msg}" - ).format( - url=url, - type=type(e).__name__, - msg=e, - ) - ) - raise - - return None - - -def get_public_keys( - url: str, - claim_key_id: str = 'kid', -) -> Mapping: - """ - Obtain the identity provider's list of public keys. - """ - # Get JWK sets from identity provider - try: - response = requests.get(url) - response.raise_for_status() - except Exception as e: - logger.warning( - ( - "Could not connect to endpoint '{url}'. 
Original error " - "message: {type}: {msg}" - ).format( - url=url, - type=type(e).__name__, - msg=e, - ) - ) - return {} - - # Iterate over all JWK sets and store public keys in dictionary - public_keys = {} - for jwk in response.json()['keys']: - public_keys[jwk[claim_key_id]] = jwt.algorithms.RSAAlgorithm.from_jwk( - json.dumps(jwk) - ) - - # Return dictionary of public keys - return public_keys diff --git a/cwl_wes/tasks/celery_task_monitor.py b/cwl_wes/tasks/celery_task_monitor.py index 20d44bd..a47660a 100644 --- a/cwl_wes/tasks/celery_task_monitor.py +++ b/cwl_wes/tasks/celery_task_monitor.py @@ -5,7 +5,6 @@ import logging import os import re -import requests from shlex import quote from threading import Thread from time import sleep @@ -18,7 +17,7 @@ from pymongo import collection as Collection import tes -import cwl_wes.database.db_utils as db_utils +import cwl_wes.utils.db_utils as db_utils # Get logger instance diff --git a/cwl_wes/tasks/register_celery.py b/cwl_wes/tasks/register_celery.py index 9400cfa..c4c76ec 100644 --- a/cwl_wes/tasks/register_celery.py +++ b/cwl_wes/tasks/register_celery.py @@ -1,10 +1,11 @@ """Function to create Celery app instance and register task monitor.""" -from flask import Flask +from connexion import App import logging import os -from cwl_wes.factories.celery_app import create_celery_app +from foca.factories.celery_app import create_celery_app + from cwl_wes.tasks.celery_task_monitor import TaskMonitor @@ -12,28 +13,24 @@ logger = logging.getLogger(__name__) -def register_task_service(app: Flask) -> None: +def register_task_service(app: App) -> None: """Instantiates Celery app and registers task monitor.""" # Ensure that code is executed only once when app reloader is used if os.environ.get("WERKZEUG_RUN_MAIN") != 'true': - - # Instantiate Celery app instance - celery_app = create_celery_app(app) - # Start task monitor daemon + foca_config = app.app.config.foca + custom_config = foca_config.custom + celery_app = 
create_celery_app(app.app) TaskMonitor( celery_app=celery_app, - collection=app.config['database']['collections']['runs'], + collection=foca_config.db.dbs['cwl-wes-db'].collections['runs'], tes_config={ - 'url': - app.config['tes']['url'], - 'query_params': - app.config['tes']['status_query_params'], - 'timeout': - app.config['tes']['timeout'] + 'url': custom_config.tes_server.url, + 'query_params': custom_config.tes_server.status_query_params, + 'timeout': custom_config.tes_server.timeout }, - timeout=app.config['celery']['monitor']['timeout'], - authorization=app.config['security']['authorization_required'], + timeout=custom_config.celery.monitor.timeout, + authorization=foca_config.security.auth.required, ) logger.info('Celery task monitor registered.') diff --git a/cwl_wes/tasks/tasks/cancel_run.py b/cwl_wes/tasks/tasks/cancel_run.py index 2da3810..66b9e56 100644 --- a/cwl_wes/tasks/tasks/cancel_run.py +++ b/cwl_wes/tasks/tasks/cancel_run.py @@ -10,10 +10,9 @@ from flask import current_app from pymongo import collection as Collection -from cwl_wes.celery_worker import celery -from foca.config.config_parser import get_conf -import cwl_wes.database.db_utils as db_utils -from cwl_wes.database.register_mongodb import create_mongo_client +from cwl_wes.worker import celery +import cwl_wes.utils.db_utils as db_utils +from foca.database.register_mongodb import _create_mongo_client from cwl_wes.ga4gh.wes.states import States from cwl_wes.tasks.utils import set_run_state @@ -34,11 +33,13 @@ def task__cancel_run( token: Optional[str] = None, ) -> None: """Revokes worfklow task and tries to cancel all running TES tasks.""" - config = current_app.config + foca_config = current_app.config.foca # Create MongoDB client - mongo = create_mongo_client( + mongo = _create_mongo_client( app=current_app, - config=config, + host=foca_config.db.host, + port=foca_config.db.port, + db='cwl-wes-db', ) collection = mongo.db['runs'] # Set run state to 'CANCELING' @@ -54,8 +55,8 @@ def 
task__cancel_run( __cancel_tes_tasks( collection=collection, run_id=run_id, - url=get_conf(config, 'tes', 'url'), - timeout=get_conf(config, 'tes', 'timeout'), + url=foca_config.custom.tes_server.tes_server.url, + timeout=foca_config.custom.tes_server.tes_server.timeout, token=token, ) except SoftTimeLimitExceeded as e: diff --git a/cwl_wes/tasks/tasks/run_workflow.py b/cwl_wes/tasks/tasks/run_workflow.py index c674310..a564fce 100644 --- a/cwl_wes/tasks/tasks/run_workflow.py +++ b/cwl_wes/tasks/tasks/run_workflow.py @@ -6,7 +6,7 @@ import subprocess from typing import (Dict, List, Optional, Tuple) -from cwl_wes.celery_worker import celery +from cwl_wes.worker import celery # Get logger instance diff --git a/cwl_wes/tasks/utils.py b/cwl_wes/tasks/utils.py index 6788f49..53dcb03 100644 --- a/cwl_wes/tasks/utils.py +++ b/cwl_wes/tasks/utils.py @@ -5,7 +5,7 @@ from pymongo import collection as Collection -import cwl_wes.database.db_utils as db_utils +import cwl_wes.utils.db_utils as db_utils # Get logger instance diff --git a/cwl_wes/api/__init__.py b/cwl_wes/utils/__init__.py similarity index 100% rename from cwl_wes/api/__init__.py rename to cwl_wes/utils/__init__.py diff --git a/cwl_wes/database/db_utils.py b/cwl_wes/utils/db_utils.py similarity index 75% rename from cwl_wes/database/db_utils.py rename to cwl_wes/utils/db_utils.py index fe1d409..d7ded8a 100644 --- a/cwl_wes/database/db_utils.py +++ b/cwl_wes/utils/db_utils.py @@ -2,33 +2,10 @@ from typing import (Any, List, Mapping, Optional) -from bson.objectid import ObjectId from pymongo.collection import ReturnDocument from pymongo import collection as Collection -def find_one_latest(collection: Collection) -> Optional[Mapping[Any, Any]]: - """Returns newest/latest object, stripped of the object id, or None if no - object exists: collection. 
- """ - try: - return collection.find( - {}, - {'_id': False} - ).sort([('_id', -1)]).limit(1).next() - except StopIteration: - return None - - -def find_id_latest(collection: Collection) -> Optional[ObjectId]: - """Returns object id of newest/latest object, or None if no object exists. - """ - try: - return collection.find().sort([('_id', -1)]).limit(1).next()['_id'] - except StopIteration: - return None - - def update_run_state( collection: Collection, task_id: str, diff --git a/cwl_wes/worker.py b/cwl_wes/worker.py new file mode 100644 index 0000000..21f1a5f --- /dev/null +++ b/cwl_wes/worker.py @@ -0,0 +1,8 @@ +"""Entry point for Celery workers.""" + +from foca.factories.celery_app import create_celery_app + +from cwl_wes.app import init_app + +flask_app = init_app().app +celery = create_celery_app(app=flask_app) \ No newline at end of file diff --git a/cwl_wes/wsgi.py b/cwl_wes/wsgi.py index 4c5ade2..0763c51 100644 --- a/cwl_wes/wsgi.py +++ b/cwl_wes/wsgi.py @@ -1,3 +1,3 @@ -from cwl_wes.app import run_server +from cwl_wes.app import init_app -app, config = run_server() +app = init_app() \ No newline at end of file From f6c3470a153bdc8c0825e3d608ee938ba021c215 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 00:03:19 +0100 Subject: [PATCH 02/29] migrated: FOCA v0.6.0 -> v0.10.0 --- deployment/templates/wes/wes-deployment.yaml | 2 +- docker-compose.yaml | 6 +++--- requirements.txt | 8 +++----- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/deployment/templates/wes/wes-deployment.yaml b/deployment/templates/wes/wes-deployment.yaml index 93ca230..1a2ac37 100644 --- a/deployment/templates/wes/wes-deployment.yaml +++ b/deployment/templates/wes/wes-deployment.yaml @@ -26,7 +26,7 @@ spec: imagePullPolicy: Always workingDir: '/app/cwl_wes' command: [ 'gunicorn' ] - args: [ '--log-level', 'debug', '-c', 'config.py', 'wsgi:app', '--reload', '--reload-extra-file', '{{ .Values.extra_config.folder }}/{{ .Values.extra_config.file }}' ] + args: [ 
'--log-level', 'debug', '-c', 'gunicorn.py', 'wsgi:app', '--reload', '--reload-extra-file', '{{ .Values.extra_config.folder }}/{{ .Values.extra_config.file }}' ] env: - name: WES_CONFIG value: {{ .Values.extra_config.folder }}/{{ .Values.extra_config.file }} diff --git a/docker-compose.yaml b/docker-compose.yaml index cdae71b..7a3699d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -9,7 +9,7 @@ services: restart: unless-stopped links: - mongodb - command: bash -c "cd /app/cwl_wes; gunicorn -c config.py wsgi:app" + command: bash -c "cd /app/cwl_wes; gunicorn -c gunicorn.py wsgi:app" volumes: - ../data/cwl_wes:/data ports: @@ -23,7 +23,7 @@ services: links: - mongodb - rabbitmq - command: bash -c "cd /app/cwl_wes; celery -A celery_worker worker -E --loglevel=info" + command: bash -c "cd /app/cwl_wes; celery -A worker worker -E --loglevel=info" volumes: - ../data/cwl_wes:/data @@ -37,7 +37,7 @@ services: - "5672:5672" mongodb: - image: mongo:3.2 + image: mongo:3.6 restart: unless-stopped volumes: - ../data/cwl_wes/db:/data/db diff --git a/requirements.txt b/requirements.txt index 894004b..917c1cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ --e git+https://github.com/uniqueg/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes -cwltool==1.0.20181217162649 +-e git+https://github.com/ohsu-comp-bio/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes drs-cli==0.2.3 -foca==0.7.0 +foca==0.10.0 gunicorn==19.9.0 py-tes==0.4.2 -python-dateutil==2.6.1 -ruamel.yaml==0.15.51 \ No newline at end of file +python-dateutil==2.6.1 \ No newline at end of file From 37ce855d399af7abdf16460a388e05d47fe0e758 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 00:14:07 +0100 Subject: [PATCH 03/29] revert wflow definitions --- ...140.workflow_execution_service.swagger.yaml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git 
a/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml b/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml index 54603f4..e2b686b 100644 --- a/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml +++ b/cwl_wes/api/20181010.be85140.workflow_execution_service.swagger.yaml @@ -15,7 +15,8 @@ paths: summary: Get information about Workflow Execution Service. description: |- May include information related (but not limited to) the workflow descriptor formats, versions supported, the WES API versions supported, and information about general service availability. - operationId: "ga4gh.wes.server.GetServiceInfo" + x-swagger-router-controller: ga4gh.wes.server + operationId: GetServiceInfo responses: '200': description: '' @@ -50,7 +51,8 @@ paths: contents of the list reflect the workflow list at the moment that the first page is requested. To monitor a specific workflow run, use GetRunStatus or GetRunLog. - operationId: "ga4gh.wes.server.ListRuns" + x-swagger-router-controller: ga4gh.wes.server + operationId: ListRuns responses: '200': description: '' @@ -144,7 +146,8 @@ paths: See the `RunRequest` documentation for details about other fields. - operationId: "ga4gh.wes.server.RunWorkflow" + x-swagger-router-controller: ga4gh.wes.server + operationId: RunWorkflow responses: '200': description: '' @@ -213,7 +216,8 @@ paths: (if available), a log object which allows the stderr and stdout to be retrieved, a log array so stderr/stdout for individual tasks can be retrieved, and the overall state of the workflow run (e.g. RUNNING, see the State section). - operationId: "ga4gh.wes.server.GetRunLog" + x-swagger-router-controller: ga4gh.wes.server + operationId: GetRunLog responses: '200': description: '' @@ -245,7 +249,8 @@ paths: /runs/{run_id}/cancel: post: summary: Cancel a running workflow. 
- operationId: "ga4gh.wes.server.CancelRun" + x-swagger-router-controller: ga4gh.wes.server + operationId: CancelRun responses: '200': description: '' @@ -281,7 +286,8 @@ paths: This provides an abbreviated (and likely fast depending on implementation) status of the running workflow, returning a simple result with the overall state of the workflow run (e.g. RUNNING, see the State section). - operationId: "ga4gh.wes.server.GetRunStatus" + x-swagger-router-controller: ga4gh.wes.server + operationId: GetRunStatus responses: '200': description: '' From 8767dc35a7f611fc92ec6a10b9f955565d971d60 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 00:37:17 +0100 Subject: [PATCH 04/29] fix: revert changes --- cwl_wes/__init__.py | 0 cwl_wes/config.yaml | 4 ++-- cwl_wes/ga4gh/wes/server.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 cwl_wes/__init__.py diff --git a/cwl_wes/__init__.py b/cwl_wes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index 3707ecb..b39f825 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -57,8 +57,8 @@ api: specs: - path: - api/20181010.be85140.workflow_execution_service.swagger.yaml - add_operation_fields: - x-openapi-router-controller: cwl_wes.ga4gh.wes.controllers + # add_operation_fields: + # x-swagger-router-controller: cwl_wes.ga4gh.wes.server add_security_fields: x-bearerInfoFunc: app.validate_token disable_auth: True diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index c217f3e..9847c4a 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -22,7 +22,7 @@ # GET /runs/ @log_traffic -def GetRunLog(run_id, *args, **kwargs) -> dict: +def GetRunLog(run_id, *args, **kwargs): """Returns detailed run info.""" response = get_run_log.get_run_log( config=current_app.config, @@ -35,7 +35,7 @@ def GetRunLog(run_id, *args, **kwargs) -> dict: # POST /runs//cancel @log_traffic -def 
CancelRun(run_id, *args, **kwargs) -> dict: +def CancelRun(run_id, *args, **kwargs): """Cancels unfinished workflow run.""" response = cancel_run.cancel_run( config=current_app.config, @@ -49,7 +49,7 @@ def CancelRun(run_id, *args, **kwargs) -> dict: # GET /runs//status @log_traffic -def GetRunStatus(run_id, *args, **kwargs) -> dict: +def GetRunStatus(run_id, *args, **kwargs): """Returns run status.""" response = get_run_status.get_run_status( config=current_app.config, @@ -62,7 +62,7 @@ def GetRunStatus(run_id, *args, **kwargs) -> dict: # GET /service-info @log_traffic -def GetServiceInfo(*args, **kwargs) -> dict: +def GetServiceInfo(*args, **kwargs): """Returns service info.""" response = get_service_info.get_service_info( config=current_app.config, @@ -74,7 +74,7 @@ def GetServiceInfo(*args, **kwargs) -> dict: # GET /runs @log_traffic -def ListRuns(*args, **kwargs) -> dict: +def ListRuns(*args, **kwargs): """Lists IDs and status of all workflow runs.""" response = list_runs.list_runs( config=current_app.config, @@ -86,7 +86,7 @@ def ListRuns(*args, **kwargs) -> dict: # POST /runs @log_traffic -def RunWorkflow(*args, **kwargs) -> dict: +def RunWorkflow(*args, **kwargs): """Executes workflow.""" response = run_workflow.run_workflow( config=current_app.config, From 21b717e739536c04236c6f5fd29d6dc0f2a9d39f Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 09:46:19 +0100 Subject: [PATCH 05/29] fix: config --- cwl_wes/config.yaml | 2 -- cwl_wes/ga4gh/wes/server.py | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index b39f825..7381c64 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -57,8 +57,6 @@ api: specs: - path: - api/20181010.be85140.workflow_execution_service.swagger.yaml - # add_operation_fields: - # x-swagger-router-controller: cwl_wes.ga4gh.wes.server add_security_fields: x-bearerInfoFunc: app.validate_token disable_auth: True diff --git 
a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index 9847c4a..398813e 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -8,12 +8,12 @@ from foca.utils.logging import log_traffic -import cwl_wes.ga4gh.wes.endpoints.cancel_run as cancel_run -import cwl_wes.ga4gh.wes.endpoints.get_run_log as get_run_log -import cwl_wes.ga4gh.wes.endpoints.get_run_status as get_run_status -import cwl_wes.ga4gh.wes.endpoints.list_runs as list_runs -import cwl_wes.ga4gh.wes.endpoints.run_workflow as run_workflow -import cwl_wes.ga4gh.wes.endpoints.get_service_info as get_service_info +from cwl_wes.ga4gh.wes.endpoints.cancel_run import cancel_run +from cwl_wes.ga4gh.wes.endpoints.get_run_log import get_run_log +from cwl_wes.ga4gh.wes.endpoints.get_run_status import get_run_status +from cwl_wes.ga4gh.wes.endpoints.list_runs import list_runs +from cwl_wes.ga4gh.wes.endpoints.run_workflow import run_workflow +from cwl_wes.ga4gh.wes.endpoints.get_service_info import get_service_info # Get logger instance @@ -24,7 +24,7 @@ @log_traffic def GetRunLog(run_id, *args, **kwargs): """Returns detailed run info.""" - response = get_run_log.get_run_log( + response = get_run_log( config=current_app.config, run_id=run_id, *args, @@ -37,7 +37,7 @@ def GetRunLog(run_id, *args, **kwargs): @log_traffic def CancelRun(run_id, *args, **kwargs): """Cancels unfinished workflow run.""" - response = cancel_run.cancel_run( + response = cancel_run( config=current_app.config, celery_app=celery_app, run_id=run_id, From 84770fdf1986f10f629176a7572933c02be79899 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 11:14:50 +0100 Subject: [PATCH 06/29] fix: migration issues --- cwl_wes/config.yaml | 4 +++- cwl_wes/ga4gh/wes/endpoints/get_service_info.py | 9 +++++---- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 4 ++-- deployment/values.yaml | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index 
7381c64..95cf9fa 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -58,7 +58,9 @@ api: - path: - api/20181010.be85140.workflow_execution_service.swagger.yaml add_security_fields: - x-bearerInfoFunc: app.validate_token + x-apikeyInfoFunc: app.validate_token + add_operation_fields: + x-swagger-router-controller: ga4gh.wes.server disable_auth: True connexion: strict_validation: True diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py index 0fca70e..6bbb553 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py @@ -6,6 +6,7 @@ from typing import (Any, Dict, Mapping) from pymongo import collection as Collection +from flask import Config import cwl_wes.utils.db_utils as db_utils from cwl_wes.ga4gh.wes.states import States @@ -17,16 +18,16 @@ # Helper function GET /service-info def get_service_info( - config: Mapping, + config: Config, silent: bool = False, *args: Any, **kwarg: Any ): """Returns readily formatted service info or `None` (in silent mode); creates service info database document if it does not exist.""" - collection_service_info = config['database']['collections']['service_info'] - collection_runs = config['database']['collections']['runs'] - service_info = deepcopy(config['service_info']) + collection_service_info = config.foca.db.dbs['cwl-wes-db'].collections['service_info'] + collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + service_info = deepcopy(config.foca.custom.service_info.dict()) # Write current service info to database if absent or different from latest if not service_info == db_utils.find_one_latest(collection_service_info): diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index 4130bc3..e4ba7cd 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -187,8 +187,8 @@ def 
__create_run_environment( collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] out_dir = config.foca.custom.storage.permanent_dir tmp_dir = config.foca.custom.storage.tmp_dir - run_id_charset = eval(config.foca.custom.run_id.charset) - run_id_length = config.foca.custom.run_id.length + run_id_charset = eval(config.foca.custom.runs_id.charset) + run_id_length = config.foca.custom.runs_id.length # Keep on trying until a unique run id was found and inserted # TODO: If no more possible IDs => inf loop; fix (raise custom error; 500 diff --git a/deployment/values.yaml b/deployment/values.yaml index f6a8e07..fa913e6 100644 --- a/deployment/values.yaml +++ b/deployment/values.yaml @@ -9,7 +9,7 @@ storageAccessMode: ReadWriteOnce # mongodb-pvc.yaml/rabbitmq-pvc.yaml, change t extra_config: folder: /etc/app_config - file: app_config.yaml + file: config.yaml autocert: createJob: "true" # actually create autocert cronjob, for K8S with autocert installed set to "false" From 6c21420ce21e2d9b19e971c9ccbd9251401db24a Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 11:35:55 +0100 Subject: [PATCH 07/29] fix server runs --- cwl_wes/ga4gh/wes/server.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index 398813e..835ae3b 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -51,7 +51,7 @@ def CancelRun(run_id, *args, **kwargs): @log_traffic def GetRunStatus(run_id, *args, **kwargs): """Returns run status.""" - response = get_run_status.get_run_status( + response = get_run_status( config=current_app.config, run_id=run_id, *args, @@ -64,7 +64,7 @@ def GetRunStatus(run_id, *args, **kwargs): @log_traffic def GetServiceInfo(*args, **kwargs): """Returns service info.""" - response = get_service_info.get_service_info( + response = get_service_info( config=current_app.config, *args, **kwargs @@ -76,7 +76,7 @@ def GetServiceInfo(*args, **kwargs): 
@log_traffic def ListRuns(*args, **kwargs): """Lists IDs and status of all workflow runs.""" - response = list_runs.list_runs( + response = list_runs( config=current_app.config, *args, **kwargs @@ -88,7 +88,7 @@ def ListRuns(*args, **kwargs): @log_traffic def RunWorkflow(*args, **kwargs): """Executes workflow.""" - response = run_workflow.run_workflow( + response = run_workflow( config=current_app.config, form_data=request.form, *args, From a24b555e66008f5a734e3cebf7d1436bb47840c4 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 11:45:50 +0100 Subject: [PATCH 08/29] fix: collection --- cwl_wes/ga4gh/wes/endpoints/cancel_run.py | 3 ++- cwl_wes/ga4gh/wes/endpoints/get_run_log.py | 3 ++- cwl_wes/ga4gh/wes/endpoints/get_run_status.py | 3 ++- cwl_wes/ga4gh/wes/endpoints/get_service_info.py | 4 ++-- cwl_wes/ga4gh/wes/endpoints/list_runs.py | 4 ++-- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 3 ++- cwl_wes/tasks/register_celery.py | 3 ++- 7 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py index aaafbb4..0050c66 100644 --- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py +++ b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py @@ -7,6 +7,7 @@ from connexion.exceptions import Forbidden from flask import Config +from pymongo.collection import Collection from cwl_wes.exceptions import WorkflowNotFound from cwl_wes.ga4gh.wes.states import States @@ -27,7 +28,7 @@ def cancel_run( ) -> Dict: """Cancels running workflow.""" foca_config = config.foca - collection_runs = foca_config.db.dbs['cwl-wes-db'].collections['runs'] + collection_runs: Collection = foca_config.db.dbs['cwl-wes-db'].collections['runs'].client document = collection_runs.find_one( filter={'run_id': run_id}, projection={ diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py index 2732ee0..07d15ab 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py +++ 
b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py @@ -7,6 +7,7 @@ from cwl_wes.exceptions import WorkflowNotFound from flask import Config +from pymongo.collection import Collection # Get logger instance @@ -21,7 +22,7 @@ def get_run_log( **kwargs ) -> Dict: """Gets detailed log information for specific run.""" - collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client document = collection_runs.find_one( filter={'run_id': run_id}, projection={ diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py index c1dd9b1..516c235 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py @@ -7,6 +7,7 @@ from flask import Config from cwl_wes.exceptions import WorkflowNotFound +from pymongo.collection import Collection # Get logger instance @@ -21,7 +22,7 @@ def get_run_status( **kwargs ) -> Dict: """Gets status information for specific run.""" - collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client document = collection_runs.find_one( filter={'run_id': run_id}, projection={ diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py index 6bbb553..a79ea5f 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py @@ -25,8 +25,8 @@ def get_service_info( ): """Returns readily formatted service info or `None` (in silent mode); creates service info database document if it does not exist.""" - collection_service_info = config.foca.db.dbs['cwl-wes-db'].collections['service_info'] - collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + collection_service_info: Collection.Collection = config.foca.db.dbs['cwl-wes-db'].collections['service_info'].client 
+ collection_runs: Collection.Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client service_info = deepcopy(config.foca.custom.service_info.dict()) # Write current service info to database if absent or different from latest diff --git a/cwl_wes/ga4gh/wes/endpoints/list_runs.py b/cwl_wes/ga4gh/wes/endpoints/list_runs.py index 2bc6c10..bd7b2f4 100644 --- a/cwl_wes/ga4gh/wes/endpoints/list_runs.py +++ b/cwl_wes/ga4gh/wes/endpoints/list_runs.py @@ -3,7 +3,7 @@ from typing import Dict from bson.objectid import ObjectId - +from pymongo.collection import Collection from flask import Config @@ -18,7 +18,7 @@ def list_runs( **kwargs ) -> Dict: """Lists IDs and status for all workflow runs.""" - collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client # Fall back to default page size if not provided by user if 'page_size' in kwargs: diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index e4ba7cd..25917f7 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -18,6 +18,7 @@ from werkzeug.utils import secure_filename from flask import Config, request +from pymongo.collection import Collection from cwl_wes.exceptions import BadRequest from cwl_wes.tasks.tasks.run_workflow import task__run_workflow @@ -184,7 +185,7 @@ def __create_run_environment( ) -> Dict: """Creates unique run identifier and permanent and temporary storage directories for current run.""" - collection_runs = config.foca.db.dbs['cwl-wes-db'].collections['runs'] + collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client out_dir = config.foca.custom.storage.permanent_dir tmp_dir = config.foca.custom.storage.tmp_dir run_id_charset = eval(config.foca.custom.runs_id.charset) diff --git a/cwl_wes/tasks/register_celery.py b/cwl_wes/tasks/register_celery.py 
index c4c76ec..58ac0ac 100644 --- a/cwl_wes/tasks/register_celery.py +++ b/cwl_wes/tasks/register_celery.py @@ -5,6 +5,7 @@ import os from foca.factories.celery_app import create_celery_app +from pymongo.collection import Collection from cwl_wes.tasks.celery_task_monitor import TaskMonitor @@ -23,7 +24,7 @@ def register_task_service(app: App) -> None: celery_app = create_celery_app(app.app) TaskMonitor( celery_app=celery_app, - collection=foca_config.db.dbs['cwl-wes-db'].collections['runs'], + collection=foca_config.db.dbs['cwl-wes-db'].collections['runs'].client, tes_config={ 'url': custom_config.tes_server.url, 'query_params': custom_config.tes_server.status_query_params, From 00068bf1dd0ab119ea1ccb5c41a04243d1c234ed Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 14:26:34 +0100 Subject: [PATCH 09/29] fix: files --- cwl_wes/app.py | 3 ++- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 6 +++--- cwl_wes/gunicorn.py | 3 ++- cwl_wes/tasks/register_celery.py | 10 +++------- cwl_wes/worker.py | 12 ++++++------ 5 files changed, 16 insertions(+), 18 deletions(-) diff --git a/cwl_wes/app.py b/cwl_wes/app.py index 9392b4b..722c411 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -25,7 +25,8 @@ def init_app() -> App: service_info.set_service_info( data=current_app.config.foca.custom.service_info.dict() ) - register_task_service(app) + celery_app = foca.create_celery_app() + register_task_service(celery_app) return app diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index 25917f7..b9a8479 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -502,8 +502,8 @@ def __run_workflow( **kwargs ) -> None: """Helper function `run_workflow()`.""" - tes_url = config.custom.tes_server.url - remote_storage_url = config.custom.storage.remote_storage_url + tes_url = config.foca.custom.tes_server.url + remote_storage_url = 
config.foca.custom.storage.remote_storage_url run_id = document['run_id'] task_id = document['task_id'] tmp_dir = document['internal']['tmp_dir'] @@ -546,7 +546,7 @@ def __run_workflow( # ] # Get timeout duration - timeout_duration = config.custom.endpoint_params.timeout_run_workflow + timeout_duration = config.foca.custom.endpoint_params.timeout_run_workflow # Execute command as background task logger.info( diff --git a/cwl_wes/gunicorn.py b/cwl_wes/gunicorn.py index 007c074..7ce6163 100644 --- a/cwl_wes/gunicorn.py +++ b/cwl_wes/gunicorn.py @@ -3,7 +3,8 @@ from cwl_wes.app import init_app # Source application configuration -app_config = init_app().app.config.foca +app = init_app().app +app_config = app.config.foca # Set Gunicorn number of workers and threads workers = int(os.environ.get('GUNICORN_PROCESSES', '1')) diff --git a/cwl_wes/tasks/register_celery.py b/cwl_wes/tasks/register_celery.py index 58ac0ac..2ba563f 100644 --- a/cwl_wes/tasks/register_celery.py +++ b/cwl_wes/tasks/register_celery.py @@ -1,12 +1,9 @@ """Function to create Celery app instance and register task monitor.""" -from connexion import App +from celery import Celery import logging import os -from foca.factories.celery_app import create_celery_app -from pymongo.collection import Collection - from cwl_wes.tasks.celery_task_monitor import TaskMonitor @@ -14,14 +11,13 @@ logger = logging.getLogger(__name__) -def register_task_service(app: App) -> None: +def register_task_service(celery_app: Celery) -> None: """Instantiates Celery app and registers task monitor.""" # Ensure that code is executed only once when app reloader is used if os.environ.get("WERKZEUG_RUN_MAIN") != 'true': # Start task monitor daemon - foca_config = app.app.config.foca + foca_config = celery_app.conf.foca custom_config = foca_config.custom - celery_app = create_celery_app(app.app) TaskMonitor( celery_app=celery_app, collection=foca_config.db.dbs['cwl-wes-db'].collections['runs'].client, diff --git a/cwl_wes/worker.py 
b/cwl_wes/worker.py index 21f1a5f..1e58d4d 100644 --- a/cwl_wes/worker.py +++ b/cwl_wes/worker.py @@ -1,8 +1,8 @@ """Entry point for Celery workers.""" +from foca.foca import Foca -from foca.factories.celery_app import create_celery_app - -from cwl_wes.app import init_app - -flask_app = init_app().app -celery = create_celery_app(app=flask_app) \ No newline at end of file +foca = Foca( + config_file="config.yaml", + custom_config_model='cwl_wes.custom_config.CustomConfig', +) +celery = foca.create_celery_app() \ No newline at end of file From c97f9eb6f9570a5839fff676719b8554fbc9cd65 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 15:16:23 +0100 Subject: [PATCH 10/29] update: requirements --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 917c1cf..76297d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ --e git+https://github.com/ohsu-comp-bio/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes +-e git+https://github.com/uniqueg/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes drs-cli==0.2.3 -foca==0.10.0 +foca==0.11.0 gunicorn==19.9.0 py-tes==0.4.2 python-dateutil==2.6.1 \ No newline at end of file From e6451415998d5bb0165eb308bb9ad0b3a58359a0 Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 16:45:58 +0100 Subject: [PATCH 11/29] fix: requirements.txt --- cwl_wes/tasks/tasks/cancel_run.py | 4 ++-- cwl_wes/tasks/tasks/run_workflow.py | 8 ++++---- cwl_wes/worker.py | 2 +- requirements.txt | 8 +++++--- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cwl_wes/tasks/tasks/cancel_run.py b/cwl_wes/tasks/tasks/cancel_run.py index 66b9e56..7246865 100644 --- a/cwl_wes/tasks/tasks/cancel_run.py +++ b/cwl_wes/tasks/tasks/cancel_run.py @@ -10,7 +10,7 @@ from flask import current_app from pymongo import collection as Collection -from cwl_wes.worker import celery +from cwl_wes.worker import celery_app import 
cwl_wes.utils.db_utils as db_utils from foca.database.register_mongodb import _create_mongo_client from cwl_wes.ga4gh.wes.states import States @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -@celery.task( +@celery_app.task( name='tasks.cancel_run', ignore_result=True, bind=True, diff --git a/cwl_wes/tasks/tasks/run_workflow.py b/cwl_wes/tasks/tasks/run_workflow.py index a564fce..aa3c94f 100644 --- a/cwl_wes/tasks/tasks/run_workflow.py +++ b/cwl_wes/tasks/tasks/run_workflow.py @@ -6,14 +6,14 @@ import subprocess from typing import (Dict, List, Optional, Tuple) -from cwl_wes.worker import celery +from cwl_wes.worker import celery_app # Get logger instance logger = logging.getLogger(__name__) -@celery.task( +@celery_app.task( name='tasks.run_workflow', bind=True, ignore_result=True, @@ -47,7 +47,7 @@ def task__run_workflow( def __process_cwl_logs( - task: celery.Task, + task: celery_app.Task, stream: TextIOWrapper, token: Optional[str] = None, ) -> Tuple[List, List]: @@ -141,7 +141,7 @@ def __extract_tes_task_state_from_cwl_tes_log( def __send_event_tes_task_update( - task: celery.Task, + task: celery_app.Task, tes_id: str, tes_state: Optional[str] = None, token: Optional[str] = None, diff --git a/cwl_wes/worker.py b/cwl_wes/worker.py index 1e58d4d..a5fb293 100644 --- a/cwl_wes/worker.py +++ b/cwl_wes/worker.py @@ -5,4 +5,4 @@ config_file="config.yaml", custom_config_model='cwl_wes.custom_config.CustomConfig', ) -celery = foca.create_celery_app() \ No newline at end of file +celery_app = foca.create_celery_app() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 76297d9..e54a370 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ --e git+https://github.com/uniqueg/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes -drs-cli==0.2.3 foca==0.11.0 +-e git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes +# -e 
git+https://github.com/uniqueg/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes +drs-cli==0.2.3 gunicorn==19.9.0 py-tes==0.4.2 -python-dateutil==2.6.1 \ No newline at end of file +python-dateutil==2.6.1 +importlib-metadata==4.13.0 \ No newline at end of file From 574f62afbc0956765f38335f64484d3f22c0fa3f Mon Sep 17 00:00:00 2001 From: kushagra Date: Thu, 10 Nov 2022 19:04:03 +0100 Subject: [PATCH 12/29] fix: requirements --- cwl_wes/app.py | 3 +-- cwl_wes/tasks/register_celery.py | 7 ++++--- requirements.txt | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cwl_wes/app.py b/cwl_wes/app.py index 722c411..0fc8fce 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -25,8 +25,7 @@ def init_app() -> App: service_info.set_service_info( data=current_app.config.foca.custom.service_info.dict() ) - celery_app = foca.create_celery_app() - register_task_service(celery_app) + register_task_service() return app diff --git a/cwl_wes/tasks/register_celery.py b/cwl_wes/tasks/register_celery.py index 2ba563f..e1856e1 100644 --- a/cwl_wes/tasks/register_celery.py +++ b/cwl_wes/tasks/register_celery.py @@ -1,9 +1,10 @@ """Function to create Celery app instance and register task monitor.""" -from celery import Celery +from cwl_wes.worker import celery_app import logging import os +from foca.models.config import Config from cwl_wes.tasks.celery_task_monitor import TaskMonitor @@ -11,12 +12,12 @@ logger = logging.getLogger(__name__) -def register_task_service(celery_app: Celery) -> None: +def register_task_service() -> None: """Instantiates Celery app and registers task monitor.""" # Ensure that code is executed only once when app reloader is used if os.environ.get("WERKZEUG_RUN_MAIN") != 'true': # Start task monitor daemon - foca_config = celery_app.conf.foca + foca_config: Config = celery_app.conf.foca custom_config = foca_config.custom TaskMonitor( celery_app=celery_app, diff --git a/requirements.txt b/requirements.txt index e54a370..a24a143 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ -foca==0.11.0 +foca==0.12.0 -e git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes -# -e git+https://github.com/uniqueg/cwl-tes.git@57a193cabab2444bc9b661f83011837bd5ed571a#egg=cwl-tes drs-cli==0.2.3 gunicorn==19.9.0 py-tes==0.4.2 From 8b138a0e03cd5a7ff095ae9254fac9e26946bba6 Mon Sep 17 00:00:00 2001 From: kushagra Date: Sun, 1 Jan 2023 00:32:57 +0530 Subject: [PATCH 13/29] Removed task monitor and enabled background data save flow (#251) Co-authored-by: Alex Kanitz --- cwl_wes/app.py | 2 - cwl_wes/config.yaml | 36 +- cwl_wes/custom_config.py | 281 +++++++++- cwl_wes/ga4gh/wes/endpoints/cancel_run.py | 2 +- cwl_wes/ga4gh/wes/endpoints/list_runs.py | 2 +- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 10 +- cwl_wes/tasks/celery_task_monitor.py | 592 -------------------- cwl_wes/tasks/register_celery.py | 35 -- cwl_wes/tasks/tasks/cancel_run.py | 5 +- cwl_wes/tasks/tasks/cwl_log_processor.py | 297 ++++++++++ cwl_wes/tasks/tasks/run_workflow.py | 140 +---- cwl_wes/tasks/tasks/workflow_run_manager.py | 394 +++++++++++++ 12 files changed, 993 insertions(+), 803 deletions(-) delete mode 100644 cwl_wes/tasks/celery_task_monitor.py delete mode 100644 cwl_wes/tasks/register_celery.py create mode 100644 cwl_wes/tasks/tasks/cwl_log_processor.py create mode 100644 cwl_wes/tasks/tasks/workflow_run_manager.py diff --git a/cwl_wes/app.py b/cwl_wes/app.py index 0fc8fce..e2f4110 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -6,7 +6,6 @@ from flask import current_app from foca import Foca -from cwl_wes.tasks.register_celery import register_task_service from cwl_wes.ga4gh.wes.service_info import ServiceInfo from cwl_wes.exceptions import NotFound @@ -25,7 +24,6 @@ def init_app() -> App: service_info.set_service_info( data=current_app.config.foca.custom.service_info.dict() ) - register_task_service() return app diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml 
index 95cf9fa..59e038d 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -118,13 +118,27 @@ custom: tmp_dir: '/data/tmp' remote_storage_url: 'ftp://ftp-private.ebi.ac.uk/upload/foivos' celery: - monitor: - timeout: 0.1 + timeout: 0.1 message_maxsize: 16777216 - endpoint_params: + controller: default_page_size: 5 timeout_cancel_run: 60 timeout_run_workflow: null + tes_server: + url: 'http://62.217.122.249:31567/' + timeout: 5 + status_query_params: 'FULL' + drs_server: + port: null # use this port for resolving DRS URIs; set to `null` to use default (443) + base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) + use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) + file_types: # extensions of files to scan for DRS URI resolution + - cwl + - yaml + - yml + runs_id: + length: 6 + charset: string.ascii_uppercase + string.digits service_info: contact_info: 'https://github.com/elixir-cloud-aai/cwl-WES' auth_instructions_url: 'https://www.elixir-europe.org/services/compute/aai' @@ -147,19 +161,3 @@ custom: default_value: '5' tags: known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' - app_version: 0.15.0 - tes_server: - url: 'https://csc-tesk.c03.k8s-popup.csc.fi/' - timeout: 5 - status_query_params: 'FULL' - drs_server: - port: null # use this port for resolving DRS URIs; set to `null` to use default (443) - base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) - use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) - file_types: # extensions of files to scan for DRS URI resolution - - cwl - - yaml - - yml - runs_id: - length: 6 - charset: string.ascii_uppercase + string.digits diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index 1358add..d66be5c 100644 --- 
a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -3,25 +3,61 @@ from typing import Dict, List, Optional from foca.models.config import FOCABaseConfig + class StorageConfig(FOCABaseConfig): + """Model for task run and storage configuration. + + Args: + tmp_dir: Temporary run directory path + permanent_dir: Permanent working directory path + remote_storage_url: Remote file storage FTP endpoint + + Attributes: + tmp_dir: Temporary run directory path + permanent_dir: Permanent working directory path + remote_storage_url: Remote file storage FTP endpoint + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> StorageConfig( + ... tmp_dir='/data/tmp', + ... permanent_dir='/data/output', + ... remote_storage_url='ftp://ftp.private/upload' + ... ) + StorageConfig(tmp_dir='/data/tmp', permanent_dir='/data/output', remote_storage_url='ftp://ftp.private/upload') + """ permanent_dir: str = '/data/output' tmp_dir: str = '/data/tmp' remote_storage_url: str = 'ftp://ftp-private.ebi.ac.uk/upload/foivos' -class MonitorConfig(FOCABaseConfig): - timeout: float = 0.1 +class CeleryConfig(FOCABaseConfig): + """Model for celery configurations. + Args: + timeout: Celery task timeout. + message_maxsize: Celery message max size. -class CeleryConfig(FOCABaseConfig): - monitor: MonitorConfig = MonitorConfig() - message_maxsize: int = 16777216 + Attributes: + timeout: Celery task timeout. + message_maxsize: Celery message max size. + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. -class EndpointConfig(FOCABaseConfig): - default_page_size: int = 5 - timeout_cancel_run: int = 60 - timeout_run_workflow: Optional[int] = None + Example: + >>> CeleryConfig( + ... timeout=15, + ... message_maxsize=1024 + ... 
) + CeleryConfig(timeout=15, message_maxsize=1024) + """ + timeout: float = 0.1 + message_maxsize: int = 16777216 class WorkflowTypeVersionConfig(FOCABaseConfig): @@ -29,20 +65,48 @@ class WorkflowTypeVersionConfig(FOCABaseConfig): Args: workflow_type_version: List of one or more acceptable versions for the workflow type. + + Attributes: + workflow_type_version: List of one or more acceptable versions for the + workflow type. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> WorkflowTypeVersionConfig( + ... workflow_type_version=['v1.0'] + ... ) + WorkflowTypeVersionConfig(workflow_type_version=['v1.0']) """ workflow_type_version: Optional[List[str]] = [] class DefaultWorkflowEngineParameterConfig(FOCABaseConfig): """Model for default workflow engine parameters. + Args: name: Parameter name. type: Parameter type. default_value: Stringified version of default parameter. + Attributes: name: Parameter name. type: Parameter type. default_value: Stringified version of default parameter. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> DefaultWorkflowEngineParameterConfig( + ... name='name', + ... type='str', + ... default_value='default' + ... ) + DefaultWorkflowEngineParameterConfig(name='name', type='str', default_value='default') """ name: Optional[str] type: Optional[str] @@ -50,11 +114,87 @@ class DefaultWorkflowEngineParameterConfig(FOCABaseConfig): class TagsConfig(FOCABaseConfig): + """Model for service info tag configuration. + + Args: + known_tes_endpoints: Valid TES endpoints. + + Attributes: + known_tes_endpoints: Valid TES endpoints. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> TagsConfig( + ... known_tes_endpoints='https://tes.endpoint', + ... 
) + TagsConfig(known_tes_endpoints='https://tes.endpoint') + """ known_tes_endpoints: str = 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' - app_version: str = '0.15.0' class ServiceInfoConfig(FOCABaseConfig): + """Model for service info configurations. + + Args: + contact_info: Email address/webpage URL with contact information. + auth_instructions_url: Web page URL with information about how to get an + authorization token necessary to use a specific endpoint. + supported_filesystem_protocols: Filesystem protocols supported by this + service. + supported_wes_versions: Version(s) of the WES schema supported by this + service. + workflow_type_versions: Map with keys as the workflow format type name and + value is a `WorkflowTypeVersionConfig` object which simply contains an + array of one or more version strings. + workflow_engine_versions: Workflow engine(s) used by this WES service. + default_workflow_engine_parameters: Each workflow engine can present additional + parameters that can be sent to the workflow engine. + tags: A key-value map of arbitrary, extended metadata outside the scope of the above but + useful to report back. + + Attributes: + contact_info: Email address/webpage URL with contact information. + auth_instructions_url: Web page URL with information about how to get an + authorization token necessary to use a specific endpoint. + supported_filesystem_protocols: Filesystem protocols supported by this + service. + supported_wes_versions: Version(s) of the WES schema supported by this + service. + workflow_type_versions: Map with keys as the workflow format type name and + value is a `WorkflowTypeVersionConfig` object which simply contains an + array of one or more version strings. + workflow_engine_versions: Workflow engine(s) used by this WES service. 
+ default_workflow_engine_parameters: Each workflow engine can present additional + parameters that can be sent to the workflow engine. + tags: A key-value map of arbitrary, extended metadata outside the scope of the above but + useful to report back. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> ServiceInfoConfig( + ... contact_info='https://contact.url', + ... auth_instructions_url='https://auth.url', + ... supported_filesystem_protocols=['ftp', 'https', 'local'], + ... supported_wes_versions=['1.0.0'], + ... workflow_type_versions={'CWL': WorkflowTypeVersionConfig(workflow_type_version=['v1.0'])}, + ... workflow_engine_versions={}, + ... default_workflow_engine_parameters=[], + ... tags=TagsConfig(known_tes_endpoints='https://tes.endpoint/') + ... ) + ServiceInfoConfig(contact_info='https://github.com/elixir-cloud-aai/cwl-WES', auth_instruc\ + tions_url='https://www.elixir-europe.org/services/compute/aai', supported_filesystem_proto\ + cols=['ftp', 'https', 'local'], supported_wes_versions=['1.0.0'], workflow_type_versions={\ + 'CWL': WorkflowTypeVersionConfig(workflow_type_version=['v1.0'])}, workflow_engine_version\ + s={}, default_workflow_engine_parameters=[], tags=TagsConfig(known_tes_endpoints='https://\ + tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|h\ + ttps://tesk.c01.k8s-popup.csc.fi/')) + """ contact_info: str = 'https://github.com/elixir-cloud-aai/cwl-WES' auth_instructions_url: str = 'https://www.elixir-europe.org/services/compute/aai' supported_filesystem_protocols: List[str] = ['ftp', 'https', 'local'] @@ -70,12 +210,69 @@ class ServiceInfoConfig(FOCABaseConfig): class TesServerConfig(FOCABaseConfig): + """Model for tes server configuration. + + Args: + url: TES Endpoint URL. + timeout: Request time out. + status_query_params: Request query parameters. + + Attributes: + url: TES Endpoint URL. + timeout: Request time out. 
+ status_query_params: Request query parameters. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> TesServerConfig( + ... url='https://tes.endpoint', + ... timeout=5, + ... status_query_params='FULL' + ... ) + TesServerConfig(url='https://tes.endpoint', timeout=5, status_query_params='FULL') + """ url: str = 'https://csc-tesk.c03.k8s-popup.csc.fi/' timeout: int = 5 status_query_params: str = 'FULL' class DRSServerConfig(FOCABaseConfig): + """Model for DRS server configuration. + + Args: + port: Port for resolving DRS URIs; + set to `null` to use default (443). + base_path: Base path for resolving DRS URIs; + set to `null` to use default (`ga4gh/drs/v1`). + use_http: Use `http` for resolving DRS URIs; + set to `False` to use default (`https`). + file_types: Extensions of files to scan for DRS URI resolution. + + Attributes: + port: Port for resolving DRS URIs; + set to `null` to use default (443). + base_path: Base path for resolving DRS URIs; + set to `null` to use default (`ga4gh/drs/v1`). + use_http: Use `http` for resolving DRS URIs; + set to `False` to use default (`https`). + file_types: Extensions of files to scan for DRS URI resolution. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> DRSServerConfig( + ... port=443, + ... base_path='ga4gh/drs/v1', + ... use_http=False, + ... file_types=['cwl', 'yaml', 'yml'] + ... ) + DRSServerConfig(port=443, base_path='ga4gh/drs/v1', use_http=False, file_types=['cwl', 'yaml', 'yml']) + """ port: Optional[int] = None base_path: Optional[str] = None use_http: bool = False @@ -110,12 +307,66 @@ class IdConfig(FOCABaseConfig): charset: str = string.ascii_uppercase + string.digits +class ControllerConfig(FOCABaseConfig): + """Model for controller configurations. + + Args: + default_page_size: Pagination page size. + timeout_cancel_run: Timeout for `cancel_run` workflow. 
+ timeout_run_workflow: Timeout for `run_workflow` workflow. + tes_server: TES Server config parameters. + drs_server: DRS Server config parameters. + runs_id: Identifier config parameters. + + Attributes: + default_page_size: Pagination page size. + timeout_cancel_run: Timeout for `cancel_run` workflow. + timeout_run_workflow: Timeout for `run_workflow` workflow. + tes_server: TES Server config parameters. + drs_server: DRS Server config parameters. + runs_id: Identifier config parameters. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. + + Example: + >>> ControllerConfig( + ... default_page_size=5, + ... timeout_cancel_run=60, + ... timeout_run_workflow=None + ... ) + ControllerConfig(default_page_size=5, timeout_cancel_run=60, timeout_run_workflow=60) + """ + default_page_size: int = 5 + timeout_cancel_run: int = 60 + timeout_run_workflow: Optional[int] = None + tes_server: TesServerConfig = TesServerConfig() + drs_server: DRSServerConfig = DRSServerConfig() + runs_id: IdConfig = IdConfig() + + class CustomConfig(FOCABaseConfig): + """Model for custom configuration parameters. + + Args: + storage: Storage config parameters. + celery: Celery config parameters. + controller: Controller config parameters. + service_info: Service Info config parameters. + + Attributes: + storage: Storage config parameters. + celery: Celery config parameters. + controller: Controller config parameters. + service_info: Service Info config parameters. + + Raises: + pydantic.ValidationError: The class was instantianted with an illegal + data type. 
+ """ storage: StorageConfig = StorageConfig() celery: CeleryConfig = CeleryConfig() - endpoint_params: EndpointConfig = EndpointConfig() + controller: ControllerConfig = ControllerConfig() service_info: ServiceInfoConfig = ServiceInfoConfig() - tes_server: TesServerConfig = TesServerConfig() - drs_server: DRSServerConfig = DRSServerConfig() - runs_id: IdConfig = IdConfig() - \ No newline at end of file + diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py index 0050c66..6c5e7fa 100644 --- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py +++ b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py @@ -62,7 +62,7 @@ def cancel_run( if document['api']['state'] in States.CANCELABLE: # Get timeout duration - timeout_duration = foca_config.custom.endpoint_params.timeout_cancel_run + timeout_duration = foca_config.custom.controller.timeout_cancel_run # Execute cancelation task in background task_id = uuid() diff --git a/cwl_wes/ga4gh/wes/endpoints/list_runs.py b/cwl_wes/ga4gh/wes/endpoints/list_runs.py index bd7b2f4..485443a 100644 --- a/cwl_wes/ga4gh/wes/endpoints/list_runs.py +++ b/cwl_wes/ga4gh/wes/endpoints/list_runs.py @@ -24,7 +24,7 @@ def list_runs( if 'page_size' in kwargs: page_size = kwargs['page_size'] else: - page_size = config.foca.custom.endpoint_params.default_page_size + page_size = config.foca.custom.controller.default_page_size # Extract/set page token if 'page_token' in kwargs: diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index b9a8479..d7f2842 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -188,8 +188,8 @@ def __create_run_environment( collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client out_dir = config.foca.custom.storage.permanent_dir tmp_dir = config.foca.custom.storage.tmp_dir - run_id_charset = eval(config.foca.custom.runs_id.charset) - run_id_length = 
config.foca.custom.runs_id.length + run_id_charset = eval(config.foca.custom.controller.runs_id.charset) + run_id_length = config.foca.custom.controller.runs_id.length # Keep on trying until a unique run id was found and inserted # TODO: If no more possible IDs => inf loop; fix (raise custom error; 500 @@ -256,7 +256,7 @@ def __create_run_environment( break # translate DRS URIs to access URLs - drs_server_conf = current_app.config.foca.custom.drs_server + drs_server_conf = current_app.config.foca.custom.controller.drs_server service_info_conf = current_app.config.foca.custom.service_info file_types: List[str] = drs_server_conf.file_types supported_access_methods: List[str] = service_info_conf.supported_filesystem_protocols @@ -502,7 +502,7 @@ def __run_workflow( **kwargs ) -> None: """Helper function `run_workflow()`.""" - tes_url = config.foca.custom.tes_server.url + tes_url = config.foca.custom.controller.tes_server.url remote_storage_url = config.foca.custom.storage.remote_storage_url run_id = document['run_id'] task_id = document['task_id'] @@ -546,7 +546,7 @@ def __run_workflow( # ] # Get timeout duration - timeout_duration = config.foca.custom.endpoint_params.timeout_run_workflow + timeout_duration = config.foca.custom.controller.timeout_run_workflow # Execute command as background task logger.info( diff --git a/cwl_wes/tasks/celery_task_monitor.py b/cwl_wes/tasks/celery_task_monitor.py deleted file mode 100644 index a47660a..0000000 --- a/cwl_wes/tasks/celery_task_monitor.py +++ /dev/null @@ -1,592 +0,0 @@ -"""Celery task monitor, event handlers and related utility functions.""" - -from ast import literal_eval -from datetime import datetime -import logging -import os -import re -from shlex import quote -from threading import Thread -from time import sleep -from typing import (Dict, List, Optional) - -from celery import Celery -from celery.events import Event -from celery.events.receiver import EventReceiver -from kombu.connection import Connection # noqa: 
F401 -from pymongo import collection as Collection -import tes - -import cwl_wes.utils.db_utils as db_utils - - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Set string time format -strf: str = '%Y-%m-%d %H:%M:%S.%f' - - -class TaskMonitor(): - """Celery task monitor.""" - - def __init__( - self, - celery_app: Celery, - collection: Collection, - tes_config: Dict[str, str], - timeout: float = 0, - authorization: bool = True, - ) -> None: - """Starts Celery task monitor daemon process.""" - self.celery_app = celery_app - self.collection = collection - self.timeout = timeout - self.authorization = authorization - self.tes_config = tes_config - - self.thread = Thread(target=self.run, args=()) - self.thread.daemon = True - self.thread.start() - - logger.debug('Celery task monitor daemon process started...') - - def run(self) -> None: - """Daemon process for Celery task monitor.""" - while True: - - try: - - with self.celery_app.connection() as \ - connection: # type: Connection - - listener: EventReceiver = self.celery_app.events.Receiver( - connection, - handlers={ - 'task-received': - self.on_task_received, - 'task-started': - self.on_task_started, - 'task-failed': - self.on_task_failed, - 'task-succeeded': - self.on_task_succeeded, - 'task-tes-task-update': - self.on_task_tes_task_update, - } - ) - listener.capture(limit=None, timeout=None, wakeup=True) - - except KeyboardInterrupt as e: - logger.exception( - ( - 'Task monitor interrupted. Execution aborted. ' - 'Original error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - raise SystemExit - - except Exception as e: - logger.exception( - ( - 'Unknown error in task monitor occurred. 
Original ' - 'error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Sleep for specified interval - sleep(self.timeout) - - def on_task_received( - self, - event: Event, - ) -> None: - """Event handler for received Celery tasks.""" - if not event['name'] == 'tasks.run_workflow': - return None - # Parse subprocess inputs - try: - kwargs = literal_eval(event['kwargs']) - except Exception as e: - logger.exception( - ( - "Field 'kwargs' in event message malformed. Original " - 'error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Build command - if 'command_list' in kwargs: - if self.authorization: - kwargs['command_list'][3] = '' - kwargs['command_list'][5] = '' - command = ' '.join( - [quote(item) for item in kwargs['command_list']] - ) - else: - command = 'N/A' - - # Create dictionary for internal parameters - internal = dict() - internal['task_received'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - internal['process_id_worker'] = event['pid'] - internal['host'] = event['hostname'] - - # Update run document in database - try: - self.update_run_document( - event=event, - state='QUEUED', - internal=internal, - task_received=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - command=command, - utc_offset=event['utcoffset'], - max_retries=event['retries'], - expires=event['expires'], - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. 
Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - - def on_task_started( - self, - event: Event, - ) -> None: - """Event handler for started Celery tasks.""" - if not self.collection.find_one({'task_id': event['uuid']}): - return None - internal = dict() - internal['task_started'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - # Update run document in database - try: - self.update_run_document( - event=event, - state='RUNNING', - internal=internal, - task_started=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - - def on_task_failed( - self, - event: Event, - ) -> None: - """Event handler for failed (system error) Celery tasks.""" - if not self.collection.find_one({'task_id': event['uuid']}): - return None - # Create dictionary for internal parameters - internal = dict() - internal['task_finished'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - internal['traceback'] = event['traceback'] - - # Update run document in databse - self.update_run_document( - event=event, - state='SYSTEM_ERROR', - internal=internal, - task_finished=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - exception=event['exception'], - ) - - def on_task_succeeded( - self, - event: Event, - ) -> None: - """Event handler for successful, failed and canceled Celery - tasks.""" - if not self.collection.find_one({'task_id': event['uuid']}): - return None - # Parse subprocess results - try: - (returncode, log, tes_ids, token) = literal_eval(event['result']) - log_list=log - log = os.linesep.join(log) - except Exception as e: - logger.exception( - ( - "Field 'result' in event message malformed. 
Original " - 'error message: {type}: {msg}' - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Create dictionary for internal parameters - internal = dict() - internal['task_finished'] = datetime.utcfromtimestamp( - event['timestamp'] - ) - - # Set final state to be set - document = self.collection.find_one( - filter={'task_id': event['uuid']}, - projection={ - 'api.state': True, - '_id': False, - } - ) - if document and document['api']['state'] == 'CANCELING': - state = 'CANCELED' - elif returncode: - state = 'EXECUTOR_ERROR' - else: - state = 'COMPLETE' - - # Extract run outputs - #outputs = self.__cwl_tes_outputs_parser(log) - outputs = self.__cwl_tes_outputs_parser_list(log_list) - - # Get task logs - task_logs = self.__get_tes_task_logs( - tes_ids=tes_ids, - token=token, - ) - - # Update run document in database - try: - self.update_run_document( - event=event, - state=state, - internal=internal, - outputs=outputs, - task_logs=task_logs, - task_finished=datetime.utcfromtimestamp( - event['timestamp'] - ).strftime(strf), - return_code=returncode, - stdout=log, - stderr='', - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - pass - - def on_task_tes_task_update( - self, - event: Event, - ) -> None: - """Event handler for TES task state changes.""" - # If TES task is new, add task log to database - if not event['tes_state']: - tes_log = self.__get_tes_task_log( - tes_id=event['tes_id'], - token=event['token'], - ) - try: - db_utils.append_to_tes_task_logs( - collection=self.collection, - task_id=event['uuid'], - tes_log=tes_log, - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. 
Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - pass - - # Otherwise only update state - else: - try: - db_utils.update_tes_task_state( - collection=self.collection, - task_id=event['uuid'], - tes_id=event['tes_id'], - state=event['tes_state'], - ) - logger.info( - ( - "State of TES task '{tes_id}' of run with task ID " - "'{task_id}' changed to '{state}'." - ).format( - task_id=event['uuid'], - tes_id=event['tes_id'], - state=event['tes_state'], - ) - ) - except Exception as e: - logger.exception( - ( - 'Database error. Could not update log information for ' - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=event['uuid'], - type=type(e).__name__, - msg=e, - ) - ) - pass - - def update_run_document( - self, - event: Event, - state: Optional[str] = None, - internal: Optional[Dict] = None, - outputs: Optional[Dict] = None, - task_logs: Optional[List[Dict]] = None, - **run_log_params - ): - """Updates state, internal and run log parameters in database - document. 
- """ - # TODO: Minimize db ops; try to compile entire object & update once - # Update internal parameters - if internal: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='internal', - **internal, - ) - - # Update outputs - if outputs: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api.outputs', - **outputs, - ) - - # Update task logs - if task_logs: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api', - task_logs=task_logs, - ) - - # Update run log parameters - if run_log_params: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api.run_log', - **run_log_params, - ) - - # Calculate queue, execution and run time - if document and document['internal']: - run_log = document['internal'] - durations = dict() - - if 'task_started' in run_log_params: - if 'task_started' in run_log and 'task_received' in run_log: - pass - durations['time_queue'] = ( - run_log['task_started'] - run_log['task_received'] - ).total_seconds() - - if 'task_finished' in run_log_params: - if 'task_finished' in run_log and 'task_started' in run_log: - pass - durations['time_execution'] = ( - run_log['task_finished'] - run_log['task_started'] - ).total_seconds() - if 'task_finished' in run_log and 'task_received' in run_log: - pass - durations['time_total'] = ( - run_log['task_finished'] - run_log['task_received'] - ).total_seconds() - - if durations: - document = db_utils.upsert_fields_in_root_object( - collection=self.collection, - task_id=event['uuid'], - root='api.run_log', - **durations, - ) - - # Update state - if state: - try: - document = db_utils.update_run_state( - collection=self.collection, - task_id=event['uuid'], - state=state, - ) - except Exception: - raise - - # Log info message - if document: - logger.info( - ( - "State 
of run '{run_id}' (task id: '{task_id}') changed " - "to '{state}'." - ).format( - run_id=document['run_id'], - task_id=event['uuid'], - state=state, - ) - ) - - return document - - @staticmethod - def __cwl_tes_outputs_parser(log: str) -> Dict: - """Parses outputs from cwl-tes log.""" - # Find outputs object in log string - re_outputs = re.compile( - r'(^\{$\n^ {4}"\S+": [\[\{]$\n(^ {4,}.*$\n)*^ {4}[\]\}]$\n^\}$\n)', - re.MULTILINE - ) - m = re_outputs.search(log) - if m: - return literal_eval(m.group(1)) - else: - return dict() - - @staticmethod - def __cwl_tes_outputs_parser_list(log: List) -> Dict: - """This function parses outputs from the cwl-tes log""" - """The outputs JSON starts at the line before last in the logs""" - """So unless the outputs are empty ({}), parse upward,""" - """until you find the beginning of the JSON containing the outputs""" - - indices=range(len(log)-1,-1,-1) - - start=-1 - end=-1 - for index in indices: - if log[index].rstrip()=='{}': - return dict() - elif log[index].rstrip()=='}': - end=index - break - - # No valid JSON was found and the previous loop - # reached the end of the log - if end==0: - return dict() - - indices=range(end-1,-1,-1) - for index in indices: - if log[index].rstrip()=='{': - start=index - break - - json=os.linesep.join(log[start:end+1]) - - try: - return literal_eval(json) - except ValueError as verr: - logger.exception( - "ValueError when evaluation JSON: '%s'. Original error message: %s" % \ - (json, verr) - ) - return dict() - except SyntaxError as serr: - logger.exception( - "SyntaxError when evaluation JSON: '%s'. 
Original error message: %s" % \ - (json, serr) - ) - return dict() - - def __get_tes_task_logs( - self, - tes_ids: List = list(), - token: Optional[str] = None, - ) -> List[Dict]: - """Gets multiple task logs from TES instance.""" - task_logs = list() - for tes_id in tes_ids: - task_logs.append( - self.__get_tes_task_log( - tes_id=tes_id, - token=token, - ) - ) - return task_logs - - def __get_tes_task_log( - self, - tes_id: str, - token: Optional[str] = None, - ) -> Dict: - """Gets task log from TES instance.""" - tes_client = tes.HTTPClient( - url=self.tes_config['url'], - timeout=self.tes_config['timeout'], - token=token, - ) - - task_log = {} - - try: - task_log = tes_client.get_task( - task_id=tes_id, - view=self.tes_config['query_params'], - ).as_dict() - except Exception as e: - # TODO: handle more robustly: only 400/Bad Request is okay; - # TODO: other errors (e.g. 500) should be dealt with - logger.warning( - "Could not obtain task log. Setting default. Original error " - f"message: {type(e).__name__}: {e}" - ) - task_log = {} - - logger.debug(f'Task log: {task_log}') - - return task_log diff --git a/cwl_wes/tasks/register_celery.py b/cwl_wes/tasks/register_celery.py deleted file mode 100644 index e1856e1..0000000 --- a/cwl_wes/tasks/register_celery.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Function to create Celery app instance and register task monitor.""" - -from cwl_wes.worker import celery_app -import logging -import os - -from foca.models.config import Config -from cwl_wes.tasks.celery_task_monitor import TaskMonitor - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def register_task_service() -> None: - """Instantiates Celery app and registers task monitor.""" - # Ensure that code is executed only once when app reloader is used - if os.environ.get("WERKZEUG_RUN_MAIN") != 'true': - # Start task monitor daemon - foca_config: Config = celery_app.conf.foca - custom_config = foca_config.custom - TaskMonitor( - celery_app=celery_app, - 
collection=foca_config.db.dbs['cwl-wes-db'].collections['runs'].client, - tes_config={ - 'url': custom_config.tes_server.url, - 'query_params': custom_config.tes_server.status_query_params, - 'timeout': custom_config.tes_server.timeout - }, - timeout=custom_config.celery.monitor.timeout, - authorization=foca_config.security.auth.required, - ) - logger.info('Celery task monitor registered.') - - return None diff --git a/cwl_wes/tasks/tasks/cancel_run.py b/cwl_wes/tasks/tasks/cancel_run.py index 7246865..3e4313f 100644 --- a/cwl_wes/tasks/tasks/cancel_run.py +++ b/cwl_wes/tasks/tasks/cancel_run.py @@ -52,11 +52,12 @@ def task__cancel_run( try: # Cancel individual TES tasks + tes_server_config = foca_config.custom.controller.tes_server __cancel_tes_tasks( collection=collection, run_id=run_id, - url=foca_config.custom.tes_server.tes_server.url, - timeout=foca_config.custom.tes_server.tes_server.timeout, + url=tes_server_config.url, + timeout=tes_server_config.timeout, token=token, ) except SoftTimeLimitExceeded as e: diff --git a/cwl_wes/tasks/tasks/cwl_log_processor.py b/cwl_wes/tasks/tasks/cwl_log_processor.py new file mode 100644 index 0000000..c26c60f --- /dev/null +++ b/cwl_wes/tasks/tasks/cwl_log_processor.py @@ -0,0 +1,297 @@ +import re +import os +import logging +from _io import TextIOWrapper +from typing import (Dict, List, Optional, Tuple) +from ast import literal_eval + +import tes +from cwl_wes.worker import celery_app +import cwl_wes.utils.db_utils as db_utils + +# Get logger instance +logger = logging.getLogger(__name__) + + +class CWLLogProcessor: + + def __init__(self, tes_config, collection) -> None: + self.tes_config = tes_config + self.collection = collection + + def process_cwl_logs( + self, + task: celery_app.Task, + stream: TextIOWrapper, + token: Optional[str] = None, + ) -> Tuple[List, List]: + """Parses combinend cwl-tes STDOUT/STDERR and sends TES task IDs and state + updates to broker.""" + stream_container: List = list() + tes_states: Dict = 
dict() + + # Iterate over STDOUT/STDERR stream + for line in iter(stream.readline, ''): + + line = line.rstrip() + + # Replace single quote characters to avoid `literal_eval()` errors + line = line.replace("'", '"') + + # Handle special cases + lines = self.process_tes_log(line) + for line in lines: + stream_container.append(line) + logger.info(f"[{task}] {line}") + continue + + # Detect TES task state changes + (tes_id, tes_state) = self.extract_tes_state(line) + if tes_id: + + # Handle new task + if tes_id not in tes_states: + tes_states[tes_id] = tes_state + self.capture_tes_task_update( + task, + tes_id=tes_id, + token=token, + ) + # Handle state change + elif tes_states[tes_id] != tes_state: + tes_states[tes_id] = tes_state + self.capture_tes_task_update( + task, + tes_id=tes_id, + tes_state=tes_state, + ) + logger.info(line) + continue + + stream_container.append(line) + logger.info(line) + + return (stream_container, list(tes_states.keys())) + + + def process_tes_log(self, line: str) -> List[str]: + """Handles irregularities arising from log parsing.""" + lines: List = list() + + # Handle special case where FTP and cwl-tes logs are on same line + re_ftp_cwl_tes = re.compile( + r'^(\*cmd\* .*)(\[step \w*\] produced output \{)$' + ) + m = re_ftp_cwl_tes.match(line) + if m: + lines.append(m.group(1)) + + return lines + + def extract_tes_state( + self, + line: str, + ) -> Tuple[Optional[str], Optional[str]]: + """Extracts task ID and state from cwl-tes log.""" + task_id: Optional[str] = None + task_state: Optional[str] = None + + # Extract new task ID + re_task_new = re.compile(r"^\[job [\w\-]*\] task id: (\S*)$") + m = re_task_new.match(line) + if m: + task_id = m.group(1) + + # Extract task ID and state + re_task_state_poll = re.compile( + r'^\[job [\w\-]*\] POLLING "(\S*)", result: (\w*)' + ) + m = re_task_state_poll.match(line) + if m: + task_id = m.group(1) + task_state = m.group(2) + + return (task_id, task_state) + + + def capture_tes_task_update( + self, 
+ task: celery_app.Task, + tes_id: str, + tes_state: Optional[str] = None, + token: Optional[str] = None, + ) -> None: + """Event handler for TES task state changes.""" + # If TES task is new, add task log to database + logger.info(f"TES_STATE------------->{tes_state}") + cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config) + if not tes_state: + tes_log = cwl_tes_processor.__get_tes_task_log( + tes_id=tes_id, + token=token, + ) + logger.info(f"LOG------------->{tes_log}") + try: + db_utils.append_to_tes_task_logs( + collection=self.collection, + task_id=task.task_id, + tes_log=tes_log, + ) + except Exception as e: + logger.exception( + ( + 'Database error. Could not update log information for ' + "task '{task}'. Original error message: {type}: {msg}" + ).format( + task=task.task_id, + type=type(e).__name__, + msg=e, + ) + ) + pass + + # Otherwise only update state + else: + try: + db_utils.update_tes_task_state( + collection=self.collection, + task_id=task.task_id, + tes_id=tes_id, + state=tes_state, + ) + logger.info( + ( + "State of TES task '{tes_id}' of run with task ID " + "'{task_id}' changed to '{state}'." + ).format( + task_id=task.task_id, + tes_id=tes_id, + state=tes_state, + ) + ) + except Exception as e: + logger.exception( + ( + 'Database error. Could not update log information for ' + "task '{task}'. 
Original error message: {type}: {msg}" + ).format( + task=task.task_id, + type=type(e).__name__, + msg=e, + ) + ) + pass + + +class CWLTesProcessor: + + def __init__(self, tes_config) -> None: + self.tes_config = tes_config + + @staticmethod + def __cwl_tes_outputs_parser(log: str) -> Dict: + """Parses outputs from cwl-tes log.""" + # Find outputs object in log string + re_outputs = re.compile( + r'(^\{$\n^ {4}"\S+": [\[\{]$\n(^ {4,}.*$\n)*^ {4}[\]\}]$\n^\}$\n)', + re.MULTILINE + ) + m = re_outputs.search(log) + if m: + return literal_eval(m.group(1)) + else: + return dict() + + @staticmethod + def __cwl_tes_outputs_parser_list(log: List) -> Dict: + """This function parses outputs from the cwl-tes log""" + """The outputs JSON starts at the line before last in the logs""" + """So unless the outputs are empty ({}), parse upward,""" + """until you find the beginning of the JSON containing the outputs""" + + indices=range(len(log)-1,-1,-1) + + start=-1 + end=-1 + for index in indices: + if log[index].rstrip()=='{}': + return dict() + elif log[index].rstrip()=='}': + end=index + break + + # No valid JSON was found and the previous loop + # reached the end of the log + if end==0: + return dict() + + indices=range(end-1,-1,-1) + for index in indices: + if log[index].rstrip()=='{': + start=index + break + + json=os.linesep.join(log[start:end+1]) + + try: + return literal_eval(json) + except ValueError as verr: + logger.exception( + "ValueError when evaluation JSON: '%s'. Original error message: %s" % \ + (json, verr) + ) + return dict() + except SyntaxError as serr: + logger.exception( + "SyntaxError when evaluation JSON: '%s'. 
Original error message: %s" % \ + (json, serr) + ) + return dict() + + def __get_tes_task_logs( + self, + tes_ids: List = list(), + token: Optional[str] = None, + ) -> List[Dict]: + """Gets multiple task logs from TES instance.""" + task_logs = list() + for tes_id in tes_ids: + task_logs.append( + self.__get_tes_task_log( + tes_id=tes_id, + token=token, + ) + ) + return task_logs + + def __get_tes_task_log( + self, + tes_id: str, + token: Optional[str] = None, + ) -> Dict: + """Gets task log from TES instance.""" + tes_client = tes.HTTPClient( + url=self.tes_config['url'], + timeout=self.tes_config['timeout'], + token=token, + ) + + task_log = {} + + try: + task_log = tes_client.get_task( + task_id=tes_id, + view=self.tes_config['query_params'], + ).as_dict() + except Exception as e: + # TODO: handle more robustly: only 400/Bad Request is okay; + # TODO: other errors (e.g. 500) should be dealt with + logger.warning( + "Could not obtain task log. Setting default. Original error " + f"message: {type(e).__name__}: {e}" + ) + task_log = {} + + logger.debug(f'Task log: {task_log}') + + return task_log \ No newline at end of file diff --git a/cwl_wes/tasks/tasks/run_workflow.py b/cwl_wes/tasks/tasks/run_workflow.py index aa3c94f..e910ea5 100644 --- a/cwl_wes/tasks/tasks/run_workflow.py +++ b/cwl_wes/tasks/tasks/run_workflow.py @@ -1,12 +1,10 @@ """Celery background task to start workflow run.""" -from _io import TextIOWrapper import logging -import re -import subprocess -from typing import (Dict, List, Optional, Tuple) +from typing import (List, Optional, Tuple) from cwl_wes.worker import celery_app +from cwl_wes.tasks.tasks.workflow_run_manager import WorkflowRunManager # Get logger instance @@ -27,131 +25,11 @@ def task__run_workflow( ) -> Tuple[int, List[str], List[str], Optional[str]]: """Adds workflow run to task queue.""" # Execute task in background - proc = subprocess.Popen( - command_list, - cwd=tmp_dir, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - 
universal_newlines=True, + workflow_run_manager = WorkflowRunManager( + task=self, + command_list=command_list, + tmp_dir=tmp_dir, + token=token ) - # Parse output in real-time - log, tes_ids = __process_cwl_logs( - self, - stream=proc.stdout, - token=token, - ) - - returncode = proc.wait() - - return (returncode, log, tes_ids, token) - - -def __process_cwl_logs( - task: celery_app.Task, - stream: TextIOWrapper, - token: Optional[str] = None, -) -> Tuple[List, List]: - """Parses combinend cwl-tes STDOUT/STDERR and sends TES task IDs and state - updates to broker.""" - stream_container: List = list() - tes_states: Dict = dict() - - # Iterate over STDOUT/STDERR stream - for line in iter(stream.readline, ''): - - line = line.rstrip() - - # Replace single quote characters to avoid `literal_eval()` errors - line = line.replace("'", '"') - - # Handle special cases - lines = __handle_cwl_tes_log_irregularities(line) - for line in lines: - stream_container.append(line) - logger.info(f"[{task}] {line}") - continue - - # Detect TES task state changes - (tes_id, tes_state) = __extract_tes_task_state_from_cwl_tes_log(line) - if tes_id: - - # Handle new task - if tes_id not in tes_states: - tes_states[tes_id] = tes_state - __send_event_tes_task_update( - task, - tes_id=tes_id, - token=token, - ) - # Handle state change - elif tes_states[tes_id] != tes_state: - tes_states[tes_id] = tes_state - __send_event_tes_task_update( - task, - tes_id=tes_id, - tes_state=tes_state, - ) - logger.info(line) - continue - - stream_container.append(line) - logger.info(line) - - return (stream_container, list(tes_states.keys())) - - -def __handle_cwl_tes_log_irregularities(line: str) -> List[str]: - """Handles irregularities arising from log parsing.""" - lines: List = list() - - # Handle special case where FTP and cwl-tes logs are on same line - re_ftp_cwl_tes = re.compile( - r'^(\*cmd\* .*)(\[step \w*\] produced output \{)$' - ) - m = re_ftp_cwl_tes.match(line) - if m: - 
lines.append(m.group(1)) - - return lines - - -def __extract_tes_task_state_from_cwl_tes_log( - line: str, -) -> Tuple[Optional[str], Optional[str]]: - """Extracts task ID and state from cwl-tes log.""" - task_id: Optional[str] = None - task_state: Optional[str] = None - - # Extract new task ID - re_task_new = re.compile(r"^\[job [\w\-]*\] task id: (\S*)$") - m = re_task_new.match(line) - if m: - task_id = m.group(1) - - # Extract task ID and state - re_task_state_poll = re.compile( - r'^\[job [\w\-]*\] POLLING "(\S*)", result: (\w*)' - ) - m = re_task_state_poll.match(line) - if m: - task_id = m.group(1) - task_state = m.group(2) - - return (task_id, task_state) - - -def __send_event_tes_task_update( - task: celery_app.Task, - tes_id: str, - tes_state: Optional[str] = None, - token: Optional[str] = None, -) -> None: - """Sends custom event to inform about TES task state change.""" - task.send_event( - 'task-tes-task-update', - tes_id=tes_id, - tes_state=tes_state, - token=token, - ) - - return None + return_val = workflow_run_manager.run_workflow() + return return_val diff --git a/cwl_wes/tasks/tasks/workflow_run_manager.py b/cwl_wes/tasks/tasks/workflow_run_manager.py new file mode 100644 index 0000000..68ec64a --- /dev/null +++ b/cwl_wes/tasks/tasks/workflow_run_manager.py @@ -0,0 +1,394 @@ +import os +import logging +import subprocess +from typing import (Dict, List, Optional) +import time +from datetime import datetime + +from foca.models.config import Config + +from cwl_wes.worker import celery_app +from cwl_wes.tasks.tasks.cwl_log_processor import CWLLogProcessor, CWLTesProcessor +import cwl_wes.utils.db_utils as db_utils + +# Get logger instance +logger = logging.getLogger(__name__) + + +class WorkflowRunManager: + """Workflow run manager. + """ + + def __init__( + self, + command_list: List, + task: celery_app.Task, + tmp_dir: str, + token: Optional[str] = None + ) -> None: + """Initiate workflow run manager instance. 
+ + Args: + task: Celery task instance for initiating workflow run. + task_id: Unique identifier for workflow run task. + command_list: List of commands to be executed as a part of workflow run. + tmp_dir: Current working directory to be passed for child process execution + context. + token: JSON Web Token (JWT). + foca_config: :py:class:`foca.models.config.Config` instance + describing configurations registered with `celery_app`. + custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` instance + describing custom configuration model for cwl-WES specific + configurations. + collection: Collection client for saving task run progress. + tes_config: TES (Task Execution Service) endpoint configurations. + authorization: Boolean to define the security auth configuration for + the app. + string_format: String time format for task timestamps. + + Attributes: + task: Celery task instance for initiating workflow run. + task_id: Unique identifier for workflow run task. + command_list: List of commands to be executed as a part of workflow run. + tmp_dir: Current working directory to be passed for child process execution + context. + token: JSON Web Token (JWT). + foca_config: :py:class:`foca.models.config.Config` instance + describing configurations registered with `celery_app`. + custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` instance + describing custom configuration model for cwl-WES specific + configurations. + collection: Collection client for saving task run progress. + tes_config: TES (Task Execution Service) endpoint configurations. + authorization: Boolean to define the security auth configuration for + the app. + string_format: String time format for task timestamps. 
+ """ + self.task = task + self.task_id = self.task.request.id + self.command_list = command_list + self.tmp_dir = tmp_dir + self.token = token + self.foca_config: Config = celery_app.conf.foca + self.controller_config = self.foca_config.custom.controller + self.collection = self.foca_config.db.dbs['cwl-wes-db'].collections['runs'].client + self.tes_config= { + 'url': self.controller_config.tes_server.url, + 'query_params': self.controller_config.tes_server.status_query_params, + 'timeout': self.controller_config.tes_server.timeout + } + self.authorization = self.foca_config.security.auth.required + self.string_format: str = '%Y-%m-%d %H:%M:%S.%f' + + def trigger_task_start_events(self) -> None: + """Method to trigger task start events. + """ + if not self.collection.find_one({'task_id': self.task.request.id}): + return None + internal = dict() + current_ts = time.time() + internal['task_started'] = datetime.utcfromtimestamp( + current_ts + ) + # Update run document in database + try: + self.update_run_document( + state='RUNNING', + internal=internal, + task_started=datetime.utcfromtimestamp( + current_ts + ).strftime(self.string_format), + ) + except Exception as e: + logger.exception( + ( + 'Database error. Could not update log information for ' + "task '{task}'. Original error message: {type}: {msg}" + ).format( + task=self.task_id, + type=type(e).__name__, + msg=e, + ) + ) + + def trigger_task_failure_events(self, task_end_ts): + """Method to trigger task failure events. 
+ """ + if not self.collection.find_one({'task_id': self.task_id}): + return None + + # Create dictionary for internal parameters + internal = dict() + internal['task_finished'] = datetime.utcfromtimestamp( + task_end_ts + ) + task_meta_data = celery_app.AsyncResult(id=self.task_id) + internal['traceback'] = task_meta_data.traceback + + # Update run document in databse + self.update_run_document( + state='SYSTEM_ERROR', + internal=internal, + task_finished=datetime.utcfromtimestamp( + task_end_ts + ).strftime(self.string_format), + exception=task_meta_data.result, + ) + + def trigger_task_success_events( + self, + returncode: int, + log: str, + tes_ids: List[str], + token: str, + task_end_ts: float + ) -> None: + """Method to trigger task success events. + + Args: + returncode: Task completion status code. + log: Task run log. + tes_ids: TES task identifiers. + token: TES token. + task_end_ts: Task end timestamp. + """ + if not self.collection.find_one({'task_id': self.task_id}): + return None + + # Parse subprocess results + try: + log_list = log + log = os.linesep.join(log) + except Exception as e: + logger.exception( + ( + "Field 'result' in event message malformed. 
Original " + 'error message: {type}: {msg}' + ).format( + type=type(e).__name__, + msg=e, + ) + ) + pass + + # Create dictionary for internal parameters + internal = dict() + internal['task_finished'] = datetime.utcfromtimestamp( + task_end_ts + ) + + # Set final state to be set + document = self.collection.find_one( + filter={'task_id': self.task_id}, + projection={ + 'api.state': True, + '_id': False, + } + ) + if document and document['api']['state'] == 'CANCELING': + state = 'CANCELED' + elif returncode: + state = 'EXECUTOR_ERROR' + else: + state = 'COMPLETE' + + # Extract run outputs + cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config) + outputs = cwl_tes_processor.__cwl_tes_outputs_parser_list(log=log_list) + + # Get task logs + task_logs = cwl_tes_processor.__get_tes_task_logs( + tes_ids=tes_ids, + token=token, + ) + + # Update run document in database + try: + self.update_run_document( + state=state, + internal=internal, + outputs=outputs, + task_logs=task_logs, + task_finished=datetime.utcfromtimestamp( + task_end_ts + ).strftime(self.string_format), + return_code=returncode, + stdout=log, + stderr='', + ) + except Exception as e: + logger.exception( + ( + 'Database error. Could not update log information for ' + "task '{task}'. Original error message: {type}: {msg}" + ).format( + task=self.task_id, + type=type(e).__name__, + msg=e, + ) + ) + pass + + def trigger_task_end_events( + self, + returncode: int, + log: str, + tes_ids: List[str], + token: str + ) -> None: + """Method to trigger task completion events. + + Args: + returncode: Task completion status code. + log: Task run log. + tes_ids: TES task identifiers. + token: TES token. + task_end_ts: Task end timestamp. 
+ """ + task_end_ts = time.time() + if returncode == 0: + self.trigger_task_success_events( + log=log, tes_ids=tes_ids, token=token, + task_end_ts=task_end_ts, returncode=returncode + ) + else: + self.trigger_task_failure_events(task_end_ts=task_end_ts) + + def update_run_document( + self, + state: Optional[str] = None, + internal: Optional[Dict] = None, + outputs: Optional[Dict] = None, + task_logs: Optional[List[Dict]] = None, + **run_log_params + ): + """Updates state, internal and run log parameters in database + document. + + Args: + state: Task state. + internal: Task specific internal parameters. + outputs: Task specific output parameters. + task_logs: Task run logs. + + """ + # TODO: Minimize db ops; try to compile entire object & update once + # Update internal parameters + if internal: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root='internal', + **internal, + ) + + # Update outputs + if outputs: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root='api.outputs', + **outputs, + ) + + # Update task logs + if task_logs: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root='api', + task_logs=task_logs, + ) + + # Update run log parameters + if run_log_params: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root='api.run_log', + **run_log_params, + ) + + # Calculate queue, execution and run time + if document and document['internal']: + run_log = document['internal'] + durations = dict() + + if 'task_started' in run_log_params: + if 'task_started' in run_log and 'task_received' in run_log: + pass + durations['time_queue'] = ( + run_log['task_started'] - run_log['task_received'] + ).total_seconds() + + if 'task_finished' in run_log_params: + if 'task_finished' in run_log and 'task_started' in run_log: + pass + 
durations['time_execution'] = ( + run_log['task_finished'] - run_log['task_started'] + ).total_seconds() + if 'task_finished' in run_log and 'task_received' in run_log: + pass + durations['time_total'] = ( + run_log['task_finished'] - run_log['task_received'] + ).total_seconds() + + if durations: + document = db_utils.upsert_fields_in_root_object( + collection=self.collection, + task_id=self.task_id, + root='api.run_log', + **durations, + ) + + # Update state + if state: + try: + document = db_utils.update_run_state( + collection=self.collection, + task_id=self.task_id, + state=state, + ) + except Exception: + raise + + # Log info message + if document: + logger.info( + ( + "State of run '{run_id}' (task id: '{task_id}') changed " + "to '{state}'." + ).format( + run_id=document['run_id'], + task_id=self.task_id, + state=state, + ) + ) + + return document + + + def run_workflow(self): + """Method to initiate workflow run. + """ + self.trigger_task_start_events() + proc = subprocess.Popen( + self.command_list, + cwd=self.tmp_dir, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + ) + # Parse output in real-time + cwl_log_processor = CWLLogProcessor(tes_config=self.tes_config, collection=self.collection) + log, tes_ids = cwl_log_processor.process_cwl_logs( + self.task, + stream=proc.stdout, + token=self.token, + ) + returncode = proc.wait() + self.trigger_task_end_events( + token=self.token, + returncode=returncode, + log=log, tes_ids=tes_ids + ) \ No newline at end of file From 4f35e48f7621ad4fc29a94f06012e0b28fafc59f Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Sat, 31 Dec 2022 22:11:13 +0100 Subject: [PATCH 14/29] some cleaning up --- cwl_wes/__init__.py | 1 + cwl_wes/app.py | 6 +- cwl_wes/config.yaml | 4 +- cwl_wes/custom_config.py | 46 +-- cwl_wes/exceptions.py | 33 +-- cwl_wes/ga4gh/__init__.py | 1 + cwl_wes/ga4gh/wes/__init__.py | 1 + cwl_wes/ga4gh/wes/endpoints/__init__.py | 1 + cwl_wes/ga4gh/wes/endpoints/cancel_run.py | 44 
++- cwl_wes/ga4gh/wes/endpoints/get_run_log.py | 32 +- cwl_wes/ga4gh/wes/endpoints/get_run_status.py | 38 +-- .../ga4gh/wes/endpoints/get_service_info.py | 43 +-- cwl_wes/ga4gh/wes/endpoints/list_runs.py | 76 +++-- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 279 ++++++++---------- cwl_wes/ga4gh/wes/endpoints/utils/__init__.py | 0 cwl_wes/ga4gh/wes/server.py | 27 +- cwl_wes/ga4gh/wes/service_info.py | 24 +- cwl_wes/ga4gh/wes/states.py | 22 +- cwl_wes/gunicorn.py | 24 +- cwl_wes/tasks/__init__.py | 1 + cwl_wes/tasks/{tasks => }/cancel_run.py | 35 ++- .../tasks/{tasks => }/cwl_log_processor.py | 97 +++--- cwl_wes/tasks/{tasks => }/run_workflow.py | 13 +- cwl_wes/tasks/tasks/__init__.py | 0 cwl_wes/tasks/utils.py | 64 ---- .../tasks/{tasks => }/workflow_run_manager.py | 182 ++++++------ cwl_wes/utils/__init__.py | 1 + cwl_wes/utils/db.py | 143 +++++++++ cwl_wes/utils/db_utils.py | 73 ----- .../{ga4gh/wes/endpoints => }/utils/drs.py | 29 +- cwl_wes/version.py | 3 + cwl_wes/worker.py | 5 +- cwl_wes/wsgi.py | 2 +- deployment/values.yaml | 5 +- requirements.txt | 4 +- requirements_dev.txt | 5 + setup.py | 52 ++-- 37 files changed, 697 insertions(+), 719 deletions(-) delete mode 100644 cwl_wes/ga4gh/wes/endpoints/utils/__init__.py rename cwl_wes/tasks/{tasks => }/cancel_run.py (85%) rename cwl_wes/tasks/{tasks => }/cwl_log_processor.py (84%) rename cwl_wes/tasks/{tasks => }/run_workflow.py (68%) delete mode 100644 cwl_wes/tasks/tasks/__init__.py delete mode 100644 cwl_wes/tasks/utils.py rename cwl_wes/tasks/{tasks => }/workflow_run_manager.py (72%) create mode 100644 cwl_wes/utils/db.py delete mode 100644 cwl_wes/utils/db_utils.py rename cwl_wes/{ga4gh/wes/endpoints => }/utils/drs.py (93%) create mode 100644 cwl_wes/version.py create mode 100644 requirements_dev.txt diff --git a/cwl_wes/__init__.py b/cwl_wes/__init__.py index e69de29..a1e7cda 100644 --- a/cwl_wes/__init__.py +++ b/cwl_wes/__init__.py @@ -0,0 +1 @@ +"""cwl-WES package.""" diff --git a/cwl_wes/app.py 
b/cwl_wes/app.py index e2f4110..e31bfee 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -1,7 +1,5 @@ """cwl-WES application entry point.""" -from pathlib import Path - from connexion import App from flask import current_app from foca import Foca @@ -13,7 +11,7 @@ def init_app() -> App: foca = Foca( config_file="config.yaml", - custom_config_model='cwl_wes.custom_config.CustomConfig', + custom_config_model="cwl_wes.custom_config.CustomConfig", ) app = foca.create_app() with app.app.app_context(): @@ -31,6 +29,6 @@ def run_app(app: App) -> None: app.run(port=app.port) -if __name__ == '__main__': +if __name__ == "__main__": app = init_app() run_app(app) diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index 59e038d..d42dc57 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -100,8 +100,8 @@ jobs: port: 5672 backend: 'rpc://' include: - - cwl_wes.tasks.tasks.run_workflow - - cwl_wes.tasks.tasks.cancel_run + - cwl_wes.tasks.run_workflow + - cwl_wes.tasks.cancel_run # Exception configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ExceptionConfig diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index d66be5c..021b5d6 100644 --- a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -29,9 +29,10 @@ class StorageConfig(FOCABaseConfig): ... ) StorageConfig(tmp_dir='/data/tmp', permanent_dir='/data/output', remote_storage_url='ftp://ftp.private/upload') """ - permanent_dir: str = '/data/output' - tmp_dir: str = '/data/tmp' - remote_storage_url: str = 'ftp://ftp-private.ebi.ac.uk/upload/foivos' + + permanent_dir: str = "/data/output" + tmp_dir: str = "/data/tmp" + remote_storage_url: str = "ftp://ftp-private.ebi.ac.uk/upload/foivos" class CeleryConfig(FOCABaseConfig): @@ -56,6 +57,7 @@ class CeleryConfig(FOCABaseConfig): ... 
) CeleryConfig(timeout=15, message_maxsize=1024) """ + timeout: float = 0.1 message_maxsize: int = 16777216 @@ -65,10 +67,10 @@ class WorkflowTypeVersionConfig(FOCABaseConfig): Args: workflow_type_version: List of one or more acceptable versions for the workflow type. - + Attributes: workflow_type_version: List of one or more acceptable versions for the - workflow type. + workflow type. Raises: pydantic.ValidationError: The class was instantianted with an illegal @@ -80,6 +82,7 @@ class WorkflowTypeVersionConfig(FOCABaseConfig): ... ) WorkflowTypeVersionConfig(workflow_type_version=['v1.0']) """ + workflow_type_version: Optional[List[str]] = [] @@ -108,6 +111,7 @@ class DefaultWorkflowEngineParameterConfig(FOCABaseConfig): ... ) DefaultWorkflowEngineParameterConfig(name='name', type='str', default_value='default') """ + name: Optional[str] type: Optional[str] default_value: Optional[str] @@ -132,7 +136,8 @@ class TagsConfig(FOCABaseConfig): ... ) TagsConfig(known_tes_endpoints='https://tes.endpoint') """ - known_tes_endpoints: str = 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' + + known_tes_endpoints: str = "https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/" class ServiceInfoConfig(FOCABaseConfig): @@ -153,7 +158,7 @@ class ServiceInfoConfig(FOCABaseConfig): default_workflow_engine_parameters: Each workflow engine can present additional parameters that can be sent to the workflow engine. tags: A key-value map of arbitrary, extended metadata outside the scope of the above but - useful to report back. + useful to report back. Attributes: contact_info: Email address/webpage URL with contact information. @@ -170,7 +175,7 @@ class ServiceInfoConfig(FOCABaseConfig): default_workflow_engine_parameters: Each workflow engine can present additional parameters that can be sent to the workflow engine. 
tags: A key-value map of arbitrary, extended metadata outside the scope of the above but - useful to report back. + useful to report back. Raises: pydantic.ValidationError: The class was instantianted with an illegal @@ -195,12 +200,15 @@ class ServiceInfoConfig(FOCABaseConfig): tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|h\ ttps://tesk.c01.k8s-popup.csc.fi/')) """ - contact_info: str = 'https://github.com/elixir-cloud-aai/cwl-WES' - auth_instructions_url: str = 'https://www.elixir-europe.org/services/compute/aai' - supported_filesystem_protocols: List[str] = ['ftp', 'https', 'local'] - supported_wes_versions: List[str] = ['1.0.0'] + + contact_info: str = "https://github.com/elixir-cloud-aai/cwl-WES" + auth_instructions_url: str = ( + "https://www.elixir-europe.org/services/compute/aai" + ) + supported_filesystem_protocols: List[str] = ["ftp", "https", "local"] + supported_wes_versions: List[str] = ["1.0.0"] workflow_type_versions: Dict[str, WorkflowTypeVersionConfig] = { - 'CWL': WorkflowTypeVersionConfig(workflow_type_version=['v1.0']), + "CWL": WorkflowTypeVersionConfig(workflow_type_version=["v1.0"]), } workflow_engine_versions: Dict[str, str] = {} default_workflow_engine_parameters: List[ @@ -234,9 +242,10 @@ class TesServerConfig(FOCABaseConfig): ... ) TesServerConfig(url='https://tes.endpoint', timeout=5, status_query_params='FULL') """ - url: str = 'https://csc-tesk.c03.k8s-popup.csc.fi/' + + url: str = "https://csc-tesk.c03.k8s-popup.csc.fi/" timeout: int = 5 - status_query_params: str = 'FULL' + status_query_params: str = "FULL" class DRSServerConfig(FOCABaseConfig): @@ -273,10 +282,11 @@ class DRSServerConfig(FOCABaseConfig): ... 
) DRSServerConfig(port=443, base_path='ga4gh/drs/v1', use_http=False, file_types=['cwl', 'yaml', 'yml']) """ + port: Optional[int] = None base_path: Optional[str] = None use_http: bool = False - file_types: List[str] = ['cwl', 'yaml', 'yml'] + file_types: List[str] = ["cwl", "yaml", "yml"] class IdConfig(FOCABaseConfig): @@ -303,6 +313,7 @@ class IdConfig(FOCABaseConfig): ... ) IdConfig(charset='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', length=6) """ + length: int = 6 charset: str = string.ascii_uppercase + string.digits @@ -338,6 +349,7 @@ class ControllerConfig(FOCABaseConfig): ... ) ControllerConfig(default_page_size=5, timeout_cancel_run=60, timeout_run_workflow=60) """ + default_page_size: int = 5 timeout_cancel_run: int = 60 timeout_run_workflow: Optional[int] = None @@ -365,8 +377,8 @@ class CustomConfig(FOCABaseConfig): pydantic.ValidationError: The class was instantianted with an illegal data type. """ + storage: StorageConfig = StorageConfig() celery: CeleryConfig = CeleryConfig() controller: ControllerConfig = ControllerConfig() service_info: ServiceInfoConfig = ServiceInfoConfig() - diff --git a/cwl_wes/exceptions.py b/cwl_wes/exceptions.py index 0dd724b..ea6542b 100644 --- a/cwl_wes/exceptions.py +++ b/cwl_wes/exceptions.py @@ -3,60 +3,57 @@ ExtraParameterProblem, Forbidden, Unauthorized, - ProblemException + ProblemException, ) from pydantic import ValidationError -from werkzeug.exceptions import ( - BadRequest, - InternalServerError, - NotFound -) +from werkzeug.exceptions import BadRequest, InternalServerError, NotFound class WorkflowNotFound(ProblemException, NotFound): """WorkflowNotFound(404) error compatible with Connexion.""" + pass exceptions = { Exception: { "message": "An unexpected error occurred.", - "code": '500', + "code": "500", }, BadRequest: { "message": "The request is malformed.", - "code": '400', + "code": "400", }, BadRequestProblem: { "message": "The request is malformed.", - "code": '400', + "code": "400", }, ExtraParameterProblem: 
{ "message": "The request is malformed.", - "code": '400', + "code": "400", }, ValidationError: { "message": "The request is malformed.", - "code": '400', + "code": "400", }, Unauthorized: { "message": " The request is unauthorized.", - "code": '401', + "code": "401", }, Forbidden: { "message": "The requester is not authorized to perform this action.", - "code": '403', + "code": "403", }, NotFound: { "message": "The requested resource wasn't found.", - "code": '404', + "code": "404", }, InternalServerError: { "message": "An unexpected error occurred.", - "code": '500', + "code": "500", }, WorkflowNotFound: { - "message": "The requested workflow run wasn\'t found.", - "code": '404', + "message": "The requested workflow run wasn't found.", + "code": "404", }, -} \ No newline at end of file +} diff --git a/cwl_wes/ga4gh/__init__.py b/cwl_wes/ga4gh/__init__.py index e69de29..cb92431 100644 --- a/cwl_wes/ga4gh/__init__.py +++ b/cwl_wes/ga4gh/__init__.py @@ -0,0 +1 @@ +"""Controllers superpackage.""" diff --git a/cwl_wes/ga4gh/wes/__init__.py b/cwl_wes/ga4gh/wes/__init__.py index e69de29..dd66766 100644 --- a/cwl_wes/ga4gh/wes/__init__.py +++ b/cwl_wes/ga4gh/wes/__init__.py @@ -0,0 +1 @@ +"""cwl-WES controllers package.""" diff --git a/cwl_wes/ga4gh/wes/endpoints/__init__.py b/cwl_wes/ga4gh/wes/endpoints/__init__.py index e69de29..934f923 100644 --- a/cwl_wes/ga4gh/wes/endpoints/__init__.py +++ b/cwl_wes/ga4gh/wes/endpoints/__init__.py @@ -0,0 +1 @@ +"""cwl-WES controllers subpackage.""" diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py index 6c5e7fa..1e564b8 100644 --- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py +++ b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py @@ -3,15 +3,14 @@ import logging from typing import Dict -from celery import (Celery, uuid) +from celery import Celery, uuid from connexion.exceptions import Forbidden - from flask import Config from pymongo.collection import Collection from cwl_wes.exceptions import 
WorkflowNotFound from cwl_wes.ga4gh.wes.states import States -from cwl_wes.tasks.tasks.cancel_run import task__cancel_run +from cwl_wes.tasks.cancel_run import task__cancel_run # Get logger instance @@ -20,23 +19,21 @@ # Utility function for endpoint POST /runs//delete def cancel_run( - config: Config, - celery_app: Celery, - run_id: str, - *args, - **kwargs + config: Config, celery_app: Celery, run_id: str, *args, **kwargs ) -> Dict: """Cancels running workflow.""" foca_config = config.foca - collection_runs: Collection = foca_config.db.dbs['cwl-wes-db'].collections['runs'].client + collection_runs: Collection = ( + foca_config.db.dbs["cwl-wes-db"].collections["runs"].client + ) document = collection_runs.find_one( - filter={'run_id': run_id}, + filter={"run_id": run_id}, projection={ - 'user_id': True, - 'task_id': True, - 'api.state': True, - '_id': False, - } + "user_id": True, + "task_id": True, + "api.state": True, + "_id": False, + }, ) # Raise error if workflow run was not found @@ -46,20 +43,20 @@ def cancel_run( # Raise error trying to access workflow run that is not owned by user # Only if authorization enabled - if 'user_id' in kwargs and document['user_id'] != kwargs['user_id']: + if "user_id" in kwargs and document["user_id"] != kwargs["user_id"]: logger.error( ( "User '{user_id}' is not allowed to access workflow run " "'{run_id}'." ).format( - user_id=kwargs['user_id'], + user_id=kwargs["user_id"], run_id=run_id, ) ) raise Forbidden # Cancel unfinished workflow run in background - if document['api']['state'] in States.CANCELABLE: + if document["api"]["state"] in States.CANCELABLE: # Get timeout duration timeout_duration = foca_config.custom.controller.timeout_cancel_run @@ -68,8 +65,7 @@ def cancel_run( task_id = uuid() logger.info( ( - "Canceling run '{run_id}' as background task " - "'{task_id}'..." + "Canceling run '{run_id}' as background task " "'{task_id}'..." 
).format( run_id=run_id, task_id=task_id, @@ -78,13 +74,13 @@ def cancel_run( task__cancel_run.apply_async( None, { - 'run_id': run_id, - 'task_id': document['task_id'], - 'token': kwargs.get('jwt'), + "run_id": run_id, + "task_id": document["task_id"], + "token": kwargs.get("jwt"), }, task_id=task_id, soft_time_limit=timeout_duration, ) - response = {'run_id': run_id} + response = {"run_id": run_id} return response diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py index 07d15ab..2131522 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py @@ -1,53 +1,49 @@ """Utility function for GET /runs/{run_id} endpoint.""" -from connexion.exceptions import Forbidden import logging - from typing import Dict -from cwl_wes.exceptions import WorkflowNotFound +from connexion.exceptions import Forbidden from flask import Config from pymongo.collection import Collection +from cwl_wes.exceptions import WorkflowNotFound # Get logger instance logger = logging.getLogger(__name__) # Utility function for endpoint GET /runs/ -def get_run_log( - config: Config, - run_id: str, - *args, - **kwargs -) -> Dict: +def get_run_log(config: Config, run_id: str, *args, **kwargs) -> Dict: """Gets detailed log information for specific run.""" - collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client + collection_runs: Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) document = collection_runs.find_one( - filter={'run_id': run_id}, + filter={"run_id": run_id}, projection={ - 'user_id': True, - 'api': True, - '_id': False, - } + "user_id": True, + "api": True, + "_id": False, + }, ) # Raise error if workflow run was not found or has no task ID if document: - run_log = document['api'] + run_log = document["api"] else: logger.error("Run '{run_id}' not found.".format(run_id=run_id)) raise WorkflowNotFound # Raise error trying to access 
workflow run that is not owned by user # Only if authorization enabled - if 'user_id' in kwargs and document['user_id'] != kwargs['user_id']: + if "user_id" in kwargs and document["user_id"] != kwargs["user_id"]: logger.error( ( "User '{user_id}' is not allowed to access workflow run " "'{run_id}'." ).format( - user_id=kwargs['user_id'], + user_id=kwargs["user_id"], run_id=run_id, ) ) diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py index 516c235..256a596 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py @@ -1,60 +1,54 @@ """Utility function for GET /runs/{run_id}/status endpoint.""" -from connexion.exceptions import Forbidden import logging - from typing import Dict +from connexion.exceptions import Forbidden from flask import Config -from cwl_wes.exceptions import WorkflowNotFound from pymongo.collection import Collection +from cwl_wes.exceptions import WorkflowNotFound + # Get logger instance logger = logging.getLogger(__name__) # Utility function for endpoint GET /runs//status -def get_run_status( - config: Config, - run_id: str, - *args, - **kwargs -) -> Dict: +def get_run_status(config: Config, run_id: str, *args, **kwargs) -> Dict: """Gets status information for specific run.""" - collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client + collection_runs: Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) document = collection_runs.find_one( - filter={'run_id': run_id}, + filter={"run_id": run_id}, projection={ - 'user_id': True, - 'api.state': True, - '_id': False, - } + "user_id": True, + "api.state": True, + "_id": False, + }, ) # Raise error if workflow run was not found or has no task ID if document: - state = document['api']['state'] + state = document["api"]["state"] else: logger.error("Run '{run_id}' not found.".format(run_id=run_id)) raise WorkflowNotFound # Raise error 
trying to access workflow run that is not owned by user # Only if authorization enabled - if 'user_id' in kwargs and document['user_id'] != kwargs['user_id']: + if "user_id" in kwargs and document["user_id"] != kwargs["user_id"]: logger.error( ( "User '{user_id}' is not allowed to access workflow run " "'{run_id}'." ).format( - user_id=kwargs['user_id'], + user_id=kwargs["user_id"], run_id=run_id, ) ) raise Forbidden - response = { - 'run_id': run_id, - 'state': state - } + response = {"run_id": run_id, "state": state} return response diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py index a79ea5f..715211d 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py @@ -3,13 +3,13 @@ from copy import deepcopy from datetime import datetime import logging -from typing import (Any, Dict, Mapping) +from typing import Any, Dict -from pymongo import collection as Collection from flask import Config +from pymongo import collection as Collection -import cwl_wes.utils.db_utils as db_utils from cwl_wes.ga4gh.wes.states import States +import cwl_wes.utils.db_utils as db_utils # Get logger instance @@ -18,40 +18,43 @@ # Helper function GET /service-info def get_service_info( - config: Config, - silent: bool = False, - *args: Any, - **kwarg: Any + config: Config, silent: bool = False, *args: Any, **kwarg: Any ): """Returns readily formatted service info or `None` (in silent mode); creates service info database document if it does not exist.""" - collection_service_info: Collection.Collection = config.foca.db.dbs['cwl-wes-db'].collections['service_info'].client - collection_runs: Collection.Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client + collection_service_info: Collection.Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["service_info"].client + ) + collection_runs: Collection.Collection = ( + 
config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) service_info = deepcopy(config.foca.custom.service_info.dict()) # Write current service info to database if absent or different from latest if not service_info == db_utils.find_one_latest(collection_service_info): collection_service_info.insert(service_info) - logger.info('Updated service info: {service_info}'.format( - service_info=service_info, - )) + logger.info( + "Updated service info: {service_info}".format( + service_info=service_info, + ) + ) else: - logger.debug('No change in service info. Not updated.') + logger.debug("No change in service info. Not updated.") # Return None when called in silent mode: if silent: return None # Add current system state counts - service_info['system_state_counts'] = __get_system_state_counts( + service_info["system_state_counts"] = __get_system_state_counts( collection_runs ) # Add timestamps _id = db_utils.find_id_latest(collection_service_info) if _id: - service_info['tags']['last_service_info_update'] = _id.generation_time - service_info['tags']['current_time'] = datetime.utcnow().isoformat() + service_info["tags"]["last_service_info_update"] = _id.generation_time + service_info["tags"]["current_time"] = datetime.utcnow().isoformat() return service_info @@ -64,14 +67,14 @@ def __get_system_state_counts(collection: Collection) -> Dict[str, int]: cursor = collection.find( filter={}, projection={ - 'api.state': True, - '_id': False, - } + "api.state": True, + "_id": False, + }, ) # Iterate over states and increase counter for record in cursor: - current_counts[record['api']['state']] += 1 + current_counts[record["api"]["state"]] += 1 return current_counts diff --git a/cwl_wes/ga4gh/wes/endpoints/list_runs.py b/cwl_wes/ga4gh/wes/endpoints/list_runs.py index 485443a..c135db3 100644 --- a/cwl_wes/ga4gh/wes/endpoints/list_runs.py +++ b/cwl_wes/ga4gh/wes/endpoints/list_runs.py @@ -1,61 +1,62 @@ """Utility function for GET /runs endpoint.""" + import logging from 
typing import Dict from bson.objectid import ObjectId -from pymongo.collection import Collection from flask import Config - +from pymongo.collection import Collection # Get logger instance logger = logging.getLogger(__name__) # Utility function for endpoint GET /runs -def list_runs( - config: Config, - *args, - **kwargs -) -> Dict: +def list_runs(config: Config, *args, **kwargs) -> Dict: """Lists IDs and status for all workflow runs.""" - collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client + collection_runs: Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) # Fall back to default page size if not provided by user - if 'page_size' in kwargs: - page_size = kwargs['page_size'] + if "page_size" in kwargs: + page_size = kwargs["page_size"] else: page_size = config.foca.custom.controller.default_page_size # Extract/set page token - if 'page_token' in kwargs: - page_token = kwargs['page_token'] + if "page_token" in kwargs: + page_token = kwargs["page_token"] else: - page_token = '' + page_token = "" # Initialize filter dictionary filter_dict = {} # Add filter for user-owned runs if user ID is available - if 'user_id' in kwargs: - filter_dict['user_id'] = kwargs['user_id'] - + if "user_id" in kwargs: + filter_dict["user_id"] = kwargs["user_id"] + # Add pagination filter based on last object ID - if page_token != '': - filter_dict['_id'] = {'$lt': ObjectId(page_token)} + if page_token != "": + filter_dict["_id"] = {"$lt": ObjectId(page_token)} # Query database for workflow runs - cursor = collection_runs.find( - filter=filter_dict, - projection={ - 'run_id': True, - 'api.state': True, - } - # Sort results by descending object ID (+/- newest to oldest) - ).sort( - '_id', -1 - # Implement page size limit - ).limit( - page_size + cursor = ( + collection_runs.find( + filter=filter_dict, + projection={ + "run_id": True, + "api.state": True, + } + # Sort results by descending object ID (+/- newest to oldest) + ) 
+ .sort( + "_id", + -1 + # Implement page size limit + ) + .limit(page_size) ) # Convert cursor to list @@ -63,18 +64,15 @@ def list_runs( # Get next page token from ID of last run in cursor if runs_list: - next_page_token = str(runs_list[-1]['_id']) + next_page_token = str(runs_list[-1]["_id"]) else: - next_page_token = '' + next_page_token = "" # Reshape list of runs for run in runs_list: - del run['_id'] - run['state'] = run['api']['state'] - del run['api'] + del run["_id"] + run["state"] = run["api"]["state"] + del run["api"] # Build and return response - return { - 'next_page_token': next_page_token, - 'runs': runs_list - } + return {"next_page_token": next_page_token, "runs": runs_list} diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index d7f2842..ec527a4 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -1,29 +1,26 @@ """Utility functions for POST /runs endpoint.""" +from json import decoder, loads import logging import os +from random import choice import re import shutil import string # noqa: F401 import subprocess +from typing import Dict, List, Optional from celery import uuid -from flask import current_app -from json import (decoder, loads) +from flask import current_app, Config, request +from pymongo.collection import Collection from pymongo.errors import DuplicateKeyError -from random import choice -from typing import (Dict, List, Optional) from yaml import dump from werkzeug.datastructures import ImmutableMultiDict from werkzeug.utils import secure_filename -from flask import Config, request -from pymongo.collection import Collection - from cwl_wes.exceptions import BadRequest -from cwl_wes.tasks.tasks.run_workflow import task__run_workflow -from cwl_wes.ga4gh.wes.endpoints.utils.drs import translate_drs_uris - +from cwl_wes.tasks.run_workflow import task__run_workflow +from cwl_wes.utils.drs import translate_drs_uris # Get logger 
instance logger = logging.getLogger(__name__) @@ -31,10 +28,7 @@ # Utility function for endpoint POST /runs def run_workflow( - config: Config, - form_data: ImmutableMultiDict, - *args, - **kwargs + config: Config, form_data: ImmutableMultiDict, *args, **kwargs ) -> Dict: """Executes workflow and save info to database; returns unique run id.""" # Validate data and prepare run environment @@ -45,19 +39,13 @@ def run_workflow( __check_service_info_compatibility(data=form_data_dict) document = __init_run_document(data=form_data_dict) document = __create_run_environment( - config=config, - document=document, - **kwargs + config=config, document=document, **kwargs ) # Start workflow run in background - __run_workflow( - config=config, - document=document, - **kwargs - ) + __run_workflow(config=config, document=document, **kwargs) - response = {'run_id': document['run_id']} + response = {"run_id": document["run_id"]} return response @@ -70,7 +58,7 @@ def __secure_join(basedir: str, fname: str) -> str: def __immutable_multi_dict_to_nested_dict( - multi_dict: ImmutableMultiDict + multi_dict: ImmutableMultiDict, ) -> Dict: """Converts ImmutableMultiDict to nested dictionary.""" # Convert to flat dictionary @@ -115,20 +103,20 @@ def __validate_run_workflow_request(data: Dict) -> None: # required = False params_required = { - 'workflow_params', - 'workflow_type', - 'workflow_type_version', - 'workflow_url', + "workflow_params", + "workflow_type", + "workflow_type_version", + "workflow_url", } params_str = [ - 'workflow_type', - 'workflow_type_version', - 'workflow_url', + "workflow_type", + "workflow_type_version", + "workflow_url", ] params_dict = [ - 'workflow_params', - 'workflow_engine_parameters', - 'tags', + "workflow_params", + "workflow_engine_parameters", + "tags", ] # Raise error if any required params are missing @@ -153,7 +141,7 @@ def __validate_run_workflow_request(data: Dict) -> None: invalid = True if invalid: - logger.error('POST request does not conform to 
schema.') + logger.error("POST request does not conform to schema.") raise BadRequest return None @@ -168,24 +156,22 @@ def __check_service_info_compatibility(data: Dict) -> None: def __init_run_document(data: Dict) -> Dict: """Initializes workflow run document.""" document: Dict = dict() - document['api'] = dict() - document['internal'] = dict() - document['api']['request'] = data - document['api']['state'] = 'UNKNOWN' - document['api']['run_log'] = dict() - document['api']['task_logs'] = list() - document['api']['outputs'] = dict() + document["api"] = dict() + document["internal"] = dict() + document["api"]["request"] = data + document["api"]["state"] = "UNKNOWN" + document["api"]["run_log"] = dict() + document["api"]["task_logs"] = list() + document["api"]["outputs"] = dict() return document -def __create_run_environment( - config: Config, - document: Dict, - **kwargs -) -> Dict: +def __create_run_environment(config: Config, document: Dict, **kwargs) -> Dict: """Creates unique run identifier and permanent and temporary storage directories for current run.""" - collection_runs: Collection = config.foca.db.dbs['cwl-wes-db'].collections['runs'].client + collection_runs: Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) out_dir = config.foca.custom.storage.permanent_dir tmp_dir = config.foca.custom.storage.tmp_dir run_id_charset = eval(config.foca.custom.controller.runs_id.charset) @@ -220,14 +206,14 @@ def __create_run_environment( continue # Add run/task/user identifier, temp/output directories to document - document['run_id'] = run_id - document['task_id'] = task_id - if 'user_id' in kwargs: - document['user_id'] = kwargs['user_id'] + document["run_id"] = run_id + document["task_id"] = task_id + if "user_id" in kwargs: + document["user_id"] = kwargs["user_id"] else: - document['user_id'] = None - document['internal']['tmp_dir'] = current_tmp_dir - document['internal']['out_dir'] = current_out_dir + document["user_id"] = None + 
document["internal"]["tmp_dir"] = current_tmp_dir + document["internal"]["out_dir"] = current_out_dir # Process worflow attachments document = __process_workflow_attachments(document) @@ -248,23 +234,25 @@ def __create_run_environment( # Catch other database errors # TODO: implement properly except Exception as e: - print('Database error') + print("Database error") print(e) break # Exit loop break - + # translate DRS URIs to access URLs drs_server_conf = current_app.config.foca.custom.controller.drs_server service_info_conf = current_app.config.foca.custom.service_info file_types: List[str] = drs_server_conf.file_types - supported_access_methods: List[str] = service_info_conf.supported_filesystem_protocols + supported_access_methods: List[ + str + ] = service_info_conf.supported_filesystem_protocols port: Optional[int] = drs_server_conf.port base_path: Optional[str] = drs_server_conf.base_path use_http: bool = drs_server_conf.use_http translate_drs_uris( - path=document['internal']['workflow_files'], + path=document["internal"]["workflow_files"], file_types=file_types, supported_access_methods=supported_access_methods, port=port, @@ -275,12 +263,9 @@ def __create_run_environment( return document -def __create_run_id( - charset: str = '0123456789', - length: int = 6 -) -> str: +def __create_run_id(charset: str = "0123456789", length: int = 6) -> str: """Creates random run ID.""" - return ''.join(choice(charset) for __ in range(length)) + return "".join(choice(charset) for __ in range(length)) def __process_workflow_attachments(data: Dict) -> Dict: @@ -317,16 +302,14 @@ def __process_workflow_attachments(data: Dict) -> Dict: # specified, are: ',', ';', ':', '|' re_git_file = re.compile( ( - r'^(https?:.*)\/(blob|src|tree)\/(.*?)\/(.*?\.(cwl|yml|yaml|json))' - r'[,:;|]?(.*\.(yml|yaml|json))?' + r"^(https?:.*)\/(blob|src|tree)\/(.*?)\/(.*?\.(cwl|yml|yaml|json))" + r"[,:;|]?(.*\.(yml|yaml|json))?" 
) ) # Create directory for storing workflow files - data['internal']['workflow_files'] = workflow_dir = os.path.abspath( - os.path.join( - data['internal']['out_dir'], 'workflow_files' - ) + data["internal"]["workflow_files"] = workflow_dir = os.path.abspath( + os.path.join(data["internal"]["out_dir"], "workflow_files") ) try: os.mkdir(workflow_dir) @@ -336,29 +319,24 @@ def __process_workflow_attachments(data: Dict) -> Dict: pass # Get main workflow file - user_string = data['api']['request']['workflow_url'] + user_string = data["api"]["request"]["workflow_url"] m = re_git_file.match(user_string) # Get workflow from Git repo if regex matches if m: - repo_url = '.'.join([m.group(1), 'git']) + repo_url = ".".join([m.group(1), "git"]) branch_commit = m.group(3) cwl_path = m.group(4) # Try to clone repo if not subprocess.run( - [ - 'git', - 'clone', - repo_url, - os.path.join(workflow_dir, 'repo') - ], - check=True + ["git", "clone", repo_url, os.path.join(workflow_dir, "repo")], + check=True, ): logger.error( ( - 'Could not clone Git repository. Check value of ' + "Could not clone Git repository. Check value of " "'workflow_url' in run request." ) ) @@ -367,29 +345,27 @@ def __process_workflow_attachments(data: Dict) -> Dict: # Try to checkout branch/commit if not subprocess.run( [ - 'git', - '--git-dir', - os.path.join(workflow_dir, 'repo', '.git'), - '--work-tree', - os.path.join(workflow_dir, 'repo'), - 'checkout', - branch_commit + "git", + "--git-dir", + os.path.join(workflow_dir, "repo", ".git"), + "--work-tree", + os.path.join(workflow_dir, "repo"), + "checkout", + branch_commit, ], - check=True + check=True, ): logger.error( ( - 'Could not checkout repository commit/branch. Check value ' + "Could not checkout repository commit/branch. Check value " "of 'workflow_url' in run request." 
) ) raise BadRequest # Set CWL path - data['internal']['cwl_path'] = os.path.join( - workflow_dir, - 'repo', - cwl_path + data["internal"]["cwl_path"] = os.path.join( + workflow_dir, "repo", cwl_path ) # Else assume value of 'workflow_url' represents file on local file system, @@ -408,126 +384,129 @@ def __process_workflow_attachments(data: Dict) -> Dict: shutil.copyfileobj(attachment.stream, dest) # Adjust workflow_url to point to workflow directory. - req_data = data['api']['request'] - workflow_url = __secure_join(workflow_dir, req_data['workflow_url']) + req_data = data["api"]["request"] + workflow_url = __secure_join( + workflow_dir, req_data["workflow_url"] + ) if os.path.exists(workflow_url): - req_data['workflow_url'] = workflow_url + req_data["workflow_url"] = workflow_url # Set main CWL workflow file path - data['internal']['cwl_path'] = os.path.abspath( - data['api']['request']['workflow_url'] + data["internal"]["cwl_path"] = os.path.abspath( + data["api"]["request"]["workflow_url"] ) # Extract name and extensions of workflow workflow_name_ext = os.path.splitext( - os.path.basename( - data['internal']['cwl_path'] - ) + os.path.basename(data["internal"]["cwl_path"]) ) # Get parameter file workflow_name_ext = os.path.splitext( - os.path.basename( - data['internal']['cwl_path'] - ) + os.path.basename(data["internal"]["cwl_path"]) ) - # Try to get parameters from 'workflow_params' field - if data['api']['request']['workflow_params']: + if data["api"]["request"]["workflow_params"]: # Replace `DRS URIs` in 'workflow_params' # replace_drs_uris(data['api']['request']['workflow_params']) - data['internal']['param_file_path'] = os.path.join( + data["internal"]["param_file_path"] = os.path.join( workflow_dir, - '.'.join([ - str(workflow_name_ext[0]), - 'yml', - ]), + ".".join( + [ + str(workflow_name_ext[0]), + "yml", + ] + ), ) - with open(data['internal']['param_file_path'], 'w') as yaml_file: + with open(data["internal"]["param_file_path"], "w") as yaml_file: 
dump( - data['api']['request']['workflow_params'], + data["api"]["request"]["workflow_params"], yaml_file, allow_unicode=True, - default_flow_style=False + default_flow_style=False, ) # Or from provided relative file path in repo elif m and m.group(6): param_path = m.group(6) - data['internal']['param_file_path'] = os.path.join( + data["internal"]["param_file_path"] = os.path.join( workflow_dir, - 'repo', + "repo", param_path, ) # Else try to see if there is a 'yml', 'yaml' or 'json' file with exactly # the same basename as CWL in same dir else: - param_file_extensions = ['yml', 'yaml', 'json'] + param_file_extensions = ["yml", "yaml", "json"] for ext in param_file_extensions: possible_param_file = os.path.join( workflow_dir, - 'repo', - '.'.join([ - str(workflow_name_ext[0]), - ext, - ]), + "repo", + ".".join( + [ + str(workflow_name_ext[0]), + ext, + ] + ), ) if os.path.isfile(possible_param_file): - data['internal']['param_file_path'] = possible_param_file + data["internal"]["param_file_path"] = possible_param_file break # Raise BadRequest if not parameter file was found - if 'param_file_path' not in data['internal']: + if "param_file_path" not in data["internal"]: raise BadRequest # Extract workflow attachments from form data dictionary - if 'workflow_attachment' in data['api']['request']: + if "workflow_attachment" in data["api"]["request"]: # TODO: do something with data['workflow_attachment'] # Strip workflow attachments from data - del data['api']['request']['workflow_attachment'] + del data["api"]["request"]["workflow_attachment"] # Return form data stripped of workflow attachments return data -def __run_workflow( - config: Config, - document: Dict, - **kwargs -) -> None: +def __run_workflow(config: Config, document: Dict, **kwargs) -> None: """Helper function `run_workflow()`.""" tes_url = config.foca.custom.controller.tes_server.url remote_storage_url = config.foca.custom.storage.remote_storage_url - run_id = document['run_id'] - task_id = 
document['task_id'] - tmp_dir = document['internal']['tmp_dir'] - cwl_path = document['internal']['cwl_path'] - param_file_path = document['internal']['param_file_path'] + run_id = document["run_id"] + task_id = document["task_id"] + tmp_dir = document["internal"]["tmp_dir"] + cwl_path = document["internal"]["cwl_path"] + param_file_path = document["internal"]["param_file_path"] # Build command command_list = [ - 'cwl-tes', - '--debug', - '--leave-outputs', - '--remote-storage-url', remote_storage_url, - '--tes', tes_url, + "cwl-tes", + "--debug", + "--leave-outputs", + "--remote-storage-url", + remote_storage_url, + "--tes", + tes_url, cwl_path, - param_file_path + param_file_path, ] # Add authorization parameters - if 'jwt' in kwargs \ - and 'claims' in kwargs \ - and 'public_key' in kwargs['claims']: + if ( + "jwt" in kwargs + and "claims" in kwargs + and "public_key" in kwargs["claims"] + ): auth_params = [ - '--token-public-key', kwargs['claims']['public_key'], - '--token', kwargs['jwt'], + "--token-public-key", + kwargs["claims"]["public_key"], + "--token", + kwargs["jwt"], ] command_list[2:2] = auth_params @@ -562,9 +541,9 @@ def __run_workflow( task__run_workflow.apply_async( None, { - 'command_list': command_list, - 'tmp_dir': tmp_dir, - 'token': kwargs.get('jwt'), + "command_list": command_list, + "tmp_dir": tmp_dir, + "token": kwargs.get("jwt"), }, task_id=task_id, soft_time_limit=timeout_duration, diff --git a/cwl_wes/ga4gh/wes/endpoints/utils/__init__.py b/cwl_wes/ga4gh/wes/endpoints/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index 835ae3b..e510c2c 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -25,10 +25,7 @@ def GetRunLog(run_id, *args, **kwargs): """Returns detailed run info.""" response = get_run_log( - config=current_app.config, - run_id=run_id, - *args, - **kwargs + config=current_app.config, run_id=run_id, *args, **kwargs 
) return response @@ -52,10 +49,7 @@ def CancelRun(run_id, *args, **kwargs): def GetRunStatus(run_id, *args, **kwargs): """Returns run status.""" response = get_run_status( - config=current_app.config, - run_id=run_id, - *args, - **kwargs + config=current_app.config, run_id=run_id, *args, **kwargs ) return response @@ -64,11 +58,7 @@ def GetRunStatus(run_id, *args, **kwargs): @log_traffic def GetServiceInfo(*args, **kwargs): """Returns service info.""" - response = get_service_info( - config=current_app.config, - *args, - **kwargs - ) + response = get_service_info(config=current_app.config, *args, **kwargs) return response @@ -76,11 +66,7 @@ def GetServiceInfo(*args, **kwargs): @log_traffic def ListRuns(*args, **kwargs): """Lists IDs and status of all workflow runs.""" - response = list_runs( - config=current_app.config, - *args, - **kwargs - ) + response = list_runs(config=current_app.config, *args, **kwargs) return response @@ -89,9 +75,6 @@ def ListRuns(*args, **kwargs): def RunWorkflow(*args, **kwargs): """Executes workflow.""" response = run_workflow( - config=current_app.config, - form_data=request.form, - *args, - **kwargs + config=current_app.config, form_data=request.form, *args, **kwargs ) return response diff --git a/cwl_wes/ga4gh/wes/service_info.py b/cwl_wes/ga4gh/wes/service_info.py index 61e9b94..0bcf351 100644 --- a/cwl_wes/ga4gh/wes/service_info.py +++ b/cwl_wes/ga4gh/wes/service_info.py @@ -17,7 +17,6 @@ class ServiceInfo: - def __init__(self) -> None: """Class for WES API service info server-side controller methods. 
@@ -34,11 +33,12 @@ def __init__(self) -> None: self.config: Dict = current_app.config self.foca_config: Config = self.config.foca self.db_client_service_info: Collection = ( - self.foca_config.db.dbs['cwl-wes-db'] - .collections['service_info'].client + self.foca_config.db.dbs["cwl-wes-db"] + .collections["service_info"] + .client ) self.db_client_runs: Collection = ( - self.foca_config.db.dbs['cwl-wes-db'].collections['runs'].client + self.foca_config.db.dbs["cwl-wes-db"].collections["runs"].client ) self.object_id: str = "000000000000000000000000" @@ -55,13 +55,13 @@ def get_service_info(self, get_counts: bool = True) -> Dict: NotFound: Service info was not found. """ service_info = self.db_client_service_info.find_one( - {'_id': ObjectId(self.object_id)}, - {'_id': False}, + {"_id": ObjectId(self.object_id)}, + {"_id": False}, ) if service_info is None: raise NotFound if get_counts: - service_info['system_state_counts'] = self._get_state_counts() + service_info["system_state_counts"] = self._get_state_counts() return service_info def set_service_info( @@ -74,7 +74,7 @@ def set_service_info( data: Dictionary of service info values. Cf. 
""" self.db_client_service_info.replace_one( - filter={'_id': ObjectId(self.object_id)}, + filter={"_id": ObjectId(self.object_id)}, replacement=data, upsert=True, ) @@ -86,10 +86,10 @@ def _get_state_counts(self) -> Dict[str, int]: cursor = self.db_client_runs.find( filter={}, projection={ - 'run_log.state': True, - '_id': False, - } + "run_log.state": True, + "_id": False, + }, ) for record in cursor: - current_counts[record['run_log']['state']] += 1 + current_counts[record["run_log"]["state"]] += 1 return current_counts diff --git a/cwl_wes/ga4gh/wes/states.py b/cwl_wes/ga4gh/wes/states.py index 3ab5fc2..e0692ae 100644 --- a/cwl_wes/ga4gh/wes/states.py +++ b/cwl_wes/ga4gh/wes/states.py @@ -1,25 +1,25 @@ -class States(): +class States: UNDEFINED = [ - 'UNKNOWN', + "UNKNOWN", ] CANCELABLE = [ - 'INITIALIZING', - 'PAUSED', - 'QUEUED', - 'RUNNING', + "INITIALIZING", + "PAUSED", + "QUEUED", + "RUNNING", ] UNFINISHED = CANCELABLE + [ - 'CANCELING', + "CANCELING", ] FINISHED = [ - 'COMPLETE', - 'CANCELED', - 'EXECUTOR_ERROR', - 'SYSTEM_ERROR', + "COMPLETE", + "CANCELED", + "EXECUTOR_ERROR", + "SYSTEM_ERROR", ] DEFINED = UNFINISHED + FINISHED diff --git a/cwl_wes/gunicorn.py b/cwl_wes/gunicorn.py index 7ce6163..06b006c 100644 --- a/cwl_wes/gunicorn.py +++ b/cwl_wes/gunicorn.py @@ -7,26 +7,26 @@ app_config = app.config.foca # Set Gunicorn number of workers and threads -workers = int(os.environ.get('GUNICORN_PROCESSES', '1')) -threads = int(os.environ.get('GUNICORN_THREADS', '1')) +workers = int(os.environ.get("GUNICORN_PROCESSES", "1")) +threads = int(os.environ.get("GUNICORN_THREADS", "1")) # Set allowed IPs -forwarded_allow_ips = '*' +forwarded_allow_ips = "*" # Set Gunicorn bind address -bind = '{address}:{port}'.format( +bind = "{address}:{port}".format( address=app_config.server.host, port=app_config.server.port, ) # Source the environment variables for the Gunicorn workers raw_env = [ - "WES_CONFIG=%s" % os.environ.get('WES_CONFIG', ''), - "RABBIT_HOST=%s" % 
os.environ.get('RABBIT_HOST', app_config.jobs.host), - "RABBIT_PORT=%s" % os.environ.get('RABBIT_PORT', app_config.jobs.port), - "MONGO_HOST=%s" % os.environ.get('MONGO_HOST', app_config.db.host), - "MONGO_PORT=%s" % os.environ.get('MONGO_PORT', app_config.db.port), - "MONGO_DBNAME=%s" % os.environ.get('MONGO_DBNAME', 'cwl-wes-db'), - "MONGO_USERNAME=%s" % os.environ.get('MONGO_USERNAME', ''), - "MONGO_PASSWORD=%s" % os.environ.get('MONGO_PASSWORD', '') + "WES_CONFIG=%s" % os.environ.get("WES_CONFIG", ""), + "RABBIT_HOST=%s" % os.environ.get("RABBIT_HOST", app_config.jobs.host), + "RABBIT_PORT=%s" % os.environ.get("RABBIT_PORT", app_config.jobs.port), + "MONGO_HOST=%s" % os.environ.get("MONGO_HOST", app_config.db.host), + "MONGO_PORT=%s" % os.environ.get("MONGO_PORT", app_config.db.port), + "MONGO_DBNAME=%s" % os.environ.get("MONGO_DBNAME", "cwl-wes-db"), + "MONGO_USERNAME=%s" % os.environ.get("MONGO_USERNAME", ""), + "MONGO_PASSWORD=%s" % os.environ.get("MONGO_PASSWORD", ""), ] diff --git a/cwl_wes/tasks/__init__.py b/cwl_wes/tasks/__init__.py index e69de29..b86bf42 100644 --- a/cwl_wes/tasks/__init__.py +++ b/cwl_wes/tasks/__init__.py @@ -0,0 +1 @@ +"""cwl-WES background tasks.""" diff --git a/cwl_wes/tasks/tasks/cancel_run.py b/cwl_wes/tasks/cancel_run.py similarity index 85% rename from cwl_wes/tasks/tasks/cancel_run.py rename to cwl_wes/tasks/cancel_run.py index 3e4313f..27a4c7c 100644 --- a/cwl_wes/tasks/tasks/cancel_run.py +++ b/cwl_wes/tasks/cancel_run.py @@ -2,19 +2,18 @@ import logging from requests import HTTPError -import tes import time -from typing import (List, Optional) +from typing import List, Optional from celery.exceptions import SoftTimeLimitExceeded from flask import current_app +from foca.database.register_mongodb import _create_mongo_client from pymongo import collection as Collection +import tes -from cwl_wes.worker import celery_app -import cwl_wes.utils.db_utils as db_utils -from foca.database.register_mongodb import _create_mongo_client 
from cwl_wes.ga4gh.wes.states import States -from cwl_wes.tasks.utils import set_run_state +import cwl_wes.utils.db as db_utils +from cwl_wes.worker import celery_app # Get logger instance @@ -22,7 +21,7 @@ @celery_app.task( - name='tasks.cancel_run', + name="tasks.cancel_run", ignore_result=True, bind=True, ) @@ -39,15 +38,15 @@ def task__cancel_run( app=current_app, host=foca_config.db.host, port=foca_config.db.port, - db='cwl-wes-db', + db="cwl-wes-db", ) - collection = mongo.db['runs'] + collection = mongo.db["runs"] # Set run state to 'CANCELING' - set_run_state( + db_utils.set_run_state( collection=collection, run_id=run_id, task_id=task_id, - state='CANCELING', + state="CANCELING", ) try: @@ -61,11 +60,11 @@ def task__cancel_run( token=token, ) except SoftTimeLimitExceeded as e: - set_run_state( + db_utils.set_run_state( collection=collection, run_id=run_id, task_id=task_id, - state='SYSTEM_ERROR', + state="SYSTEM_ERROR", ) logger.warning( ( @@ -110,11 +109,11 @@ def __cancel_tes_tasks( canceled = canceled + cancel time.sleep(timeout) document = collection.find_one( - filter={'run_id': run_id}, + filter={"run_id": run_id}, projection={ - 'api.state': True, - '_id': False, - } + "api.state": True, + "_id": False, + }, ) - if document['api']['state'] in States.FINISHED: + if document["api"]["state"] in States.FINISHED: break diff --git a/cwl_wes/tasks/tasks/cwl_log_processor.py b/cwl_wes/tasks/cwl_log_processor.py similarity index 84% rename from cwl_wes/tasks/tasks/cwl_log_processor.py rename to cwl_wes/tasks/cwl_log_processor.py index c26c60f..248cbc4 100644 --- a/cwl_wes/tasks/tasks/cwl_log_processor.py +++ b/cwl_wes/tasks/cwl_log_processor.py @@ -1,24 +1,25 @@ -import re -import os -import logging -from _io import TextIOWrapper -from typing import (Dict, List, Optional, Tuple) +"""cwl-tes log parser executed on worker.""" + from ast import literal_eval +from _io import TextIOWrapper +import logging +import os +import re +from typing import Dict, List, 
Optional, Tuple -import tes +import cwl_wes.utils.db as db_utils from cwl_wes.worker import celery_app -import cwl_wes.utils.db_utils as db_utils +import tes # Get logger instance logger = logging.getLogger(__name__) class CWLLogProcessor: - def __init__(self, tes_config, collection) -> None: self.tes_config = tes_config self.collection = collection - + def process_cwl_logs( self, task: celery_app.Task, @@ -26,12 +27,13 @@ def process_cwl_logs( token: Optional[str] = None, ) -> Tuple[List, List]: """Parses combinend cwl-tes STDOUT/STDERR and sends TES task IDs and state - updates to broker.""" + updates to broker. + """ stream_container: List = list() tes_states: Dict = dict() # Iterate over STDOUT/STDERR stream - for line in iter(stream.readline, ''): + for line in iter(stream.readline, ""): line = line.rstrip() @@ -73,14 +75,13 @@ def process_cwl_logs( return (stream_container, list(tes_states.keys())) - def process_tes_log(self, line: str) -> List[str]: """Handles irregularities arising from log parsing.""" lines: List = list() # Handle special case where FTP and cwl-tes logs are on same line re_ftp_cwl_tes = re.compile( - r'^(\*cmd\* .*)(\[step \w*\] produced output \{)$' + r"^(\*cmd\* .*)(\[step \w*\] produced output \{)$" ) m = re_ftp_cwl_tes.match(line) if m: @@ -113,7 +114,6 @@ def extract_tes_state( return (task_id, task_state) - def capture_tes_task_update( self, task: celery_app.Task, @@ -140,7 +140,7 @@ def capture_tes_task_update( except Exception as e: logger.exception( ( - 'Database error. Could not update log information for ' + "Database error. Could not update log information for " "task '{task}'. Original error message: {type}: {msg}" ).format( task=task.task_id, @@ -172,7 +172,7 @@ def capture_tes_task_update( except Exception as e: logger.exception( ( - 'Database error. Could not update log information for ' + "Database error. Could not update log information for " "task '{task}'. 
Original error message: {type}: {msg}" ).format( task=task.task_id, @@ -184,67 +184,68 @@ def capture_tes_task_update( class CWLTesProcessor: - def __init__(self, tes_config) -> None: self.tes_config = tes_config - + @staticmethod def __cwl_tes_outputs_parser(log: str) -> Dict: """Parses outputs from cwl-tes log.""" # Find outputs object in log string re_outputs = re.compile( r'(^\{$\n^ {4}"\S+": [\[\{]$\n(^ {4,}.*$\n)*^ {4}[\]\}]$\n^\}$\n)', - re.MULTILINE + re.MULTILINE, ) m = re_outputs.search(log) if m: return literal_eval(m.group(1)) else: return dict() - + @staticmethod def __cwl_tes_outputs_parser_list(log: List) -> Dict: - """This function parses outputs from the cwl-tes log""" - """The outputs JSON starts at the line before last in the logs""" - """So unless the outputs are empty ({}), parse upward,""" - """until you find the beginning of the JSON containing the outputs""" - - indices=range(len(log)-1,-1,-1) - - start=-1 - end=-1 + """Parse outputs from cwl-tes log. + + The outputs JSON starts at the line before last in the logs. So unless + the outputs are empty ({}), parse upward, until you find the beginning + of the JSON containing the outputs. + """ + + indices = range(len(log) - 1, -1, -1) + + start = -1 + end = -1 for index in indices: - if log[index].rstrip()=='{}': + if log[index].rstrip() == "{}": return dict() - elif log[index].rstrip()=='}': - end=index + elif log[index].rstrip() == "}": + end = index break - + # No valid JSON was found and the previous loop # reached the end of the log - if end==0: + if end == 0: return dict() - - indices=range(end-1,-1,-1) + + indices = range(end - 1, -1, -1) for index in indices: - if log[index].rstrip()=='{': - start=index + if log[index].rstrip() == "{": + start = index break - json=os.linesep.join(log[start:end+1]) + json = os.linesep.join(log[start:end + 1]) try: return literal_eval(json) except ValueError as verr: logger.exception( - "ValueError when evaluation JSON: '%s'. 
Original error message: %s" % \ - (json, verr) + "ValueError when evaluation JSON: '%s'. Original error message: %s" + % (json, verr) ) return dict() except SyntaxError as serr: logger.exception( - "SyntaxError when evaluation JSON: '%s'. Original error message: %s" % \ - (json, serr) + "SyntaxError when evaluation JSON: '%s'. Original error message: %s" + % (json, serr) ) return dict() @@ -271,8 +272,8 @@ def __get_tes_task_log( ) -> Dict: """Gets task log from TES instance.""" tes_client = tes.HTTPClient( - url=self.tes_config['url'], - timeout=self.tes_config['timeout'], + url=self.tes_config["url"], + timeout=self.tes_config["timeout"], token=token, ) @@ -281,7 +282,7 @@ def __get_tes_task_log( try: task_log = tes_client.get_task( task_id=tes_id, - view=self.tes_config['query_params'], + view=self.tes_config["query_params"], ).as_dict() except Exception as e: # TODO: handle more robustly: only 400/Bad Request is okay; @@ -292,6 +293,6 @@ def __get_tes_task_log( ) task_log = {} - logger.debug(f'Task log: {task_log}') + logger.debug(f"Task log: {task_log}") - return task_log \ No newline at end of file + return task_log diff --git a/cwl_wes/tasks/tasks/run_workflow.py b/cwl_wes/tasks/run_workflow.py similarity index 68% rename from cwl_wes/tasks/tasks/run_workflow.py rename to cwl_wes/tasks/run_workflow.py index e910ea5..1903c76 100644 --- a/cwl_wes/tasks/tasks/run_workflow.py +++ b/cwl_wes/tasks/run_workflow.py @@ -1,10 +1,10 @@ """Celery background task to start workflow run.""" import logging -from typing import (List, Optional, Tuple) +from typing import List, Optional, Tuple from cwl_wes.worker import celery_app -from cwl_wes.tasks.tasks.workflow_run_manager import WorkflowRunManager +from cwl_wes.tasks.workflow_run_manager import WorkflowRunManager # Get logger instance @@ -12,10 +12,10 @@ @celery_app.task( - name='tasks.run_workflow', + name="tasks.run_workflow", bind=True, ignore_result=True, - track_started=True + track_started=True, ) def 
task__run_workflow( self, @@ -26,10 +26,7 @@ def task__run_workflow( """Adds workflow run to task queue.""" # Execute task in background workflow_run_manager = WorkflowRunManager( - task=self, - command_list=command_list, - tmp_dir=tmp_dir, - token=token + task=self, command_list=command_list, tmp_dir=tmp_dir, token=token ) return_val = workflow_run_manager.run_workflow() return return_val diff --git a/cwl_wes/tasks/tasks/__init__.py b/cwl_wes/tasks/tasks/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/cwl_wes/tasks/utils.py b/cwl_wes/tasks/utils.py deleted file mode 100644 index 53dcb03..0000000 --- a/cwl_wes/tasks/utils.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Utility functions for Celery background tasks.""" - -import logging -from typing import Optional - -from pymongo import collection as Collection - -import cwl_wes.utils.db_utils as db_utils - - -# Get logger instance -logger = logging.getLogger(__name__) - - -def set_run_state( - collection: Collection, - run_id: str, - task_id: Optional[str] = None, - state: str = 'UNKNOWN', -): - """Set/update state of run associated with Celery task.""" - if not task_id: - document = collection.find_one( - filter={'run_id': run_id}, - projection={ - 'task_id': True, - '_id': False, - } - ) - _task_id = document['task_id'] - else: - _task_id = task_id - try: - document = db_utils.update_run_state( - collection=collection, - task_id=_task_id, - state=state, - ) - except Exception as e: - logger.exception( - ( - "Database error. Could not update state of run '{run_id}' " - "(task id: '{task_id}') to state '{state}'. Original error " - "message: {type}: {msg}" - ).format( - run_id=run_id, - task_id=_task_id, - state=state, - type=type(e).__name__, - msg=e, - ) - ) - finally: - if document: - logger.info( - ( - "State of run '{run_id}' (task id: '{task_id}') " - "changed to '{state}'." 
- ).format( - run_id=run_id, - task_id=_task_id, - state=state, - ) - ) diff --git a/cwl_wes/tasks/tasks/workflow_run_manager.py b/cwl_wes/tasks/workflow_run_manager.py similarity index 72% rename from cwl_wes/tasks/tasks/workflow_run_manager.py rename to cwl_wes/tasks/workflow_run_manager.py index 68ec64a..cad3b73 100644 --- a/cwl_wes/tasks/tasks/workflow_run_manager.py +++ b/cwl_wes/tasks/workflow_run_manager.py @@ -1,30 +1,31 @@ -import os +"""Workflow run manager executed on worker.""" + +from datetime import datetime import logging +import os import subprocess -from typing import (Dict, List, Optional) import time -from datetime import datetime +from typing import Dict, List, Optional from foca.models.config import Config +from cwl_wes.tasks.cwl_log_processor import CWLLogProcessor, CWLTesProcessor +import cwl_wes.utils.db as db_utils from cwl_wes.worker import celery_app -from cwl_wes.tasks.tasks.cwl_log_processor import CWLLogProcessor, CWLTesProcessor -import cwl_wes.utils.db_utils as db_utils # Get logger instance logger = logging.getLogger(__name__) class WorkflowRunManager: - """Workflow run manager. - """ + """Workflow run manager.""" def __init__( self, command_list: List, task: celery_app.Task, tmp_dir: str, - token: Optional[str] = None + token: Optional[str] = None, ) -> None: """Initiate workflow run manager instance. @@ -39,7 +40,7 @@ def __init__( describing configurations registered with `celery_app`. custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` instance describing custom configuration model for cwl-WES specific - configurations. + configurations. collection: Collection client for saving task run progress. tes_config: TES (Task Execution Service) endpoint configurations. authorization: Boolean to define the security auth configuration for @@ -57,7 +58,7 @@ def __init__( describing configurations registered with `celery_app`. 
custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` instance describing custom configuration model for cwl-WES specific - configurations. + configurations. collection: Collection client for saving task run progress. tes_config: TES (Task Execution Service) endpoint configurations. authorization: Boolean to define the security auth configuration for @@ -71,38 +72,37 @@ def __init__( self.token = token self.foca_config: Config = celery_app.conf.foca self.controller_config = self.foca_config.custom.controller - self.collection = self.foca_config.db.dbs['cwl-wes-db'].collections['runs'].client - self.tes_config= { - 'url': self.controller_config.tes_server.url, - 'query_params': self.controller_config.tes_server.status_query_params, - 'timeout': self.controller_config.tes_server.timeout + self.collection = ( + self.foca_config.db.dbs["cwl-wes-db"].collections["runs"].client + ) + self.tes_config = { + "url": self.controller_config.tes_server.url, + "query_params": self.controller_config.tes_server.status_query_params, + "timeout": self.controller_config.tes_server.timeout, } self.authorization = self.foca_config.security.auth.required - self.string_format: str = '%Y-%m-%d %H:%M:%S.%f' - + self.string_format: str = "%Y-%m-%d %H:%M:%S.%f" + def trigger_task_start_events(self) -> None: - """Method to trigger task start events. 
- """ - if not self.collection.find_one({'task_id': self.task.request.id}): + """Method to trigger task start events.""" + if not self.collection.find_one({"task_id": self.task.request.id}): return None internal = dict() current_ts = time.time() - internal['task_started'] = datetime.utcfromtimestamp( - current_ts - ) + internal["task_started"] = datetime.utcfromtimestamp(current_ts) # Update run document in database try: self.update_run_document( - state='RUNNING', + state="RUNNING", internal=internal, - task_started=datetime.utcfromtimestamp( - current_ts - ).strftime(self.string_format), + task_started=datetime.utcfromtimestamp(current_ts).strftime( + self.string_format + ), ) except Exception as e: logger.exception( ( - 'Database error. Could not update log information for ' + "Database error. Could not update log information for " "task '{task}'. Original error message: {type}: {msg}" ).format( task=self.task_id, @@ -112,36 +112,33 @@ def trigger_task_start_events(self) -> None: ) def trigger_task_failure_events(self, task_end_ts): - """Method to trigger task failure events. 
- """ - if not self.collection.find_one({'task_id': self.task_id}): + """Method to trigger task failure events.""" + if not self.collection.find_one({"task_id": self.task_id}): return None - + # Create dictionary for internal parameters internal = dict() - internal['task_finished'] = datetime.utcfromtimestamp( - task_end_ts - ) + internal["task_finished"] = datetime.utcfromtimestamp(task_end_ts) task_meta_data = celery_app.AsyncResult(id=self.task_id) - internal['traceback'] = task_meta_data.traceback + internal["traceback"] = task_meta_data.traceback # Update run document in databse self.update_run_document( - state='SYSTEM_ERROR', + state="SYSTEM_ERROR", internal=internal, - task_finished=datetime.utcfromtimestamp( - task_end_ts - ).strftime(self.string_format), + task_finished=datetime.utcfromtimestamp(task_end_ts).strftime( + self.string_format + ), exception=task_meta_data.result, ) - + def trigger_task_success_events( self, returncode: int, log: str, tes_ids: List[str], token: str, - task_end_ts: float + task_end_ts: float, ) -> None: """Method to trigger task success events. @@ -152,7 +149,7 @@ def trigger_task_success_events( token: TES token. task_end_ts: Task end timestamp. """ - if not self.collection.find_one({'task_id': self.task_id}): + if not self.collection.find_one({"task_id": self.task_id}): return None # Parse subprocess results @@ -163,7 +160,7 @@ def trigger_task_success_events( logger.exception( ( "Field 'result' in event message malformed. 
Original " - 'error message: {type}: {msg}' + "error message: {type}: {msg}" ).format( type=type(e).__name__, msg=e, @@ -173,24 +170,22 @@ def trigger_task_success_events( # Create dictionary for internal parameters internal = dict() - internal['task_finished'] = datetime.utcfromtimestamp( - task_end_ts - ) + internal["task_finished"] = datetime.utcfromtimestamp(task_end_ts) # Set final state to be set document = self.collection.find_one( - filter={'task_id': self.task_id}, + filter={"task_id": self.task_id}, projection={ - 'api.state': True, - '_id': False, - } + "api.state": True, + "_id": False, + }, ) - if document and document['api']['state'] == 'CANCELING': - state = 'CANCELED' + if document and document["api"]["state"] == "CANCELING": + state = "CANCELED" elif returncode: - state = 'EXECUTOR_ERROR' + state = "EXECUTOR_ERROR" else: - state = 'COMPLETE' + state = "COMPLETE" # Extract run outputs cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config) @@ -209,17 +204,17 @@ def trigger_task_success_events( internal=internal, outputs=outputs, task_logs=task_logs, - task_finished=datetime.utcfromtimestamp( - task_end_ts - ).strftime(self.string_format), + task_finished=datetime.utcfromtimestamp(task_end_ts).strftime( + self.string_format + ), return_code=returncode, stdout=log, - stderr='', + stderr="", ) except Exception as e: logger.exception( ( - 'Database error. Could not update log information for ' + "Database error. Could not update log information for " "task '{task}'. Original error message: {type}: {msg}" ).format( task=self.task_id, @@ -228,13 +223,9 @@ def trigger_task_success_events( ) ) pass - + def trigger_task_end_events( - self, - returncode: int, - log: str, - tes_ids: List[str], - token: str + self, returncode: int, log: str, tes_ids: List[str], token: str ) -> None: """Method to trigger task completion events. 
@@ -248,8 +239,11 @@ def trigger_task_end_events( task_end_ts = time.time() if returncode == 0: self.trigger_task_success_events( - log=log, tes_ids=tes_ids, token=token, - task_end_ts=task_end_ts, returncode=returncode + log=log, + tes_ids=tes_ids, + token=token, + task_end_ts=task_end_ts, + returncode=returncode, ) else: self.trigger_task_failure_events(task_end_ts=task_end_ts) @@ -278,7 +272,7 @@ def update_run_document( document = db_utils.upsert_fields_in_root_object( collection=self.collection, task_id=self.task_id, - root='internal', + root="internal", **internal, ) @@ -287,7 +281,7 @@ def update_run_document( document = db_utils.upsert_fields_in_root_object( collection=self.collection, task_id=self.task_id, - root='api.outputs', + root="api.outputs", **outputs, ) @@ -296,7 +290,7 @@ def update_run_document( document = db_utils.upsert_fields_in_root_object( collection=self.collection, task_id=self.task_id, - root='api', + root="api", task_logs=task_logs, ) @@ -305,39 +299,39 @@ def update_run_document( document = db_utils.upsert_fields_in_root_object( collection=self.collection, task_id=self.task_id, - root='api.run_log', + root="api.run_log", **run_log_params, ) # Calculate queue, execution and run time - if document and document['internal']: - run_log = document['internal'] + if document and document["internal"]: + run_log = document["internal"] durations = dict() - if 'task_started' in run_log_params: - if 'task_started' in run_log and 'task_received' in run_log: + if "task_started" in run_log_params: + if "task_started" in run_log and "task_received" in run_log: pass - durations['time_queue'] = ( - run_log['task_started'] - run_log['task_received'] + durations["time_queue"] = ( + run_log["task_started"] - run_log["task_received"] ).total_seconds() - if 'task_finished' in run_log_params: - if 'task_finished' in run_log and 'task_started' in run_log: + if "task_finished" in run_log_params: + if "task_finished" in run_log and "task_started" in run_log: pass 
- durations['time_execution'] = ( - run_log['task_finished'] - run_log['task_started'] + durations["time_execution"] = ( + run_log["task_finished"] - run_log["task_started"] ).total_seconds() - if 'task_finished' in run_log and 'task_received' in run_log: + if "task_finished" in run_log and "task_received" in run_log: pass - durations['time_total'] = ( - run_log['task_finished'] - run_log['task_received'] + durations["time_total"] = ( + run_log["task_finished"] - run_log["task_received"] ).total_seconds() if durations: document = db_utils.upsert_fields_in_root_object( collection=self.collection, task_id=self.task_id, - root='api.run_log', + root="api.run_log", **durations, ) @@ -359,7 +353,7 @@ def update_run_document( "State of run '{run_id}' (task id: '{task_id}') changed " "to '{state}'." ).format( - run_id=document['run_id'], + run_id=document["run_id"], task_id=self.task_id, state=state, ) @@ -367,10 +361,8 @@ def update_run_document( return document - def run_workflow(self): - """Method to initiate workflow run. 
- """ + """Method to initiate workflow run.""" self.trigger_task_start_events() proc = subprocess.Popen( self.command_list, @@ -380,7 +372,9 @@ def run_workflow(self): universal_newlines=True, ) # Parse output in real-time - cwl_log_processor = CWLLogProcessor(tes_config=self.tes_config, collection=self.collection) + cwl_log_processor = CWLLogProcessor( + tes_config=self.tes_config, collection=self.collection + ) log, tes_ids = cwl_log_processor.process_cwl_logs( self.task, stream=proc.stdout, @@ -388,7 +382,5 @@ def run_workflow(self): ) returncode = proc.wait() self.trigger_task_end_events( - token=self.token, - returncode=returncode, - log=log, tes_ids=tes_ids - ) \ No newline at end of file + token=self.token, returncode=returncode, log=log, tes_ids=tes_ids + ) diff --git a/cwl_wes/utils/__init__.py b/cwl_wes/utils/__init__.py index e69de29..5bf53ce 100644 --- a/cwl_wes/utils/__init__.py +++ b/cwl_wes/utils/__init__.py @@ -0,0 +1 @@ +"""cwl-WES utilities.""" diff --git a/cwl_wes/utils/db.py b/cwl_wes/utils/db.py new file mode 100644 index 0000000..89785ef --- /dev/null +++ b/cwl_wes/utils/db.py @@ -0,0 +1,143 @@ +"""Utility functions for database access.""" + +import logging +from typing import Any, List, Mapping, Optional + +from bson.objectid import ObjectId +from pymongo import collection as Collection +from pymongo.collection import ReturnDocument + +# Get logger instance +logger = logging.getLogger(__name__) + + +def update_run_state( + collection: Collection, task_id: str, state: str = "UNKNOWN" +) -> Optional[Mapping[Any, Any]]: + """Updates state of workflow run and returns document.""" + return collection.find_one_and_update( + {"task_id": task_id}, + {"$set": {"api.state": state}}, + return_document=ReturnDocument.AFTER, + ) + + +def upsert_fields_in_root_object( + collection: Collection, task_id: str, root: str, **kwargs +) -> Optional[Mapping[Any, Any]]: + """Inserts (or updates) fields in(to) the same root (object) field and + returns document. 
+ """ + return collection.find_one_and_update( + {"task_id": task_id}, + { + "$set": { + ".".join([root, key]): value for (key, value) in kwargs.items() + } + }, + return_document=ReturnDocument.AFTER, + ) + + +def update_tes_task_state( + collection: Collection, task_id: str, tes_id: str, state: str +) -> Optional[Mapping[Any, Any]]: + """Updates `state` field in TES task log and returns updated document.""" + return collection.find_one_and_update( + {"task_id": task_id, "api.task_logs": {"$elemMatch": {"id": tes_id}}}, + {"$set": {"api.task_logs.$.state": state}}, + return_document=ReturnDocument.AFTER, + ) + + +def append_to_tes_task_logs( + collection: Collection, + task_id: str, + tes_log: Mapping, +) -> Optional[Mapping[Any, Any]]: + """Appends task log to TES task logs and returns updated document.""" + return collection.find_one_and_update( + {"task_id": task_id}, + {"$push": {"api.task_logs": tes_log}}, + return_document=ReturnDocument.AFTER, + ) + + +def find_tes_task_ids(collection: Collection, run_id: str) -> List: + """Get list of TES task ids associated with a run of interest.""" + return collection.distinct("api.task_logs.id", {"run_id": run_id}) + + +def set_run_state( + collection: Collection, + run_id: str, + task_id: Optional[str] = None, + state: str = "UNKNOWN", +): + """Set/update state of run associated with Celery task.""" + if not task_id: + document = collection.find_one( + filter={"run_id": run_id}, + projection={ + "task_id": True, + "_id": False, + }, + ) + _task_id = document["task_id"] + else: + _task_id = task_id + try: + document = update_run_state( + collection=collection, + task_id=_task_id, + state=state, + ) + except Exception as e: + logger.exception( + ( + "Database error. Could not update state of run '{run_id}' " + "(task id: '{task_id}') to state '{state}'. 
Original error " + "message: {type}: {msg}" + ).format( + run_id=run_id, + task_id=_task_id, + state=state, + type=type(e).__name__, + msg=e, + ) + ) + finally: + if document: + logger.info( + ( + "State of run '{run_id}' (task id: '{task_id}') " + "changed to '{state}'." + ).format( + run_id=run_id, + task_id=_task_id, + state=state, + ) + ) + + +def find_one_latest(collection: Collection) -> Optional[Mapping[Any, Any]]: + """Returns newest object, stripped of the object id, or None if no object + exists. + """ + try: + return ( + collection.find({}, {"_id": False}) + .sort([("_id", -1)]) + .limit(1) + .next() + ) + except StopIteration: + return None + + +def find_id_latest(collection: Collection) -> Optional[ObjectId]: + """Returns object id of newest object, or None if no object exists.""" + try: + return collection.find().sort([("_id", -1)]).limit(1).next()["_id"] + except StopIteration: + return None diff --git a/cwl_wes/utils/db_utils.py b/cwl_wes/utils/db_utils.py deleted file mode 100644 index d7ded8a..0000000 --- a/cwl_wes/utils/db_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Utility functions for MongoDB document insertion, updates and retrieval.""" - -from typing import (Any, List, Mapping, Optional) - -from pymongo.collection import ReturnDocument -from pymongo import collection as Collection - - -def update_run_state( - collection: Collection, - task_id: str, - state: str = 'UNKNOWN' -) -> Optional[Mapping[Any, Any]]: - """Updates state of workflow run and returns document.""" - return collection.find_one_and_update( - {'task_id': task_id}, - {'$set': {'api.state': state}}, - return_document=ReturnDocument.AFTER - ) - - -def upsert_fields_in_root_object( - collection: Collection, - task_id: str, - root: str, - **kwargs -) -> Optional[Mapping[Any, Any]]: - """Inserts (or updates) fields in(to) the same root (object) field and - returns document. 
- """ - return collection.find_one_and_update( - {'task_id': task_id}, - {'$set': { - '.'.join([root, key]): - value for (key, value) in kwargs.items() - }}, - return_document=ReturnDocument.AFTER - ) - - -def update_tes_task_state( - collection: Collection, - task_id: str, - tes_id: str, - state: str -) -> Optional[Mapping[Any, Any]]: - """Updates `state` field in TES task log and returns updated document.""" - return collection.find_one_and_update( - {'task_id': task_id, 'api.task_logs': {'$elemMatch': {'id': tes_id}}}, - {'$set': {'api.task_logs.$.state': state}}, - return_document=ReturnDocument.AFTER - ) - - -def append_to_tes_task_logs( - collection: Collection, - task_id: str, - tes_log: Mapping, -) -> Optional[Mapping[Any, Any]]: - """Appends task log to TES task logs and returns updated document.""" - return collection.find_one_and_update( - {'task_id': task_id}, - {'$push': {'api.task_logs': tes_log}}, - return_document=ReturnDocument.AFTER - ) - - -def find_tes_task_ids( - collection: Collection, - run_id: str -) -> List: - """Get list of TES task ids associated with a run of interest.""" - return collection.distinct('api.task_logs.id', {'run_id': run_id}) diff --git a/cwl_wes/ga4gh/wes/endpoints/utils/drs.py b/cwl_wes/utils/drs.py similarity index 93% rename from cwl_wes/ga4gh/wes/endpoints/utils/drs.py rename to cwl_wes/utils/drs.py index 8e15d23..1ecffda 100644 --- a/cwl_wes/ga4gh/wes/endpoints/utils/drs.py +++ b/cwl_wes/utils/drs.py @@ -7,10 +7,10 @@ import re from requests.exceptions import ConnectionError import sys -from typing import (Iterator, List, Match, Optional) +from typing import Iterator, List, Match, Optional from drs_cli.client import DRSClient -from drs_cli.errors import (InvalidResponseError, InvalidURI) +from drs_cli.errors import InvalidResponseError, InvalidURI from drs_cli.models import Error from werkzeug.exceptions import ( BadRequest, @@ -51,16 +51,20 @@ def translate_drs_uris( documentation/specification. 
""" # define regex for identifying DRS URIs - _RE_DOMAIN_PART = r'[a-z0-9]([a-z0-9-]{1,61}[a-z0-9]?)?' + _RE_DOMAIN_PART = r"[a-z0-9]([a-z0-9-]{1,61}[a-z0-9]?)?" _RE_DOMAIN = rf"({_RE_DOMAIN_PART}\.)+{_RE_DOMAIN_PART}\.?" _RE_OBJECT_ID = rf"(?Pdrs:\/\/{_RE_DOMAIN}\/\S+)" # get absolute paths of file or directory (including subdirectories) logger.debug(f"Collecting file(s) for provided path '{path}'...") - files = abs_paths( - dir=path, - file_ext=file_types, - ) if os.path.isdir(path) else [path] + files = ( + abs_paths( + dir=path, + file_ext=file_types, + ) + if os.path.isdir(path) + else [path] + ) # replace any DRS URIs in any file in place for _file in files: @@ -72,7 +76,7 @@ def translate_drs_uris( pattern=_RE_OBJECT_ID, repl=partial( get_replacement_string, - ref='drs_uri', + ref="drs_uri", supported_access_methods=supported_access_methods, port=port, base_path=base_path, @@ -184,9 +188,7 @@ def get_access_url_from_drs( # get DRS object try: - object = client.get_object( - object_id=drs_uri - ) + object = client.get_object(object_id=drs_uri) except (ConnectionError, InvalidResponseError): logger.error(f"Could not connect to DRS host for DRS URI '{drs_uri}'.") raise InternalServerError @@ -208,8 +210,9 @@ def get_access_url_from_drs( for supported_method in supported_access_methods: try: access_url = str( - available_methods - [available_types.index(supported_method)].access_url.url + available_methods[ + available_types.index(supported_method) + ].access_url.url ) logger.info( f"Resolved DRS URI '{drs_uri}' to access link '{access_url}'." 
diff --git a/cwl_wes/version.py b/cwl_wes/version.py new file mode 100644 index 0000000..18e5ee6 --- /dev/null +++ b/cwl_wes/version.py @@ -0,0 +1,3 @@ +"""Single source of truth for package version.""" + +__version__ = "0.16.0" diff --git a/cwl_wes/worker.py b/cwl_wes/worker.py index a5fb293..d4bd7a5 100644 --- a/cwl_wes/worker.py +++ b/cwl_wes/worker.py @@ -1,8 +1,9 @@ """Entry point for Celery workers.""" + from foca.foca import Foca foca = Foca( config_file="config.yaml", - custom_config_model='cwl_wes.custom_config.CustomConfig', + custom_config_model="cwl_wes.custom_config.CustomConfig", ) -celery_app = foca.create_celery_app() \ No newline at end of file +celery_app = foca.create_celery_app() diff --git a/cwl_wes/wsgi.py b/cwl_wes/wsgi.py index 0763c51..b8e3dad 100644 --- a/cwl_wes/wsgi.py +++ b/cwl_wes/wsgi.py @@ -1,3 +1,3 @@ from cwl_wes.app import init_app -app = init_app() \ No newline at end of file +app = init_app() diff --git a/deployment/values.yaml b/deployment/values.yaml index fa913e6..b7916a0 100644 --- a/deployment/values.yaml +++ b/deployment/values.yaml @@ -88,8 +88,8 @@ wes: broker_port: 5672 result_backend: 'rpc://' include: - - cwl_wes.tasks.tasks.run_workflow - - cwl_wes.tasks.tasks.cancel_run + - cwl_wes.tasks.run_workflow + - cwl_wes.tasks.cancel_run monitor: timeout: 0.1 message_maxsize: 16777216 @@ -130,7 +130,6 @@ wes: default_value: '5' tags: known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' - app_version: 0.15.0 # TES server tes: diff --git a/requirements.txt b/requirements.txt index a24a143..46397fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,5 @@ foca==0.12.0 --e git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes +cwl-tes @ git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes drs-cli==0.2.3 gunicorn==19.9.0 
py-tes==0.4.2 -python-dateutil==2.6.1 -importlib-metadata==4.13.0 \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..cb10cdf --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,5 @@ +black~=22.12.0 +flake8~=6.0.0 +flake8-docstrings~=1.6.0 +mypy~=0.991 +pylint~=2.15.9 diff --git a/setup.py b/setup.py index 058e2df..12b07fc 100644 --- a/setup.py +++ b/setup.py @@ -1,30 +1,42 @@ -from setuptools import (setup, find_packages) +"""Package setup.""" -with open('README.md', 'r') as fh: - long_description = fh.read() +from pathlib import Path +from setuptools import setup, find_packages + +root_dir = Path(__file__).parent.resolve() + +exec(open(root_dir / "cwl_wes" / "version.py").read()) + +file_name = root_dir / "README.md" +with open(file_name, "r") as _file: + LONG_DESCRIPTION = _file.read() + +req = root_dir / "requirements.txt" +with open(req, "r") as _file: + INSTALL_REQUIRES = _file.read().splitlines() setup( - name='cwl-wes', - version='0.15.0', - author='Elixir Cloud & AAI', - author_email='alexander.kanitz@alumni.ethz.ch', - description='Flask- and MongoDB-powered GA4GH WES server', - long_description=long_description, + name="cwl-wes", + version=__version__, # noqa: F821 + author="Elixir Cloud & AAI", + author_email="alexander.kanitz@alumni.ethz.ch", + description="Flask- and MongoDB-powered GA4GH WES server", + long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", - license='Apache License 2.0', - url='https://github.com/elixir-cloud-aai/cwl-WES.git', + license="Apache License 2.0", + url="https://github.com/elixir-cloud-aai/cwl-WES.git", packages=find_packages(), keywords=( - 'ga4gh wes workflow elixir rest restful api app server openapi ' - 'swagger mongodb python flask' + "ga4gh wes workflow elixir rest restful api app server openapi " + "swagger mongodb python flask" ), classifiers=[ - 'License :: OSI Approved :: Apache Software License', - 'Development Status 
:: 3 - Alpha', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Natural Language :: English', - 'Programming Language :: Python :: 3.7', + "License :: OSI Approved :: Apache Software License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Natural Language :: English", + "Programming Language :: Python :: 3.7", ], - install_requires=['connexion', 'Flask-Cors', 'Flask-PyMongo'], + install_requires=INSTALL_REQUIRES, ) From ddb29d024b144e60c83009f25804579744e416f8 Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Sun, 1 Jan 2023 19:20:55 +0100 Subject: [PATCH 15/29] pylint & mypy pass --- cwl_wes/app.py | 16 +- cwl_wes/custom_config.py | 129 +++----- cwl_wes/exceptions.py | 4 +- cwl_wes/ga4gh/wes/endpoints/__init__.py | 2 +- cwl_wes/ga4gh/wes/endpoints/cancel_run.py | 86 ----- cwl_wes/ga4gh/wes/endpoints/get_run_log.py | 52 --- cwl_wes/ga4gh/wes/endpoints/get_run_status.py | 54 --- .../ga4gh/wes/endpoints/get_service_info.py | 50 ++- cwl_wes/ga4gh/wes/endpoints/list_runs.py | 78 ----- cwl_wes/ga4gh/wes/endpoints/run_workflow.py | 310 +++++++++--------- cwl_wes/ga4gh/wes/server.py | 175 ++++++++-- cwl_wes/ga4gh/wes/service_info.py | 28 +- cwl_wes/ga4gh/wes/states.py | 6 + cwl_wes/gunicorn.py | 25 +- cwl_wes/tasks/cancel_run.py | 20 +- cwl_wes/tasks/cwl_log_processor.py | 230 ++++++++----- cwl_wes/tasks/run_workflow.py | 9 +- cwl_wes/tasks/workflow_run_manager.py | 158 +++++---- cwl_wes/utils/controllers.py | 50 +++ cwl_wes/utils/db.py | 97 ++++-- cwl_wes/utils/drs.py | 57 ++-- cwl_wes/worker.py | 2 +- cwl_wes/wsgi.py | 2 + pylintrc | 4 + requirements.txt | 8 +- requirements_dev.txt | 2 +- setup.py | 11 +- 27 files changed, 823 insertions(+), 842 deletions(-) delete mode 100644 cwl_wes/ga4gh/wes/endpoints/cancel_run.py delete mode 100644 cwl_wes/ga4gh/wes/endpoints/get_run_log.py delete mode 100644 
cwl_wes/ga4gh/wes/endpoints/get_run_status.py delete mode 100644 cwl_wes/ga4gh/wes/endpoints/list_runs.py create mode 100644 cwl_wes/utils/controllers.py create mode 100644 pylintrc diff --git a/cwl_wes/app.py b/cwl_wes/app.py index e31bfee..949da1c 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -1,5 +1,7 @@ """cwl-WES application entry point.""" +from pathlib import Path + from connexion import App from flask import current_app from foca import Foca @@ -9,15 +11,20 @@ def init_app() -> App: + """Initialize FOCA application. + + Returns: + App: FOCA application. + """ foca = Foca( - config_file="config.yaml", + config_file=Path("config.yaml"), custom_config_model="cwl_wes.custom_config.CustomConfig", ) app = foca.create_app() with app.app.app_context(): service_info = ServiceInfo() try: - service_info = service_info.get_service_info() + service_info.get_service_info() except NotFound: service_info.set_service_info( data=current_app.config.foca.custom.service_info.dict() @@ -26,9 +33,10 @@ def init_app() -> App: def run_app(app: App) -> None: + """Run FOCA application.""" app.run(port=app.port) if __name__ == "__main__": - app = init_app() - run_app(app) + my_app = init_app() + run_app(my_app) diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index 021b5d6..940192a 100644 --- a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -1,8 +1,13 @@ """Custom app config models.""" + +from pathlib import Path import string from typing import Dict, List, Optional + from foca.models.config import FOCABaseConfig +# pragma pylint: disable=too-few-public-methods + class StorageConfig(FOCABaseConfig): """Model for task run and storage configuration. @@ -17,21 +22,18 @@ class StorageConfig(FOCABaseConfig): permanent_dir: Permanent working directory path remote_storage_url: Remote file storage FTP endpoint - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> StorageConfig( ... 
tmp_dir='/data/tmp', ... permanent_dir='/data/output', ... remote_storage_url='ftp://ftp.private/upload' ... ) - StorageConfig(tmp_dir='/data/tmp', permanent_dir='/data/output', remote_storage_url='ftp://ftp.private/upload') + StorageConfig(tmp_dir='/data/tmp', permanent_dir='/data/output', remote + orage_url='ftp://ftp.private/upload') """ - permanent_dir: str = "/data/output" - tmp_dir: str = "/data/tmp" + permanent_dir: Path = Path("/data/output") + tmp_dir: Path = Path("/data/tmp") remote_storage_url: str = "ftp://ftp-private.ebi.ac.uk/upload/foivos" @@ -46,10 +48,6 @@ class CeleryConfig(FOCABaseConfig): timeout: Celery task timeout. message_maxsize: Celery message max size. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> CeleryConfig( ... timeout=15, @@ -64,6 +62,7 @@ class CeleryConfig(FOCABaseConfig): class WorkflowTypeVersionConfig(FOCABaseConfig): """Workflow type versions supported by this service. + Args: workflow_type_version: List of one or more acceptable versions for the workflow type. @@ -72,10 +71,6 @@ class WorkflowTypeVersionConfig(FOCABaseConfig): workflow_type_version: List of one or more acceptable versions for the workflow type. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> WorkflowTypeVersionConfig( ... workflow_type_version=['v1.0'] @@ -99,17 +94,14 @@ class DefaultWorkflowEngineParameterConfig(FOCABaseConfig): type: Parameter type. default_value: Stringified version of default parameter. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> DefaultWorkflowEngineParameterConfig( ... name='name', ... type='str', ... default_value='default' ... 
) - DefaultWorkflowEngineParameterConfig(name='name', type='str', default_value='default') + DefaultWorkflowEngineParameterConfig(name='name', type='str', default_v + alue='default') """ name: Optional[str] @@ -126,10 +118,6 @@ class TagsConfig(FOCABaseConfig): Attributes: known_tes_endpoints: Valid TES endpoints. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> TagsConfig( ... known_tes_endpoints='https://tes.endpoint', @@ -137,7 +125,7 @@ class TagsConfig(FOCABaseConfig): TagsConfig(known_tes_endpoints='https://tes.endpoint') """ - known_tes_endpoints: str = "https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/" + known_tes_endpoints: str class ServiceInfoConfig(FOCABaseConfig): @@ -145,41 +133,37 @@ class ServiceInfoConfig(FOCABaseConfig): Args: contact_info: Email address/webpage URL with contact information. - auth_instructions_url: Web page URL with information about how to get an - authorization token necessary to use a specific endpoint. + auth_instructions_url: Web page URL with information about how to get + an authorization token necessary to use a specific endpoint. supported_filesystem_protocols: Filesystem protocols supported by this service. supported_wes_versions: Version(s) of the WES schema supported by this service. - workflow_type_versions: Map with keys as the workflow format type name and - value is a `WorkflowTypeVersionConfig` object which simply contains an - array of one or more version strings. + workflow_type_versions: Map with keys as the workflow format type name + and value is a `WorkflowTypeVersionConfig` object which simply + contains an array of one or more version strings. workflow_engine_versions: Workflow engine(s) used by this WES service. - default_workflow_engine_parameters: Each workflow engine can present additional - parameters that can be sent to the workflow engine. 
- tags: A key-value map of arbitrary, extended metadata outside the scope of the above but - useful to report back. + default_workflow_engine_parameters: Each workflow engine can present + additional parameters that can be sent to the workflow engine. + tags: A key-value map of arbitrary, extended metadata outside the scope + of the above but useful to report back. Attributes: contact_info: Email address/webpage URL with contact information. - auth_instructions_url: Web page URL with information about how to get an - authorization token necessary to use a specific endpoint. + auth_instructions_url: Web page URL with information about how to get + an authorization token necessary to use a specific endpoint. supported_filesystem_protocols: Filesystem protocols supported by this service. supported_wes_versions: Version(s) of the WES schema supported by this service. - workflow_type_versions: Map with keys as the workflow format type name and - value is a `WorkflowTypeVersionConfig` object which simply contains an - array of one or more version strings. + workflow_type_versions: Map with keys as the workflow format type name + and value is a `WorkflowTypeVersionConfig` object which simply + contains an array of one or more version strings. workflow_engine_versions: Workflow engine(s) used by this WES service. - default_workflow_engine_parameters: Each workflow engine can present additional - parameters that can be sent to the workflow engine. - tags: A key-value map of arbitrary, extended metadata outside the scope of the above but - useful to report back. - - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. + default_workflow_engine_parameters: Each workflow engine can present + additional parameters that can be sent to the workflow engine. + tags: A key-value map of arbitrary, extended metadata outside the scope + of the above but useful to report back. 
Example: >>> ServiceInfoConfig( @@ -187,18 +171,22 @@ class ServiceInfoConfig(FOCABaseConfig): ... auth_instructions_url='https://auth.url', ... supported_filesystem_protocols=['ftp', 'https', 'local'], ... supported_wes_versions=['1.0.0'], - ... workflow_type_versions={'CWL': WorkflowTypeVersionConfig(workflow_type_version=['v1.0'])}, + ... workflow_type_versions={ + ... 'CWL': WorkflowTypeVersionConfig( + ... workflow_type_version=['v1.0'] + ... ) + ... }, ... workflow_engine_versions={}, ... default_workflow_engine_parameters=[], ... tags=TagsConfig(known_tes_endpoints='https://tes.endpoint/') ... ) - ServiceInfoConfig(contact_info='https://github.com/elixir-cloud-aai/cwl-WES', auth_instruc\ - tions_url='https://www.elixir-europe.org/services/compute/aai', supported_filesystem_proto\ - cols=['ftp', 'https', 'local'], supported_wes_versions=['1.0.0'], workflow_type_versions={\ - 'CWL': WorkflowTypeVersionConfig(workflow_type_version=['v1.0'])}, workflow_engine_version\ - s={}, default_workflow_engine_parameters=[], tags=TagsConfig(known_tes_endpoints='https://\ - tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|h\ - ttps://tesk.c01.k8s-popup.csc.fi/')) + ServiceInfoConfig(contact_info='https://github.com/elixir-cloud-aai/cwl + -WES', auth_instructions_url='https://www.elixir-europe.org/services/co + mpute/aai', supported_filesystem_protocols=['ftp', 'https', 'local'], s + upported_wes_versions=['1.0.0'], workflow_type_versions={'CWL': Workflo + wTypeVersionConfig(workflow_type_version=['v1.0'])}, workflow_engine_ve + rsions={}, default_workflow_engine_parameters=[], tags=TagsConfig(known + _tes_endpoints='https://tes.endpoint/')) """ contact_info: str = "https://github.com/elixir-cloud-aai/cwl-WES" @@ -230,20 +218,17 @@ class TesServerConfig(FOCABaseConfig): timeout: Request time out. status_query_params: Request query parameters. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. 
- Example: >>> TesServerConfig( ... url='https://tes.endpoint', ... timeout=5, ... status_query_params='FULL' ... ) - TesServerConfig(url='https://tes.endpoint', timeout=5, status_query_params='FULL') + TesServerConfig(url='https://tes.endpoint', timeout=5, status_query_par + ams='FULL') """ - url: str = "https://csc-tesk.c03.k8s-popup.csc.fi/" + url: str timeout: int = 5 status_query_params: str = "FULL" @@ -269,10 +254,6 @@ class DRSServerConfig(FOCABaseConfig): set to `False` to use default (`https`). file_types: Extensions of files to scan for DRS URI resolution. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> DRSServerConfig( ... port=443, @@ -280,7 +261,8 @@ class DRSServerConfig(FOCABaseConfig): ... use_http=False, ... file_types=['cwl', 'yaml', 'yml'] ... ) - DRSServerConfig(port=443, base_path='ga4gh/drs/v1', use_http=False, file_types=['cwl', 'yaml', 'yml']) + DRSServerConfig(port=443, base_path='ga4gh/drs/v1', use_http=False, fil + e_types=['cwl', 'yaml', 'yml']) """ port: Optional[int] = None @@ -302,10 +284,6 @@ class IdConfig(FOCABaseConfig): a string of allowed characters. length: Length of returned string. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> IdConfig( ... charset='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', @@ -337,17 +315,14 @@ class ControllerConfig(FOCABaseConfig): drs_server: DRS Server config parameters. runs_id: Identifier config parameters. - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. - Example: >>> ControllerConfig( ... default_page_size=5, ... timeout_cancel_run=60, ... timeout_run_workflow=None ... 
) - ControllerConfig(default_page_size=5, timeout_cancel_run=60, timeout_run_workflow=60) + ControllerConfig(default_page_size=5, timeout_cancel_run=60, timeout_ru + n_workflow=60) """ default_page_size: int = 5 @@ -372,10 +347,6 @@ class CustomConfig(FOCABaseConfig): celery: Celery config parameters. controller: Controller config parameters. service_info: Service Info config parameters. - - Raises: - pydantic.ValidationError: The class was instantianted with an illegal - data type. """ storage: StorageConfig = StorageConfig() diff --git a/cwl_wes/exceptions.py b/cwl_wes/exceptions.py index ea6542b..abccce5 100644 --- a/cwl_wes/exceptions.py +++ b/cwl_wes/exceptions.py @@ -1,3 +1,5 @@ +"""cwl-WES exceptions.""" + from connexion.exceptions import ( BadRequestProblem, ExtraParameterProblem, @@ -12,8 +14,6 @@ class WorkflowNotFound(ProblemException, NotFound): """WorkflowNotFound(404) error compatible with Connexion.""" - pass - exceptions = { Exception: { diff --git a/cwl_wes/ga4gh/wes/endpoints/__init__.py b/cwl_wes/ga4gh/wes/endpoints/__init__.py index 934f923..856f49d 100644 --- a/cwl_wes/ga4gh/wes/endpoints/__init__.py +++ b/cwl_wes/ga4gh/wes/endpoints/__init__.py @@ -1 +1 @@ -"""cwl-WES controllers subpackage.""" +"""cwl-WES controllers helper functions.""" diff --git a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py b/cwl_wes/ga4gh/wes/endpoints/cancel_run.py deleted file mode 100644 index 1e564b8..0000000 --- a/cwl_wes/ga4gh/wes/endpoints/cancel_run.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Utility functions for POST /runs/{run_id}/cancel endpoints.""" - -import logging -from typing import Dict - -from celery import Celery, uuid -from connexion.exceptions import Forbidden -from flask import Config -from pymongo.collection import Collection - -from cwl_wes.exceptions import WorkflowNotFound -from cwl_wes.ga4gh.wes.states import States -from cwl_wes.tasks.cancel_run import task__cancel_run - - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Utility 
function for endpoint POST /runs//delete -def cancel_run( - config: Config, celery_app: Celery, run_id: str, *args, **kwargs -) -> Dict: - """Cancels running workflow.""" - foca_config = config.foca - collection_runs: Collection = ( - foca_config.db.dbs["cwl-wes-db"].collections["runs"].client - ) - document = collection_runs.find_one( - filter={"run_id": run_id}, - projection={ - "user_id": True, - "task_id": True, - "api.state": True, - "_id": False, - }, - ) - - # Raise error if workflow run was not found - if not document: - logger.error("Run '{run_id}' not found.".format(run_id=run_id)) - raise WorkflowNotFound - - # Raise error trying to access workflow run that is not owned by user - # Only if authorization enabled - if "user_id" in kwargs and document["user_id"] != kwargs["user_id"]: - logger.error( - ( - "User '{user_id}' is not allowed to access workflow run " - "'{run_id}'." - ).format( - user_id=kwargs["user_id"], - run_id=run_id, - ) - ) - raise Forbidden - - # Cancel unfinished workflow run in background - if document["api"]["state"] in States.CANCELABLE: - - # Get timeout duration - timeout_duration = foca_config.custom.controller.timeout_cancel_run - - # Execute cancelation task in background - task_id = uuid() - logger.info( - ( - "Canceling run '{run_id}' as background task " "'{task_id}'..." 
- ).format( - run_id=run_id, - task_id=task_id, - ) - ) - task__cancel_run.apply_async( - None, - { - "run_id": run_id, - "task_id": document["task_id"], - "token": kwargs.get("jwt"), - }, - task_id=task_id, - soft_time_limit=timeout_duration, - ) - - response = {"run_id": run_id} - return response diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py b/cwl_wes/ga4gh/wes/endpoints/get_run_log.py deleted file mode 100644 index 2131522..0000000 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_log.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Utility function for GET /runs/{run_id} endpoint.""" - -import logging -from typing import Dict - -from connexion.exceptions import Forbidden -from flask import Config -from pymongo.collection import Collection - -from cwl_wes.exceptions import WorkflowNotFound - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Utility function for endpoint GET /runs/ -def get_run_log(config: Config, run_id: str, *args, **kwargs) -> Dict: - """Gets detailed log information for specific run.""" - collection_runs: Collection = ( - config.foca.db.dbs["cwl-wes-db"].collections["runs"].client - ) - document = collection_runs.find_one( - filter={"run_id": run_id}, - projection={ - "user_id": True, - "api": True, - "_id": False, - }, - ) - - # Raise error if workflow run was not found or has no task ID - if document: - run_log = document["api"] - else: - logger.error("Run '{run_id}' not found.".format(run_id=run_id)) - raise WorkflowNotFound - - # Raise error trying to access workflow run that is not owned by user - # Only if authorization enabled - if "user_id" in kwargs and document["user_id"] != kwargs["user_id"]: - logger.error( - ( - "User '{user_id}' is not allowed to access workflow run " - "'{run_id}'." 
- ).format( - user_id=kwargs["user_id"], - run_id=run_id, - ) - ) - raise Forbidden - - return run_log diff --git a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py b/cwl_wes/ga4gh/wes/endpoints/get_run_status.py deleted file mode 100644 index 256a596..0000000 --- a/cwl_wes/ga4gh/wes/endpoints/get_run_status.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Utility function for GET /runs/{run_id}/status endpoint.""" - -import logging -from typing import Dict - -from connexion.exceptions import Forbidden -from flask import Config -from pymongo.collection import Collection - -from cwl_wes.exceptions import WorkflowNotFound - - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Utility function for endpoint GET /runs//status -def get_run_status(config: Config, run_id: str, *args, **kwargs) -> Dict: - """Gets status information for specific run.""" - collection_runs: Collection = ( - config.foca.db.dbs["cwl-wes-db"].collections["runs"].client - ) - document = collection_runs.find_one( - filter={"run_id": run_id}, - projection={ - "user_id": True, - "api.state": True, - "_id": False, - }, - ) - - # Raise error if workflow run was not found or has no task ID - if document: - state = document["api"]["state"] - else: - logger.error("Run '{run_id}' not found.".format(run_id=run_id)) - raise WorkflowNotFound - - # Raise error trying to access workflow run that is not owned by user - # Only if authorization enabled - if "user_id" in kwargs and document["user_id"] != kwargs["user_id"]: - logger.error( - ( - "User '{user_id}' is not allowed to access workflow run " - "'{run_id}'." 
- ).format( - user_id=kwargs["user_id"], - run_id=run_id, - ) - ) - raise Forbidden - - response = {"run_id": run_id, "state": state} - return response diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py index 715211d..6ef16dd 100644 --- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py @@ -6,11 +6,12 @@ from typing import Any, Dict from flask import Config +from foca.utils.db import find_id_latest, find_one_latest from pymongo import collection as Collection from cwl_wes.ga4gh.wes.states import States -import cwl_wes.utils.db_utils as db_utils +# pragma pylint: disable=unused-argument # Get logger instance logger = logging.getLogger(__name__) @@ -18,10 +19,24 @@ # Helper function GET /service-info def get_service_info( - config: Config, silent: bool = False, *args: Any, **kwarg: Any + config: Config, + *args: Any, + silent: bool = False, + **kwarg: Any, ): - """Returns readily formatted service info or `None` (in silent mode); - creates service info database document if it does not exist.""" + """Get formatted service info. + + Creates service info database document if it does not exist. + + Args: + config: App configuration. + *args: Variable length argument list. + silent: Whether to return service info or `None` (in silent mode). + **kwargs: Arbitrary keyword arguments. 
+ + Returns: + Readily formatted service info, or `None` (in silent mode); + """ collection_service_info: Collection.Collection = ( config.foca.db.dbs["cwl-wes-db"].collections["service_info"].client ) @@ -31,13 +46,9 @@ def get_service_info( service_info = deepcopy(config.foca.custom.service_info.dict()) # Write current service info to database if absent or different from latest - if not service_info == db_utils.find_one_latest(collection_service_info): + if not service_info == find_one_latest(collection_service_info): collection_service_info.insert(service_info) - logger.info( - "Updated service info: {service_info}".format( - service_info=service_info, - ) - ) + logger.info(f"Updated service info: {service_info}") else: logger.debug("No change in service info. Not updated.") @@ -51,7 +62,7 @@ def get_service_info( ) # Add timestamps - _id = db_utils.find_id_latest(collection_service_info) + _id = find_id_latest(collection_service_info) if _id: service_info["tags"]["last_service_info_update"] = _id.generation_time service_info["tags"]["current_time"] = datetime.utcnow().isoformat() @@ -60,7 +71,14 @@ def get_service_info( def __get_system_state_counts(collection: Collection) -> Dict[str, int]: - """Gets current system state counts.""" + """Get current system state counts. + + Args: + collection: MongoDB collection object. + + Returns: + Dictionary of counts per state. + """ current_counts = __init_system_state_counts() # Query database for workflow run states @@ -80,7 +98,9 @@ def __get_system_state_counts(collection: Collection) -> Dict[str, int]: def __init_system_state_counts() -> Dict[str, int]: - """Initializes system state counts.""" - # TODO: Get states programmatically or define as enum - # Set all state counts to zero + """Initialize system state counts. + + Returns: + Dictionary of state counts, inititalized to zero. 
+ """ return {state: 0 for state in States.ALL} diff --git a/cwl_wes/ga4gh/wes/endpoints/list_runs.py b/cwl_wes/ga4gh/wes/endpoints/list_runs.py deleted file mode 100644 index c135db3..0000000 --- a/cwl_wes/ga4gh/wes/endpoints/list_runs.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Utility function for GET /runs endpoint.""" - -import logging -from typing import Dict - -from bson.objectid import ObjectId -from flask import Config -from pymongo.collection import Collection - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Utility function for endpoint GET /runs -def list_runs(config: Config, *args, **kwargs) -> Dict: - """Lists IDs and status for all workflow runs.""" - collection_runs: Collection = ( - config.foca.db.dbs["cwl-wes-db"].collections["runs"].client - ) - - # Fall back to default page size if not provided by user - if "page_size" in kwargs: - page_size = kwargs["page_size"] - else: - page_size = config.foca.custom.controller.default_page_size - - # Extract/set page token - if "page_token" in kwargs: - page_token = kwargs["page_token"] - else: - page_token = "" - - # Initialize filter dictionary - filter_dict = {} - - # Add filter for user-owned runs if user ID is available - if "user_id" in kwargs: - filter_dict["user_id"] = kwargs["user_id"] - - # Add pagination filter based on last object ID - if page_token != "": - filter_dict["_id"] = {"$lt": ObjectId(page_token)} - - # Query database for workflow runs - cursor = ( - collection_runs.find( - filter=filter_dict, - projection={ - "run_id": True, - "api.state": True, - } - # Sort results by descending object ID (+/- newest to oldest) - ) - .sort( - "_id", - -1 - # Implement page size limit - ) - .limit(page_size) - ) - - # Convert cursor to list - runs_list = list(cursor) - - # Get next page token from ID of last run in cursor - if runs_list: - next_page_token = str(runs_list[-1]["_id"]) - else: - next_page_token = "" - - # Reshape list of runs - for run in runs_list: - del run["_id"] - 
run["state"] = run["api"]["state"] - del run["api"] - - # Build and return response - return {"next_page_token": next_page_token, "runs": runs_list} diff --git a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py index ec527a4..33dda8a 100644 --- a/cwl_wes/ga4gh/wes/endpoints/run_workflow.py +++ b/cwl_wes/ga4gh/wes/endpoints/run_workflow.py @@ -2,18 +2,17 @@ from json import decoder, loads import logging -import os -from random import choice +from pathlib import Path import re import shutil -import string # noqa: F401 import subprocess -from typing import Dict, List, Optional +from typing import Dict from celery import uuid -from flask import current_app, Config, request +from flask import Config, request +from foca.utils.misc import generate_id from pymongo.collection import Collection -from pymongo.errors import DuplicateKeyError +from pymongo.errors import DuplicateKeyError, PyMongoError from yaml import dump from werkzeug.datastructures import ImmutableMultiDict from werkzeug.utils import secure_filename @@ -22,6 +21,8 @@ from cwl_wes.tasks.run_workflow import task__run_workflow from cwl_wes.utils.drs import translate_drs_uris +# pragma pylint: disable=unused-argument + # Get logger instance logger = logging.getLogger(__name__) @@ -30,7 +31,17 @@ def run_workflow( config: Config, form_data: ImmutableMultiDict, *args, **kwargs ) -> Dict: - """Executes workflow and save info to database; returns unique run id.""" + """Execute workflow and save info to database. + + Args: + config: Flask configuration object. + form_data: Form data from POST /runs request. + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. + + Returns: + Unique run id. 
+ """ # Validate data and prepare run environment form_data_dict = __immutable_multi_dict_to_nested_dict( multi_dict=form_data @@ -49,18 +60,34 @@ def run_workflow( return response -def __secure_join(basedir: str, fname: str) -> str: +def __secure_join(basedir: Path, fname: str) -> Path: + """Generate a secure path for a file. + + Args: + basedir: Base directory. + fname: Filename. + + Returns: + Secure path. + """ fname = secure_filename(fname) if not fname: # Replace by a random filename fname = uuid() - return os.path.join(basedir, fname) + return basedir / fname def __immutable_multi_dict_to_nested_dict( multi_dict: ImmutableMultiDict, ) -> Dict: - """Converts ImmutableMultiDict to nested dictionary.""" + """Convert ImmutableMultiDict to nested dictionary. + + Args: + multi_dict: Immutable multi dictionary. + + Returns: + Nested dictionary. + """ # Convert to flat dictionary nested_dict = multi_dict.to_dict(flat=True) for key in nested_dict: @@ -73,9 +100,13 @@ def __immutable_multi_dict_to_nested_dict( def __validate_run_workflow_request(data: Dict) -> None: - """Validates presence and types of workflow run request form data; sets - defaults for optional fields.""" + """Validate workflow run request form data. + + Set defaults for optional fields. + Args: + data: Workflow run request form data. + """ # The form data is not validated properly because all types except # 'workflow_attachment' are string and none are labeled as required # Considering the 'RunRequest' model in the specs, the following @@ -144,62 +175,72 @@ def __validate_run_workflow_request(data: Dict) -> None: logger.error("POST request does not conform to schema.") raise BadRequest - return None - def __check_service_info_compatibility(data: Dict) -> None: - """Checks compatibility with service info; raises BadRequest.""" + """Check compatibility with service info. 
Not implemented.""" # TODO: implement - return None def __init_run_document(data: Dict) -> Dict: - """Initializes workflow run document.""" - document: Dict = dict() - document["api"] = dict() - document["internal"] = dict() + """Initialize workflow run document. + + Args: + data: Workflow run request form data. + + Returns: + Workflow run document. + """ + document: Dict = {} + document["api"] = {} + document["internal"] = {} document["api"]["request"] = data document["api"]["state"] = "UNKNOWN" - document["api"]["run_log"] = dict() - document["api"]["task_logs"] = list() - document["api"]["outputs"] = dict() + document["api"]["run_log"] = {} + document["api"]["task_logs"] = [] + document["api"]["outputs"] = {} return document def __create_run_environment(config: Config, document: Dict, **kwargs) -> Dict: - """Creates unique run identifier and permanent and temporary storage - directories for current run.""" + """Create run environment. + + Create unique run identifier and permanent and temporary storage + directories for current run. + + Args: + config: Flask configuration object. + document: Workflow run document. + **kwargs: Additional keyword arguments. + + Returns: + Workflow run document.
+ """ collection_runs: Collection = ( config.foca.db.dbs["cwl-wes-db"].collections["runs"].client ) - out_dir = config.foca.custom.storage.permanent_dir - tmp_dir = config.foca.custom.storage.tmp_dir - run_id_charset = eval(config.foca.custom.controller.runs_id.charset) - run_id_length = config.foca.custom.controller.runs_id.length + controller_conf = config.foca.custom.controller + info_conf = config.foca.custom.service_info + storage_conf = config.foca.custom.storage # Keep on trying until a unique run id was found and inserted - # TODO: If no more possible IDs => inf loop; fix (raise custom error; 500 - # to user) + # TODO: If no more possible IDs => inf loop; fix while True: # Create unique run and task ids - run_id = __create_run_id( - charset=run_id_charset, - length=run_id_length, + run_id = generate_id( + charset=controller_conf.runs_id.charset, + length=controller_conf.runs_id.length, ) task_id = uuid() # Set temporary and output directories - current_tmp_dir = os.path.abspath(os.path.join(tmp_dir, run_id)) - current_out_dir = os.path.abspath(os.path.join(out_dir, run_id)) + current_tmp_dir = storage_conf.tmp_dir.resolve() / run_id + current_out_dir = storage_conf.permanent_dir.resolve() / run_id # Try to create workflow run directory (temporary) try: - # TODO: Think about permissions - # TODO: Add working dir (currently one has to run the app from - # outermost dir) - os.makedirs(current_tmp_dir) - os.makedirs(current_out_dir) + current_tmp_dir.mkdir(parents=True, exist_ok=True) + current_out_dir.mkdir(parents=True, exist_ok=True) # Try new run id if directory already exists except FileExistsError: @@ -212,8 +253,8 @@ def __create_run_environment(config: Config, document: Dict, **kwargs) -> Dict: document["user_id"] = kwargs["user_id"] else: document["user_id"] = None - document["internal"]["tmp_dir"] = current_tmp_dir - document["internal"]["out_dir"] = current_out_dir + document["internal"]["tmp_dir"] = str(current_tmp_dir) + 
document["internal"]["out_dir"] = str(current_out_dir) # Process worflow attachments document = __process_workflow_attachments(document) @@ -233,46 +274,38 @@ def __create_run_environment(config: Config, document: Dict, **kwargs) -> Dict: # Catch other database errors # TODO: implement properly - except Exception as e: + except PyMongoError as exc: print("Database error") - print(e) + print(exc) break # Exit loop break # translate DRS URIs to access URLs - drs_server_conf = current_app.config.foca.custom.controller.drs_server - service_info_conf = current_app.config.foca.custom.service_info - file_types: List[str] = drs_server_conf.file_types - supported_access_methods: List[ - str - ] = service_info_conf.supported_filesystem_protocols - port: Optional[int] = drs_server_conf.port - base_path: Optional[str] = drs_server_conf.base_path - use_http: bool = drs_server_conf.use_http translate_drs_uris( path=document["internal"]["workflow_files"], - file_types=file_types, - supported_access_methods=supported_access_methods, - port=port, - base_path=base_path, - use_http=use_http, + file_types=controller_conf.drs_server.file_types, + supported_access_methods=info_conf.supported_filesystem_protocols, + port=controller_conf.drs_server.port, + base_path=controller_conf.drs_server.base_path, + use_http=controller_conf.drs_server.use_http, ) return document -def __create_run_id(charset: str = "0123456789", length: int = 6) -> str: - """Creates random run ID.""" - return "".join(choice(charset) for __ in range(length)) +def __process_workflow_attachments( # pylint: disable=too-many-branches + data: Dict, +) -> Dict: + """Process workflow attachments. + Args: + data: Workflow run document. -def __process_workflow_attachments(data: Dict) -> Dict: - """Processes workflow attachments.""" - # TODO: implement properly - # Current workaround until processing of workflow attachments is - # implemented + Returns: + Workflow run document. 
+ """ # Use 'workflow_url' for path to (main) CWL workflow file on local file # system or in Git repo # Use 'workflow_params' or file in Git repo to generate YAML file @@ -302,43 +335,36 @@ def __process_workflow_attachments(data: Dict) -> Dict: # specified, are: ',', ';', ':', '|' re_git_file = re.compile( ( - r"^(https?:.*)\/(blob|src|tree)\/(.*?)\/(.*?\.(cwl|yml|yaml|json))" - r"[,:;|]?(.*\.(yml|yaml|json))?" + r"^(?P<repo_url>https?:.*)\/(blob|src|tree)\/" + r"(?P<branch_commit>.*?)\/(?P<cwl_path>.*?\.(cwl|yml|yaml|json))" + r"[,:;|]?(?P<params_path>.*\.(yml|yaml|json))?" ) ) # Create directory for storing workflow files - data["internal"]["workflow_files"] = workflow_dir = os.path.abspath( - os.path.join(data["internal"]["out_dir"], "workflow_files") - ) - try: - os.mkdir(workflow_dir) - - except OSError: - # TODO: Do something more reasonable here - pass + workflow_dir = Path(data["internal"]["out_dir"]) / "workflow_files" + data["internal"]["workflow_files"] = str(workflow_dir) + workflow_dir.mkdir() # Get main workflow file - user_string = data["api"]["request"]["workflow_url"] - m = re_git_file.match(user_string) + match = re_git_file.match(data["api"]["request"]["workflow_url"]) # Get workflow from Git repo if regex matches - if m: - - repo_url = ".".join([m.group(1), "git"]) - branch_commit = m.group(3) - cwl_path = m.group(4) + if match: # Try to clone repo if not subprocess.run( - ["git", "clone", repo_url, os.path.join(workflow_dir, "repo")], + [ + "git", + "clone", + match.group("repo_url") + ".git", + str(workflow_dir / "repo"), + ], check=True, ): logger.error( - ( - "Could not clone Git repository. Check value of " - "'workflow_url' in run request." - ) + "Could not clone Git repository. Check value of " + "'workflow_url' in run request."
) raise BadRequest @@ -347,25 +373,23 @@ def __process_workflow_attachments(data: Dict) -> Dict: [ "git", "--git-dir", - os.path.join(workflow_dir, "repo", ".git"), + str(workflow_dir / "repo" / ".git"), "--work-tree", - os.path.join(workflow_dir, "repo"), + str(workflow_dir / "repo"), "checkout", - branch_commit, + match.group("branch_commit"), ], check=True, ): logger.error( - ( - "Could not checkout repository commit/branch. Check value " - "of 'workflow_url' in run request." - ) + "Could not checkout repository commit/branch. Check value " + "of 'workflow_url' in run request." ) raise BadRequest # Set CWL path - data["internal"]["cwl_path"] = os.path.join( - workflow_dir, "repo", cwl_path + data["internal"]["cwl_path"] = str( + workflow_dir / "repo" / match.group("cwl_path") ) # Else assume value of 'workflow_url' represents file on local file system, @@ -388,23 +412,16 @@ def __process_workflow_attachments(data: Dict) -> Dict: workflow_url = __secure_join( workflow_dir, req_data["workflow_url"] ) - if os.path.exists(workflow_url): - req_data["workflow_url"] = workflow_url + if workflow_url.exists(): + req_data["workflow_url"] = str(workflow_url) # Set main CWL workflow file path - data["internal"]["cwl_path"] = os.path.abspath( - data["api"]["request"]["workflow_url"] - ) - - # Extract name and extensions of workflow - workflow_name_ext = os.path.splitext( - os.path.basename(data["internal"]["cwl_path"]) + data["internal"]["cwl_path"] = str( + Path(data["api"]["request"]["workflow_url"]).resolve() ) # Get parameter file - workflow_name_ext = os.path.splitext( - os.path.basename(data["internal"]["cwl_path"]) - ) + workflow_base_name = Path(data["internal"]["cwl_path"]).stem # Try to get parameters from 'workflow_params' field if data["api"]["request"]["workflow_params"]: @@ -412,16 +429,14 @@ def __process_workflow_attachments(data: Dict) -> Dict: # Replace `DRS URIs` in 'workflow_params' # replace_drs_uris(data['api']['request']['workflow_params']) - 
data["internal"]["param_file_path"] = os.path.join( - workflow_dir, - ".".join( - [ - str(workflow_name_ext[0]), - "yml", - ] - ), + data["internal"]["param_file_path"] = str( + workflow_dir / f"{workflow_base_name}.yml" ) - with open(data["internal"]["param_file_path"], "w") as yaml_file: + with open( + data["internal"]["param_file_path"], + mode="w", + encoding="utf-8", + ) as yaml_file: dump( data["api"]["request"]["workflow_params"], yaml_file, @@ -430,43 +445,28 @@ def __process_workflow_attachments(data: Dict) -> Dict: ) # Or from provided relative file path in repo - elif m and m.group(6): - param_path = m.group(6) - data["internal"]["param_file_path"] = os.path.join( - workflow_dir, - "repo", - param_path, + elif match and match.group("params_path"): + data["internal"]["param_file_path"] = str( + workflow_dir / "repo" / match.group("params_path") ) # Else try to see if there is a 'yml', 'yaml' or 'json' file with exactly # the same basename as CWL in same dir else: - param_file_extensions = ["yml", "yaml", "json"] - for ext in param_file_extensions: - possible_param_file = os.path.join( - workflow_dir, - "repo", - ".".join( - [ - str(workflow_name_ext[0]), - ext, - ] - ), + for ext in ["yml", "yaml", "json"]: + candidate_file = ( + workflow_dir / "repo" / f"{workflow_base_name}.{ext}" ) - if os.path.isfile(possible_param_file): - data["internal"]["param_file_path"] = possible_param_file + if candidate_file.is_file(): + data["internal"]["param_file_path"] = str(candidate_file) break - # Raise BadRequest if not parameter file was found + # Raise BadRequest if no parameter file was found if "param_file_path" not in data["internal"]: raise BadRequest # Extract workflow attachments from form data dictionary if "workflow_attachment" in data["api"]["request"]: - - # TODO: do something with data['workflow_attachment'] - - # Strip workflow attachments from data del data["api"]["request"]["workflow_attachment"] # Return form data stripped of workflow attachments @@ 
-474,7 +474,16 @@ def __process_workflow_attachments(data: Dict) -> Dict: def __run_workflow(config: Config, document: Dict, **kwargs) -> None: - """Helper function `run_workflow()`.""" + """Run workflow helper function. + + Args: + config: Flask configuration object. + document: Workflow run document. + **kwargs: Additional keyword arguments. + + Raises: + BadRequest: If workflow run fails. + """ tes_url = config.foca.custom.controller.tes_server.url remote_storage_url = config.foca.custom.storage.remote_storage_url run_id = document["run_id"] @@ -529,14 +538,8 @@ def __run_workflow(config: Config, document: Dict, **kwargs) -> None: # Execute command as background task logger.info( - ( - "Starting execution of run '{run_id}' as task '{task_id}' in " - "'{tmp_dir}'..." - ).format( - run_id=run_id, - task_id=task_id, - tmp_dir=tmp_dir, - ) + f"Starting execution of run '{run_id}' as task '{task_id}' in: " + f"{tmp_dir}" ) task__run_workflow.apply_async( None, @@ -548,4 +551,3 @@ def __run_workflow(config: Config, document: Dict, **kwargs) -> None: task_id=task_id, soft_time_limit=timeout_duration, ) - return None diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index e510c2c..0e06676 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -1,20 +1,23 @@ """Controller for GA4GH WES API endpoints.""" import logging +from typing import Dict, Optional -from celery import current_app as celery_app +from bson.objectid import ObjectId +from celery import uuid from connexion import request from flask import current_app +from pymongo.collection import Collection from foca.utils.logging import log_traffic -from cwl_wes.ga4gh.wes.endpoints.cancel_run import cancel_run -from cwl_wes.ga4gh.wes.endpoints.get_run_log import get_run_log -from cwl_wes.ga4gh.wes.endpoints.get_run_status import get_run_status -from cwl_wes.ga4gh.wes.endpoints.list_runs import list_runs from cwl_wes.ga4gh.wes.endpoints.run_workflow import run_workflow from 
cwl_wes.ga4gh.wes.endpoints.get_service_info import get_service_info +from cwl_wes.ga4gh.wes.states import States +from cwl_wes.tasks.cancel_run import task__cancel_run +from cwl_wes.utils.controllers import get_document_if_allowed +# pragma pylint: disable=invalid-name,unused-argument # Get logger instance logger = logging.getLogger(__name__) @@ -22,59 +25,167 @@ # GET /runs/ @log_traffic -def GetRunLog(run_id, *args, **kwargs): - """Returns detailed run info.""" - response = get_run_log( - config=current_app.config, run_id=run_id, *args, **kwargs +def GetRunLog(run_id, *args, **kwargs) -> Dict: + """Get detailed run info. + + Returns: + Run info object. + """ + document = get_document_if_allowed( + config=current_app.config, + run_id=run_id, + projection={ + "user_id": True, + "api": True, + "_id": False, + }, + user_id=kwargs.get("user_id"), ) - return response + assert "api" in document, "'api' key not in document" + return document["api"] # POST /runs//cancel @log_traffic -def CancelRun(run_id, *args, **kwargs): - """Cancels unfinished workflow run.""" - response = cancel_run( +def CancelRun(run_id, *args, **kwargs) -> Dict: + """Cancel unfinished workflow run. + + Returns: + Run identifier object. 
+ """ + document = get_document_if_allowed( config=current_app.config, - celery_app=celery_app, run_id=run_id, - *args, - **kwargs + projection={ + "user_id": True, + "task_id": True, + "api.state": True, + "_id": False, + }, + user_id=kwargs.get("user_id"), ) - return response + assert "api" in document, "'api' key not in document" + assert "state" in document["api"], "'state' key not in document['api']" + + if document["api"]["state"] in States.CANCELABLE: + timeout_duration = ( + current_app.config.foca.custom.controller.timeout_cancel_run + ) + task_id = uuid() + logger.info(f"Canceling run '{run_id}' as background task: {task_id}") + task__cancel_run.apply_async( + None, + { + "run_id": run_id, + "task_id": document["task_id"], + "token": kwargs.get("jwt"), + }, + task_id=task_id, + soft_time_limit=timeout_duration, + ) + + return {"run_id": run_id} # GET /runs//status @log_traffic -def GetRunStatus(run_id, *args, **kwargs): - """Returns run status.""" - response = get_run_status( - config=current_app.config, run_id=run_id, *args, **kwargs +def GetRunStatus(run_id, *args, **kwargs) -> Dict: + """Get run status. + + Returns: + Run status object. + """ + document = get_document_if_allowed( + config=current_app.config, + run_id=run_id, + projection={ + "user_id": True, + "api.state": True, + "_id": False, + }, + user_id=kwargs.get("user_id"), ) - return response + assert "api" in document, "'api' key not in document" + assert "state" in document["api"], "'state' key not in document['api']" + return {"run_id": run_id, "state": document["api"]["state"]} # GET /service-info @log_traffic -def GetServiceInfo(*args, **kwargs): - """Returns service info.""" - response = get_service_info(config=current_app.config, *args, **kwargs) +def GetServiceInfo(*args, **kwargs) -> Optional[Dict]: + """Get service info. + + Returns: + Service info object. 
+ """ + response = get_service_info( + config=current_app.config, + *args, + **kwargs, + ) return response # GET /runs @log_traffic -def ListRuns(*args, **kwargs): - """Lists IDs and status of all workflow runs.""" - response = list_runs(config=current_app.config, *args, **kwargs) - return response +def ListRuns(*args, **kwargs) -> Dict: + """List IDs and status of all workflow runs. + + Returns: + Run list object. + """ + collection_runs: Collection = ( + current_app.config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) + page_size = kwargs.get( + "page_size", + current_app.config.foca.custom.controller.default_page_size, + ) + page_token = kwargs.get("page_token", "") + + filter_dict = {} + if "user_id" in kwargs: + filter_dict["user_id"] = kwargs["user_id"] + if page_token != "": + filter_dict["_id"] = {"$lt": ObjectId(page_token)} + cursor = ( + collection_runs.find( + filter=filter_dict, + projection={ + "run_id": True, + "api.state": True, + }, + ) + .sort("_id", -1) + .limit(page_size) + ) + runs_list = list(cursor) + + if runs_list: + next_page_token = str(runs_list[-1]["_id"]) + else: + next_page_token = "" + + for run in runs_list: + del run["_id"] + run["state"] = run["api"]["state"] + del run["api"] + + return {"next_page_token": next_page_token, "runs": runs_list} # POST /runs @log_traffic -def RunWorkflow(*args, **kwargs): - """Executes workflow.""" +def RunWorkflow(*args, **kwargs) -> Dict: + """Trigger workflow run. + + Returns: + Run identifier object. 
+ """ response = run_workflow( - config=current_app.config, form_data=request.form, *args, **kwargs + config=current_app.config, + form_data=request.form, + *args, + **kwargs, ) return response diff --git a/cwl_wes/ga4gh/wes/service_info.py b/cwl_wes/ga4gh/wes/service_info.py index 0bcf351..5d73591 100644 --- a/cwl_wes/ga4gh/wes/service_info.py +++ b/cwl_wes/ga4gh/wes/service_info.py @@ -1,4 +1,4 @@ -"""Controllers for the `/service-info route.""" +"""Controller for the `/service-info route.""" import logging from typing import Dict @@ -17,19 +17,21 @@ class ServiceInfo: - def __init__(self) -> None: - """Class for WES API service info server-side controller methods. + """Class for WES API service info server-side controller methods. - Creates service info upon first request, if it does not exist. + Creates service info upon first request, if it does not exist. - Attributes: - config: App configuration. - foca_config: FOCA configuration. - db_client_service_info: Database collection storing service info - objects. - db_client_runs: Database collection storing workflow run objects. - object_id: Database identifier for service info. - """ + Attributes: + config: App configuration. + foca_config: FOCA configuration. + db_client_service_info: Database collection storing service info + objects. + db_client_runs: Database collection storing workflow run objects. + object_id: Database identifier for service info. 
+ """ + + def __init__(self) -> None: + """Construct class instance.""" self.config: Dict = current_app.config self.foca_config: Config = self.config.foca self.db_client_service_info: Collection = ( @@ -81,7 +83,7 @@ def set_service_info( logger.info("Service info set.") def _get_state_counts(self) -> Dict[str, int]: - """Gets current system state counts.""" + """Get current system state counts.""" current_counts = {state: 0 for state in States.ALL} cursor = self.db_client_runs.find( filter={}, diff --git a/cwl_wes/ga4gh/wes/states.py b/cwl_wes/ga4gh/wes/states.py index e0692ae..2fd4002 100644 --- a/cwl_wes/ga4gh/wes/states.py +++ b/cwl_wes/ga4gh/wes/states.py @@ -1,4 +1,10 @@ +"""WES run states.""" + +# pragma pylint: disable=too-few-public-methods + + class States: + """WES run states.""" UNDEFINED = [ "UNKNOWN", diff --git a/cwl_wes/gunicorn.py b/cwl_wes/gunicorn.py index 06b006c..9d297ae 100644 --- a/cwl_wes/gunicorn.py +++ b/cwl_wes/gunicorn.py @@ -1,3 +1,5 @@ +"""Gunicorn entry point.""" + import os from cwl_wes.app import init_app @@ -11,22 +13,19 @@ threads = int(os.environ.get("GUNICORN_THREADS", "1")) # Set allowed IPs -forwarded_allow_ips = "*" +forwarded_allow_ips = "*" # pylint: disable=invalid-name # Set Gunicorn bind address -bind = "{address}:{port}".format( - address=app_config.server.host, - port=app_config.server.port, -) +bind = f"{app_config.server.host}:{app_config.server.port}" # Source the environment variables for the Gunicorn workers raw_env = [ - "WES_CONFIG=%s" % os.environ.get("WES_CONFIG", ""), - "RABBIT_HOST=%s" % os.environ.get("RABBIT_HOST", app_config.jobs.host), - "RABBIT_PORT=%s" % os.environ.get("RABBIT_PORT", app_config.jobs.port), - "MONGO_HOST=%s" % os.environ.get("MONGO_HOST", app_config.db.host), - "MONGO_PORT=%s" % os.environ.get("MONGO_PORT", app_config.db.port), - "MONGO_DBNAME=%s" % os.environ.get("MONGO_DBNAME", "cwl-wes-db"), - "MONGO_USERNAME=%s" % os.environ.get("MONGO_USERNAME", ""), - "MONGO_PASSWORD=%s" % 
os.environ.get("MONGO_PASSWORD", ""), + f"WES_CONFIG={os.environ.get('WES_CONFIG', '')}", + f"RABBIT_HOST={os.environ.get('RABBIT_HOST', app_config.jobs.host)}", + f"RABBIT_PORT={os.environ.get('RABBIT_PORT', app_config.jobs.port)}", + f"MONGO_HOST={os.environ.get('MONGO_HOST', app_config.db.host)}", + f"MONGO_PORT={os.environ.get('MONGO_PORT', app_config.db.port)}", + f"MONGO_DBNAME={os.environ.get('MONGO_DBNAME', 'cwl-wes-db')}", + f"MONGO_USERNAME={os.environ.get('MONGO_USERNAME', '')}", + f"MONGO_PASSWORD={os.environ.get('MONGO_PASSWORD', '')}", ] diff --git a/cwl_wes/tasks/cancel_run.py b/cwl_wes/tasks/cancel_run.py index 27a4c7c..628dce7 100644 --- a/cwl_wes/tasks/cancel_run.py +++ b/cwl_wes/tasks/cancel_run.py @@ -1,7 +1,6 @@ """Celery background task to cancel workflow run and related TES tasks.""" import logging -from requests import HTTPError import time from typing import List, Optional @@ -9,13 +8,13 @@ from flask import current_app from foca.database.register_mongodb import _create_mongo_client from pymongo import collection as Collection +from requests import HTTPError import tes from cwl_wes.ga4gh.wes.states import States import cwl_wes.utils.db as db_utils from cwl_wes.worker import celery_app - # Get logger instance logger = logging.getLogger(__name__) @@ -26,7 +25,6 @@ bind=True, ) def task__cancel_run( - self, run_id: str, task_id: str, token: Optional[str] = None, @@ -59,7 +57,7 @@ def task__cancel_run( timeout=tes_server_config.timeout, token=token, ) - except SoftTimeLimitExceeded as e: + except SoftTimeLimitExceeded as exc: db_utils.set_run_state( collection=collection, run_id=run_id, @@ -67,15 +65,9 @@ def task__cancel_run( state="SYSTEM_ERROR", ) logger.warning( - ( - "Canceling workflow run '{run_id}' timed out. Run state " - "was set to 'SYSTEM_ERROR'. Original error message: " - "{type}: {msg}" - ).format( - run_id=run_id, - type=type(e).__name__, - msg=e, - ) + f"Canceling workflow run '{run_id}' timed out. 
Run state was set " + "to 'SYSTEM_ERROR'. Original error message: " + f"{type(exc).__name__}: {exc}" ) @@ -92,7 +84,7 @@ def __cancel_tes_tasks( timeout=timeout, token=token, ) - canceled: List = list() + canceled: List = [] while True: task_ids = db_utils.find_tes_task_ids( collection=collection, diff --git a/cwl_wes/tasks/cwl_log_processor.py b/cwl_wes/tasks/cwl_log_processor.py index 248cbc4..3a46722 100644 --- a/cwl_wes/tasks/cwl_log_processor.py +++ b/cwl_wes/tasks/cwl_log_processor.py @@ -1,22 +1,36 @@ """cwl-tes log parser executed on worker.""" from ast import literal_eval -from _io import TextIOWrapper import logging import os import re from typing import Dict, List, Optional, Tuple +from _io import TextIOWrapper +from pymongo.errors import PyMongoError +import tes + import cwl_wes.utils.db as db_utils from cwl_wes.worker import celery_app -import tes # Get logger instance logger = logging.getLogger(__name__) class CWLLogProcessor: + """cwl-tes log parser executed on worker. + + Args: + tes_config: TES configuration. + collection: MongoDB collection. + + Attributes: + tes_config: TES configuration. + collection: MongoDB collection. + """ + def __init__(self, tes_config, collection) -> None: + """Construct class instance.""" self.tes_config = tes_config self.collection = collection @@ -26,11 +40,20 @@ def process_cwl_logs( stream: TextIOWrapper, token: Optional[str] = None, ) -> Tuple[List, List]: - """Parses combinend cwl-tes STDOUT/STDERR and sends TES task IDs and state - updates to broker. + """Parse cwl-tes logs. + + Args: + task: Celery task instance. + stream: Combined STDOUT/STDERR stream. + token: OAuth2 token. + + Returns: + Tuple of lists containing the following: + - List of log lines. + - List of TES task IDs. 
""" - stream_container: List = list() - tes_states: Dict = dict() + stream_container: List = [] + tes_states: Dict = {} # Iterate over STDOUT/STDERR stream for line in iter(stream.readline, ""): @@ -42,9 +65,9 @@ def process_cwl_logs( # Handle special cases lines = self.process_tes_log(line) - for line in lines: - stream_container.append(line) - logger.info(f"[{task}] {line}") + for processed_line in lines: + stream_container.append(processed_line) + logger.info(f"[{task}] {processed_line}") continue # Detect TES task state changes @@ -76,16 +99,23 @@ def process_cwl_logs( return (stream_container, list(tes_states.keys())) def process_tes_log(self, line: str) -> List[str]: - """Handles irregularities arising from log parsing.""" - lines: List = list() + """Handle irregularities arising from log parsing. + + Args: + line: Log line. + + Returns: + List of log lines. + """ + lines: List = [] # Handle special case where FTP and cwl-tes logs are on same line re_ftp_cwl_tes = re.compile( r"^(\*cmd\* .*)(\[step \w*\] produced output \{)$" ) - m = re_ftp_cwl_tes.match(line) - if m: - lines.append(m.group(1)) + match = re_ftp_cwl_tes.match(line) + if match: + lines.append(match.group(1)) return lines @@ -93,24 +123,31 @@ def extract_tes_state( self, line: str, ) -> Tuple[Optional[str], Optional[str]]: - """Extracts task ID and state from cwl-tes log.""" + """Extract task ID and state from cwl-tes log. + + Args: + line: Log line. + + Returns: + Tuple of task ID and state. 
+ """ task_id: Optional[str] = None task_state: Optional[str] = None # Extract new task ID re_task_new = re.compile(r"^\[job [\w\-]*\] task id: (\S*)$") - m = re_task_new.match(line) - if m: - task_id = m.group(1) + match = re_task_new.match(line) + if match: + task_id = match.group(1) # Extract task ID and state re_task_state_poll = re.compile( r'^\[job [\w\-]*\] POLLING "(\S*)", result: (\w*)' ) - m = re_task_state_poll.match(line) - if m: - task_id = m.group(1) - task_state = m.group(2) + match = re_task_state_poll.match(line) + if match: + task_id = match.group(1) + task_state = match.group(2) return (task_id, task_state) @@ -121,12 +158,19 @@ def capture_tes_task_update( tes_state: Optional[str] = None, token: Optional[str] = None, ) -> None: - """Event handler for TES task state changes.""" + """Handle TES task state change events. + + Args: + task: Celery task instance. + tes_id: TES task ID. + tes_state: TES task state. + token: OAuth2 token. + """ # If TES task is new, add task log to database logger.info(f"TES_STATE------------->{tes_state}") cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config) if not tes_state: - tes_log = cwl_tes_processor.__get_tes_task_log( + tes_log = cwl_tes_processor.get_tes_task_log( tes_id=tes_id, token=token, ) @@ -137,18 +181,12 @@ def capture_tes_task_update( task_id=task.task_id, tes_log=tes_log, ) - except Exception as e: + except PyMongoError as exc: logger.exception( - ( - "Database error. Could not update log information for " - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=task.task_id, - type=type(e).__name__, - msg=e, - ) + "Database error. Could not update log information for" + f" task '{task.task_id}'. 
Original error message:" + f" {type(exc).__name__}: {exc}" ) - pass # Otherwise only update state else: @@ -160,71 +198,79 @@ def capture_tes_task_update( state=tes_state, ) logger.info( - ( - "State of TES task '{tes_id}' of run with task ID " - "'{task_id}' changed to '{state}'." - ).format( - task_id=task.task_id, - tes_id=tes_id, - state=tes_state, - ) + f"State of TES task '{tes_id}' of run with task ID " + f"'{task.task_id}' changed to '{tes_state}'." ) - except Exception as e: + except PyMongoError as exc: logger.exception( - ( - "Database error. Could not update log information for " - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=task.task_id, - type=type(e).__name__, - msg=e, - ) + "Database error. Could not update log information for" + f" task '{task.task_id}'. Original error message:" + f" {type(exc).__name__}: {exc}" ) - pass class CWLTesProcessor: + """Class for processing cwl-tes logs. + + Args: + tes_config: TES configuration. + + Attributes: + tes_config: TES configuration. + """ + def __init__(self, tes_config) -> None: + """Construct class instance.""" self.tes_config = tes_config @staticmethod - def __cwl_tes_outputs_parser(log: str) -> Dict: - """Parses outputs from cwl-tes log.""" - # Find outputs object in log string + def cwl_tes_outputs_parser(log: str) -> Dict: + """Parse outputs from cwl-tes log. + + Args: + log: cwl-tes log. + + Returns: + Outputs dictionary. + """ re_outputs = re.compile( r'(^\{$\n^ {4}"\S+": [\[\{]$\n(^ {4,}.*$\n)*^ {4}[\]\}]$\n^\}$\n)', re.MULTILINE, ) - m = re_outputs.search(log) - if m: - return literal_eval(m.group(1)) - else: - return dict() + match = re_outputs.search(log) + if match: + return literal_eval(match.group(1)) + return {} @staticmethod - def __cwl_tes_outputs_parser_list(log: List) -> Dict: + def cwl_tes_outputs_parser_list(log: List) -> Dict: """Parse outputs from cwl-tes log. The outputs JSON starts at the line before last in the logs. 
So unless the outputs are empty ({}), parse upward, until you find the beginning of the JSON containing the outputs. - """ + Args: + log: cwl-tes log. + + Returns: + Outputs dictionary. + """ indices = range(len(log) - 1, -1, -1) start = -1 end = -1 for index in indices: if log[index].rstrip() == "{}": - return dict() - elif log[index].rstrip() == "}": + return {} + if log[index].rstrip() == "}": end = index break # No valid JSON was found and the previous loop # reached the end of the log if end == 0: - return dict() + return {} indices = range(end - 1, -1, -1) for index in indices: @@ -232,45 +278,61 @@ def __cwl_tes_outputs_parser_list(log: List) -> Dict: start = index break - json = os.linesep.join(log[start:end + 1]) + json = os.linesep.join(log[start : end + 1]) # noqa: E203 try: return literal_eval(json) - except ValueError as verr: + except ValueError as exc: logger.exception( - "ValueError when evaluation JSON: '%s'. Original error message: %s" - % (json, verr) + f"ValueError when evaluation JSON: {json}. Original error" + f" message: {exc}" ) - return dict() - except SyntaxError as serr: + return {} + except SyntaxError as exc: logger.exception( - "SyntaxError when evaluation JSON: '%s'. Original error message: %s" - % (json, serr) + f"SyntaxError when evaluation JSON: {json}. Original error" + f" message: {exc}" ) - return dict() + return {} - def __get_tes_task_logs( + def get_tes_task_logs( self, - tes_ids: List = list(), + tes_ids: List, token: Optional[str] = None, ) -> List[Dict]: - """Gets multiple task logs from TES instance.""" - task_logs = list() + """Get multiple task logs from TES instance. + + Args: + tes_ids: TES task IDs. + token: OAuth2 token. + + Returns: + Task logs. 
+ """ + task_logs = [] for tes_id in tes_ids: task_logs.append( - self.__get_tes_task_log( + self.get_tes_task_log( tes_id=tes_id, token=token, ) ) return task_logs - def __get_tes_task_log( + def get_tes_task_log( self, tes_id: str, token: Optional[str] = None, ) -> Dict: - """Gets task log from TES instance.""" + """Get single task log from TES instance. + + Args: + tes_id: TES task ID. + token: OAuth2 token. + + Returns: + Task log. + """ tes_client = tes.HTTPClient( url=self.tes_config["url"], timeout=self.tes_config["timeout"], @@ -284,12 +346,10 @@ def __get_tes_task_log( task_id=tes_id, view=self.tes_config["query_params"], ).as_dict() - except Exception as e: - # TODO: handle more robustly: only 400/Bad Request is okay; - # TODO: other errors (e.g. 500) should be dealt with + except Exception as exc: # pylint: disable=broad-except logger.warning( "Could not obtain task log. Setting default. Original error " - f"message: {type(e).__name__}: {e}" + f"message: {type(exc).__name__}: {exc}" ) task_log = {} diff --git a/cwl_wes/tasks/run_workflow.py b/cwl_wes/tasks/run_workflow.py index 1903c76..e567732 100644 --- a/cwl_wes/tasks/run_workflow.py +++ b/cwl_wes/tasks/run_workflow.py @@ -1,7 +1,7 @@ """Celery background task to start workflow run.""" import logging -from typing import List, Optional, Tuple +from typing import List, Optional from cwl_wes.worker import celery_app from cwl_wes.tasks.workflow_run_manager import WorkflowRunManager @@ -22,11 +22,10 @@ def task__run_workflow( command_list: List, tmp_dir: str, token: Optional[str] = None, -) -> Tuple[int, List[str], List[str], Optional[str]]: - """Adds workflow run to task queue.""" +) -> None: + """Add workflow run to task queue.""" # Execute task in background workflow_run_manager = WorkflowRunManager( task=self, command_list=command_list, tmp_dir=tmp_dir, token=token ) - return_val = workflow_run_manager.run_workflow() - return return_val + workflow_run_manager.run_workflow() diff --git 
a/cwl_wes/tasks/workflow_run_manager.py b/cwl_wes/tasks/workflow_run_manager.py index cad3b73..97b7751 100644 --- a/cwl_wes/tasks/workflow_run_manager.py +++ b/cwl_wes/tasks/workflow_run_manager.py @@ -8,6 +8,7 @@ from typing import Dict, List, Optional from foca.models.config import Config +from pymongo.errors import PyMongoError from cwl_wes.tasks.cwl_log_processor import CWLLogProcessor, CWLTesProcessor import cwl_wes.utils.db as db_utils @@ -17,7 +18,7 @@ logger = logging.getLogger(__name__) -class WorkflowRunManager: +class WorkflowRunManager: # pylint: disable=too-many-instance-attributes """Workflow run manager.""" def __init__( @@ -32,37 +33,39 @@ def __init__( Args: task: Celery task instance for initiating workflow run. task_id: Unique identifier for workflow run task. - command_list: List of commands to be executed as a part of workflow run. - tmp_dir: Current working directory to be passed for child process execution - context. + command_list: List of commands to be executed as a part of workflow + run. + tmp_dir: Current working directory to be passed for child process + execution context. token: JSON Web Token (JWT). foca_config: :py:class:`foca.models.config.Config` instance describing configurations registered with `celery_app`. - custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` instance - describing custom configuration model for cwl-WES specific - configurations. + custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` + instance describing custom configuration model for cwl-WES + specific configurations. collection: Collection client for saving task run progress. tes_config: TES (Task Execution Service) endpoint configurations. - authorization: Boolean to define the security auth configuration for - the app. + authorization: Boolean to define the security auth configuration + for the app. string_format: String time format for task timestamps. Attributes: task: Celery task instance for initiating workflow run. 
task_id: Unique identifier for workflow run task. - command_list: List of commands to be executed as a part of workflow run. - tmp_dir: Current working directory to be passed for child process execution - context. + command_list: List of commands to be executed as a part of workflow + run. + tmp_dir: Current working directory to be passed for child process + execution context. token: JSON Web Token (JWT). foca_config: :py:class:`foca.models.config.Config` instance describing configurations registered with `celery_app`. - custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` instance - describing custom configuration model for cwl-WES specific - configurations. + custom_config: :py:class:`cwl_wes.custom_config.CustomConfig` + instance describing custom configuration model for cwl-WES + specific configurations. collection: Collection client for saving task run progress. tes_config: TES (Task Execution Service) endpoint configurations. - authorization: Boolean to define the security auth configuration for - the app. + authorization: Boolean to define the security auth configuration + for the app. string_format: String time format for task timestamps. 
""" self.task = task @@ -77,17 +80,19 @@ def __init__( ) self.tes_config = { "url": self.controller_config.tes_server.url, - "query_params": self.controller_config.tes_server.status_query_params, + "query_params": ( + self.controller_config.tes_server.status_query_params + ), "timeout": self.controller_config.tes_server.timeout, } self.authorization = self.foca_config.security.auth.required self.string_format: str = "%Y-%m-%d %H:%M:%S.%f" def trigger_task_start_events(self) -> None: - """Method to trigger task start events.""" + """Trigger task start events.""" if not self.collection.find_one({"task_id": self.task.request.id}): - return None - internal = dict() + return + internal = {} current_ts = time.time() internal["task_started"] = datetime.utcfromtimestamp(current_ts) # Update run document in database @@ -99,25 +104,25 @@ def trigger_task_start_events(self) -> None: self.string_format ), ) - except Exception as e: + except PyMongoError as exc: logger.exception( - ( - "Database error. Could not update log information for " - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=self.task_id, - type=type(e).__name__, - msg=e, - ) + "Database error. Could not update log information for task" + f" '{self.task_id}'. Original error message:" + f" {type(exc).__name__}: {exc}" ) + raise def trigger_task_failure_events(self, task_end_ts): - """Method to trigger task failure events.""" + """Trigger task failure events. + + Args: + task_end_ts: Task end timestamp. 
+ """ if not self.collection.find_one({"task_id": self.task_id}): - return None + return # Create dictionary for internal parameters - internal = dict() + internal = {} internal["task_finished"] = datetime.utcfromtimestamp(task_end_ts) task_meta_data = celery_app.AsyncResult(id=self.task_id) internal["traceback"] = task_meta_data.traceback @@ -132,7 +137,7 @@ def trigger_task_failure_events(self, task_end_ts): exception=task_meta_data.result, ) - def trigger_task_success_events( + def trigger_task_success_events( # pylint: disable=too-many-arguments self, returncode: int, log: str, @@ -140,7 +145,7 @@ def trigger_task_success_events( token: str, task_end_ts: float, ) -> None: - """Method to trigger task success events. + """Trigger task success events. Args: returncode: Task completion status code. @@ -150,26 +155,14 @@ def trigger_task_success_events( task_end_ts: Task end timestamp. """ if not self.collection.find_one({"task_id": self.task_id}): - return None + return # Parse subprocess results - try: - log_list = log - log = os.linesep.join(log) - except Exception as e: - logger.exception( - ( - "Field 'result' in event message malformed. 
Original " - "error message: {type}: {msg}" - ).format( - type=type(e).__name__, - msg=e, - ) - ) - pass + log_list = log + log = os.linesep.join(log) # Create dictionary for internal parameters - internal = dict() + internal = {} internal["task_finished"] = datetime.utcfromtimestamp(task_end_ts) # Set final state to be set @@ -189,10 +182,10 @@ def trigger_task_success_events( # Extract run outputs cwl_tes_processor = CWLTesProcessor(tes_config=self.tes_config) - outputs = cwl_tes_processor.__cwl_tes_outputs_parser_list(log=log_list) + outputs = cwl_tes_processor.cwl_tes_outputs_parser_list(log=log_list) # Get task logs - task_logs = cwl_tes_processor.__get_tes_task_logs( + task_logs = cwl_tes_processor.get_tes_task_logs( tes_ids=tes_ids, token=token, ) @@ -211,23 +204,22 @@ def trigger_task_success_events( stdout=log, stderr="", ) - except Exception as e: + except PyMongoError as exc: logger.exception( - ( - "Database error. Could not update log information for " - "task '{task}'. Original error message: {type}: {msg}" - ).format( - task=self.task_id, - type=type(e).__name__, - msg=e, - ) + "Database error. Could not update log information for task" + f" '{self.task_id}'. Original error message:" + f" {type(exc).__name__}: {exc}" ) - pass + raise def trigger_task_end_events( - self, returncode: int, log: str, tes_ids: List[str], token: str + self, + returncode: int, + log: str, + tes_ids: List[str], + token: str, ) -> None: - """Method to trigger task completion events. + """Trigger task completion events. Args: returncode: Task completion status code. 
@@ -248,15 +240,17 @@ def trigger_task_end_events( else: self.trigger_task_failure_events(task_end_ts=task_end_ts) - def update_run_document( + def update_run_document( # pylint: disable=too-many-branches self, state: Optional[str] = None, internal: Optional[Dict] = None, outputs: Optional[Dict] = None, task_logs: Optional[List[Dict]] = None, - **run_log_params + **run_log_params, ): - """Updates state, internal and run log parameters in database + """Update run document. + + Specifically, update state, internal and run log parameters in database document. Args: @@ -264,7 +258,7 @@ def update_run_document( internal: Task specific internal parameters. outputs: Task specific output parameters. task_logs: Task run logs. - + **run_log_params: Run log parameters. """ # TODO: Minimize db ops; try to compile entire object & update once # Update internal parameters @@ -306,23 +300,20 @@ def update_run_document( # Calculate queue, execution and run time if document and document["internal"]: run_log = document["internal"] - durations = dict() + durations = {} if "task_started" in run_log_params: if "task_started" in run_log and "task_received" in run_log: - pass durations["time_queue"] = ( run_log["task_started"] - run_log["task_received"] ).total_seconds() if "task_finished" in run_log_params: if "task_finished" in run_log and "task_started" in run_log: - pass durations["time_execution"] = ( run_log["task_finished"] - run_log["task_started"] ).total_seconds() if "task_finished" in run_log and "task_received" in run_log: - pass durations["time_total"] = ( run_log["task_finished"] - run_log["task_received"] ).total_seconds() @@ -343,28 +334,27 @@ def update_run_document( task_id=self.task_id, state=state, ) - except Exception: + except PyMongoError as exc: + logger.exception( + "Database error. Could not update log information for task" + f" '{self.task_id}'. 
Original error message:" + f" {type(exc).__name__}: {exc}" + ) raise # Log info message if document: logger.info( - ( - "State of run '{run_id}' (task id: '{task_id}') changed " - "to '{state}'." - ).format( - run_id=document["run_id"], - task_id=self.task_id, - state=state, - ) + f"State of run '{document['run_id']}' (task id:" + f" '{self.task_id}') changed to '{state}'." ) return document def run_workflow(self): - """Method to initiate workflow run.""" + """Initiate workflow run.""" self.trigger_task_start_events() - proc = subprocess.Popen( + proc = subprocess.Popen( # pylint: disable=consider-using-with self.command_list, cwd=self.tmp_dir, stdout=subprocess.PIPE, diff --git a/cwl_wes/utils/controllers.py b/cwl_wes/utils/controllers.py new file mode 100644 index 0000000..09a98b0 --- /dev/null +++ b/cwl_wes/utils/controllers.py @@ -0,0 +1,50 @@ +"""Controller utilities.""" + +import logging +from typing import Dict, Optional + +from connexion.exceptions import Forbidden +from flask import Config +from pymongo.collection import Collection + +from cwl_wes.exceptions import WorkflowNotFound + +logger = logging.getLogger(__name__) + + +def get_document_if_allowed( + config: Config, + run_id: str, + projection: Dict, + user_id: Optional[str], +) -> Dict: + """Get document from database, if allowed. + + Args: + config: Flask configuration object. + run_id: Workflow run ID. + projection: Projection for database query. + user_id: User ID. + + Raises: + WorkflowNotFound: If workflow run is not found. + Forbidden: If user is not allowed to access workflow run. + + Returns: + Document from database. 
+ """ + collection_runs: Collection = ( + config.foca.db.dbs["cwl-wes-db"].collections["runs"].client + ) + document = collection_runs.find_one( + filter={"run_id": run_id}, + projection=projection, + ) + + if document is None: + raise WorkflowNotFound + + if document["user_id"] != user_id: + raise Forbidden + + return document diff --git a/cwl_wes/utils/db.py b/cwl_wes/utils/db.py index 89785ef..65c3863 100644 --- a/cwl_wes/utils/db.py +++ b/cwl_wes/utils/db.py @@ -6,6 +6,7 @@ from bson.objectid import ObjectId from pymongo import collection as Collection from pymongo.collection import ReturnDocument +from pymongo.errors import PyMongoError # Get logger instance logger = logging.getLogger(__name__) @@ -14,7 +15,7 @@ def update_run_state( collection: Collection, task_id: str, state: str = "UNKNOWN" ) -> Optional[Mapping[Any, Any]]: - """Updates state of workflow run and returns document.""" + """Update state of workflow run and returns document.""" return collection.find_one_and_update( {"task_id": task_id}, {"$set": {"api.state": state}}, @@ -25,8 +26,16 @@ def update_run_state( def upsert_fields_in_root_object( collection: Collection, task_id: str, root: str, **kwargs ) -> Optional[Mapping[Any, Any]]: - """Inserts (or updates) fields in(to) the same root (object) field and - returns document. + """Insert or update fields in(to) the same root (object) field. + + Args: + collection: MongoDB collection. + task_id: Task identifier of workflow run. + root: Root field name. + **kwargs: Key-value pairs of fields to insert/update. + + Returns: + Inserted/updated document, or `None` if database operation failed. 
""" return collection.find_one_and_update( {"task_id": task_id}, @@ -42,7 +51,17 @@ def upsert_fields_in_root_object( def update_tes_task_state( collection: Collection, task_id: str, tes_id: str, state: str ) -> Optional[Mapping[Any, Any]]: - """Updates `state` field in TES task log and returns updated document.""" + """Update field 'state' in TES task log and return updated document. + + Args: + collection: MongoDB collection. + task_id: Task identifier of workflow run. + tes_id: Identifier of TES task. + state: New state of TES task. + + Returns: + Updated document, or `None` if database operation failed. + """ return collection.find_one_and_update( {"task_id": task_id, "api.task_logs": {"$elemMatch": {"id": tes_id}}}, {"$set": {"api.task_logs.$.state": state}}, @@ -55,7 +74,16 @@ def append_to_tes_task_logs( task_id: str, tes_log: Mapping, ) -> Optional[Mapping[Any, Any]]: - """Appends task log to TES task logs and returns updated document.""" + """Append task log to TES task logs. + + Args: + collection: MongoDB collection. + task_id: Task identifier of workflow run. + tes_log: Task log to append. + + Returns: + Updated document, or `None` if database operation failed. + """ return collection.find_one_and_update( {"task_id": task_id}, {"$push": {"api.task_logs": tes_log}}, @@ -64,7 +92,15 @@ def append_to_tes_task_logs( def find_tes_task_ids(collection: Collection, run_id: str) -> List: - """Get list of TES task ids associated with a run of interest.""" + """Get list of TES task ids associated with a run of interest. + + Args: + collection: MongoDB collection. + run_id: Run identifier. + + Returns: + List of TES task ids. + """ return collection.distinct("api.task_logs.id", {"run_id": run_id}) @@ -73,8 +109,15 @@ def set_run_state( run_id: str, task_id: Optional[str] = None, state: str = "UNKNOWN", -): - """Set/update state of run associated with Celery task.""" +) -> None: + """Set/update state of run associated with Celery task. 
+ + Args: + collection: MongoDB collection. + run_id: Run identifier. + task_id: Task identifier of workflow run. + state: New state of workflow run. + """ if not task_id: document = collection.find_one( filter={"run_id": run_id}, @@ -92,37 +135,25 @@ def set_run_state( task_id=_task_id, state=state, ) - except Exception as e: + except PyMongoError as exc: logger.exception( - ( - "Database error. Could not update state of run '{run_id}' " - "(task id: '{task_id}') to state '{state}'. Original error " - "message: {type}: {msg}" - ).format( - run_id=run_id, - task_id=_task_id, - state=state, - type=type(e).__name__, - msg=e, - ) + f"Database error. Could not update state of run '{run_id}' (task " + f"id: '{task_id}') to state '{state}'. Original error message: " + f"{type(exc).__name__}: {exc}" ) finally: if document: logger.info( - ( - "State of run '{run_id}' (task id: '{task_id}') " - "changed to '{state}'." - ).format( - run_id=run_id, - task_id=_task_id, - state=state, - ) + f"State of run '{run_id}' (task id: '{task_id}') changed to: " + f"{state}." ) def find_one_latest(collection: Collection) -> Optional[Mapping[Any, Any]]: - """Returns newest object, stripped of the object id, or None if no object - exists. + """Find newest object. + + Returns: + Object stripped of object id, or `None` if no object exists. """ try: return ( @@ -136,7 +167,11 @@ def find_one_latest(collection: Collection) -> Optional[Mapping[Any, Any]]: def find_id_latest(collection: Collection) -> Optional[ObjectId]: - """Returns object id of newest object, or None if no object exists.""" + """Find identifier of newest object. + + Returns: + Object identifier, or `None` if no object exists. 
+ """ try: return collection.find().sort([("_id", -1)]).limit(1).next()["_id"] except StopIteration: diff --git a/cwl_wes/utils/drs.py b/cwl_wes/utils/drs.py index 1ecffda..cac98ef 100644 --- a/cwl_wes/utils/drs.py +++ b/cwl_wes/utils/drs.py @@ -5,7 +5,6 @@ import logging import os import re -from requests.exceptions import ConnectionError import sys from typing import Iterator, List, Match, Optional @@ -17,6 +16,7 @@ InternalServerError, ) +# pragma pylint: disable=too-many-arguments # Get logger instance logger = logging.getLogger(__name__) @@ -30,8 +30,10 @@ def translate_drs_uris( base_path: Optional[str] = None, use_http: bool = False, ) -> None: - """Replace hostname-based DRS URIs with access links either in a file or, - recursively, in all files of a directory. + """Replace hostname-based DRS URIs with access links. + + Replacement takes place either in a file or, recursively, in all files of a + directory. For hostname-based DRS URIs, cf. https://ga4gh.github.io/data-repository-service-schemas/preview/develop/docs/#_hostname_based_drs_uris @@ -51,15 +53,15 @@ def translate_drs_uris( documentation/specification. """ # define regex for identifying DRS URIs - _RE_DOMAIN_PART = r"[a-z0-9]([a-z0-9-]{1,61}[a-z0-9]?)?" - _RE_DOMAIN = rf"({_RE_DOMAIN_PART}\.)+{_RE_DOMAIN_PART}\.?" - _RE_OBJECT_ID = rf"(?Pdrs:\/\/{_RE_DOMAIN}\/\S+)" + re_domain_part = r"[a-z0-9]([a-z0-9-]{1,61}[a-z0-9]?)?" + re_domain = rf"({re_domain_part}\.)+{re_domain_part}\.?" 
+ re_object_id = rf"(?Pdrs:\/\/{re_domain}\/\S+)" # get absolute paths of file or directory (including subdirectories) logger.debug(f"Collecting file(s) for provided path '{path}'...") files = ( abs_paths( - dir=path, + root_dir=path, file_ext=file_types, ) if os.path.isdir(path) @@ -73,7 +75,7 @@ def translate_drs_uris( for line in _f: sys.stdout.write( re.sub( - pattern=_RE_OBJECT_ID, + pattern=re_object_id, repl=partial( get_replacement_string, ref="drs_uri", @@ -88,20 +90,20 @@ def translate_drs_uris( def abs_paths( - dir: str, + root_dir: str, file_ext: List[str], ) -> Iterator[str]: - """Yields absolute paths of files with the indicated file extensions in - specified directory and subdirectories. + """Get absolute paths of files in directory and subdirectories. Arguments: dir: Directory to search files in. - file_ext: List of file extensions for files to return. + file_ext: Limit results to files having either of the indicated + extensions. Returns: Generator yielding absolute file paths. """ - for dirpath, _, files in os.walk(dir): + for dirpath, _, files in os.walk(root_dir): for _file in files: if _file.endswith(tuple(file_ext)): yield os.path.abspath(os.path.join(dirpath, _file)) @@ -115,7 +117,7 @@ def get_replacement_string( base_path: Optional[str] = None, use_http: bool = False, ) -> str: - """Helper function to get string replacement string. + """Get string replacement string helper function. Args: match: Match object from `re.sub()` call @@ -150,7 +152,8 @@ def get_access_url_from_drs( base_path: Optional[str] = None, use_http: bool = False, ) -> str: - """ + """Get access URL from DRS URI. + Arguments: drs_uri: A DRS URI pointing to a DRS object. 
supported_access_methods: List of access methods/file transfer @@ -182,31 +185,29 @@ def get_access_url_from_drs( base_path=base_path, use_http=use_http, ) - except InvalidURI: + except InvalidURI as exc: logger.error(f"The provided DRS URI '{drs_uri}' is invalid.") - raise BadRequest + raise BadRequest from exc # get DRS object try: - object = client.get_object(object_id=drs_uri) - except (ConnectionError, InvalidResponseError): + obj = client.get_object(object_id=drs_uri) + except (ConnectionError, InvalidResponseError) as exc: logger.error(f"Could not connect to DRS host for DRS URI '{drs_uri}'.") - raise InternalServerError - if isinstance(object, Error): - if object.status_code == 404: + raise InternalServerError from exc + if isinstance(obj, Error): + if obj.status_code == 404: logger.error(f"Could not access DRS host for DRS URI '{drs_uri}'.") raise BadRequest - # TODO: handle 401 & 403 - else: - logger.error(f"DRS returned error: {object}'.") - raise InternalServerError + logger.error(f"DRS returned error: {obj}'.") + raise InternalServerError # get access methods and access method types/protocols - available_methods = object.access_methods + available_methods: Optional[List] = obj.access_methods + assert available_methods is not None available_types = [m.type.value for m in available_methods] # iterate through supported methods by order of preference - # TODO: add support for access URL headers for supported_method in supported_access_methods: try: access_url = str( diff --git a/cwl_wes/worker.py b/cwl_wes/worker.py index d4bd7a5..2d8e301 100644 --- a/cwl_wes/worker.py +++ b/cwl_wes/worker.py @@ -1,4 +1,4 @@ -"""Entry point for Celery workers.""" +"""Celery worker entry point.""" from foca.foca import Foca diff --git a/cwl_wes/wsgi.py b/cwl_wes/wsgi.py index b8e3dad..10b4251 100644 --- a/cwl_wes/wsgi.py +++ b/cwl_wes/wsgi.py @@ -1,3 +1,5 @@ +"""WSGI entry point.""" + from cwl_wes.app import init_app app = init_app() diff --git a/pylintrc b/pylintrc new 
file mode 100644 index 0000000..e3c9c44 --- /dev/null +++ b/pylintrc @@ -0,0 +1,4 @@ +[MESSAGES CONTROL] +disable=W0511,W1201,W1202,W1203 +#extension-pkg-white-list= +#ignored-classes= diff --git a/requirements.txt b/requirements.txt index 46397fc..bf2f80c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -foca==0.12.0 +foca~=0.12.0 cwl-tes @ git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes -drs-cli==0.2.3 -gunicorn==19.9.0 -py-tes==0.4.2 +drs-cli~=0.2.3 +gunicorn~=19.9.0 +py-tes~=0.4.2 diff --git a/requirements_dev.txt b/requirements_dev.txt index cb10cdf..58ebd76 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,5 @@ black~=22.12.0 -flake8~=6.0.0 +flake8~=5.0.4 flake8-docstrings~=1.6.0 mypy~=0.991 pylint~=2.15.9 diff --git a/setup.py b/setup.py index 12b07fc..7860021 100644 --- a/setup.py +++ b/setup.py @@ -5,19 +5,18 @@ root_dir = Path(__file__).parent.resolve() -exec(open(root_dir / "cwl_wes" / "version.py").read()) +with open(root_dir / "cwl_wes" / "version.py", encoding="utf-8") as _file: + exec(_file.read()) # pylint: disable=exec-used -file_name = root_dir / "README.md" -with open(file_name, "r") as _file: +with open(root_dir / "README.md", encoding="utf-8") as _file: LONG_DESCRIPTION = _file.read() -req = root_dir / "requirements.txt" -with open(req, "r") as _file: +with open(root_dir / "requirements.txt", encoding="utf-8") as _file: INSTALL_REQUIRES = _file.read().splitlines() setup( name="cwl-wes", - version=__version__, # noqa: F821 + version=__version__, # noqa: F821 # pylint: disable=undefined-variable author="Elixir Cloud & AAI", author_email="alexander.kanitz@alumni.ethz.ch", description="Flask- and MongoDB-powered GA4GH WES server", From 04d0b319da2346c39d182959cd251dc9aab53f6f Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Sun, 1 Jan 2023 19:34:01 +0100 Subject: [PATCH 16/29] fix Dockerfile --- Dockerfile | 50 
+++++++++++--------------------------------------- 1 file changed, 11 insertions(+), 39 deletions(-) diff --git a/Dockerfile b/Dockerfile index f91914d..6f6535c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,51 +1,23 @@ -##### BASE IMAGE ##### -FROM python:3.7-slim-stretch +FROM elixircloud/foca:20221110-py3.7 -##### METADATA ##### -LABEL base.image="python:3.6-slim-stretch" LABEL version="1.1" LABEL software="cwl-WES" -LABEL software.version="1.0" -LABEL software.description="Flask microservice implementing the Global Alliance for Genomics and Health (GA4GH) Workflow Execution Service (WES) API specification." +LABEL software.description="Trigger CWL workflows via GA4GH WES and TES" LABEL software.website="https://github.com/elixir-cloud-aai/cwl-WES" LABEL software.documentation="https://github.com/elixir-cloud-aai/cwl-WES" -LABEL software.license="https://github.com/elixir-cloud-aai/cwl-WES/blob/master/LICENSE" -LABEL software.tags="General" -LABEL maintainer="alexander.kanitz@alumni.ethz.ch" -LABEL maintainer.organisation="Biozentrum, University of Basel" -LABEL maintainer.location="Klingelbergstrasse 50/70, CH-4056 Basel, Switzerland" -LABEL maintainer.lab="ELIXIR Cloud & AAI" -LABEL maintainer.license="https://spdx.org/licenses/Apache-2.0" +LABEL software.license="https://spdx.org/licenses/Apache-2.0" +LABEL maintainer="cloud-service@elixir-europe.org" +LABEL maintainer.organisation="ELIXIR Cloud & AAI" # Python UserID workaround for OpenShift/K8S ENV LOGNAME=ipython ENV USER=ipython -ENV HOME=/tmp/user -# Install general dependencies -RUN apt-get update && apt-get install -y nodejs openssl git build-essential python3-dev curl jq - -## Set working directory WORKDIR /app +COPY ./ . +RUN pip install -e . 
\ + && pip install -r requirements_dev.txt -## Copy Python requirements -COPY ./requirements.txt /app/requirements.txt - -## Install Python dependencies -RUN cd /app \ - && pip install -r requirements.txt \ - && cd /app/src/cwl-tes \ - && python setup.py develop \ - && cd / \ - && mkdir -p /tmp/user - -## Copy remaining app files -COPY ./ /app - -## Install app & set write permissions for specs directory -RUN cd /app \ - && python setup.py develop \ - && cd / \ - && chmod g+w /app/cwl_wes/api/ \ - && chmod g+w -R /tmp/user - +## Add permissions for storing updated API specification +## (required by FOCA) +RUN chmod -R a+rwx /app/cwl_wes/api From 3d4b950cbf6969994b87f8c6952423e4be052533 Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 12:01:54 +0100 Subject: [PATCH 17/29] testing build --- Dockerfile | 3 +-- README.md | 4 +-- cwl_wes/config.yaml | 58 ++++++++++++++++++++-------------------- cwl_wes/custom_config.py | 8 +++--- requirements.txt | 1 + 5 files changed, 37 insertions(+), 37 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6f6535c..bd03711 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,8 +15,7 @@ ENV USER=ipython WORKDIR /app COPY ./ . -RUN pip install -e . \ - && pip install -r requirements_dev.txt +RUN pip install -e . ## Add permissions for storing updated API specification ## (required by FOCA) diff --git a/README.md b/README.md index 976e091..cd0a693 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ cd app * Via the **app configuration file** ```bash - vi cwl_wes/config/app_config.yaml + vi cwl_wes/config.yaml ``` * Via **environment variables** @@ -253,7 +253,7 @@ question etc. 
[badge-url-ci]: [badge-url-health]: [badge-url-license]: -[config-app]: cwl_wes/config/app_config.yaml +[config-app]: cwl_wes/config.yaml [docs-kubernetes]: deployment/README.md [elixir-aai]: https://perun.elixir-czech.cz/ [elixir-user-group-apply]: https://perun.elixir-czech.cz/fed/registrar/?vo=elixir&group=ECP_CLN:OSS diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index d42dc57..5320ce2 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -6,7 +6,7 @@ # Server configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ServerConfig server: - host: '0.0.0.0' + host: "0.0.0.0" port: 8080 debug: True environment: development @@ -20,12 +20,12 @@ security: required: False add_key_to_claims: True algorithms: - - RS256 + - RS256 allow_expired: False audience: null validation_methods: - - userinfo - - public_key + - userinfo + - public_key validation_checks: all # Database configuration @@ -42,25 +42,25 @@ db: run_id: 1 task_id: 1 options: - 'unique': True - 'sparse': True + "unique": True + "sparse": True service_info: indexes: - keys: id: 1 options: - 'unique': True + "unique": True # API configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.APIConfig api: specs: - path: - - api/20181010.be85140.workflow_execution_service.swagger.yaml + - api/20181010.be85140.workflow_execution_service.swagger.yaml add_security_fields: x-apikeyInfoFunc: app.validate_token add_operation_fields: - x-swagger-router-controller: ga4gh.wes.server + x-swagger-router-controller: ga4gh.wes.server disable_auth: True connexion: strict_validation: True @@ -98,7 +98,7 @@ log: jobs: host: rabbitmq port: 5672 - backend: 'rpc://' + backend: "rpc://" include: - cwl_wes.tasks.run_workflow - cwl_wes.tasks.cancel_run @@ -106,17 +106,17 @@ jobs: # Exception configuration # Cf. 
https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ExceptionConfig exceptions: - required_members: [['message'], ['code']] - status_member: ['code'] + required_members: [["message"], ["code"]] + status_member: ["code"] exceptions: cwl_wes.exceptions.exceptions # Custom configuration # Available in app context as attributes of `current_app.config.foca` custom: storage: - permanent_dir: '/data/output' - tmp_dir: '/data/tmp' - remote_storage_url: 'ftp://ftp-private.ebi.ac.uk/upload/foivos' + permanent_dir: "/data/output" + tmp_dir: "/data/tmp" + remote_storage_url: "ftp://ftp-private.ebi.ac.uk/upload/foivos" celery: timeout: 0.1 message_maxsize: 16777216 @@ -125,23 +125,23 @@ custom: timeout_cancel_run: 60 timeout_run_workflow: null tes_server: - url: 'http://62.217.122.249:31567/' - timeout: 5 - status_query_params: 'FULL' + - url: "http://62.217.122.249:31567/" + timeout: 5 + status_query_params: "FULL" drs_server: - port: null # use this port for resolving DRS URIs; set to `null` to use default (443) - base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) - use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) - file_types: # extensions of files to scan for DRS URI resolution - - cwl - - yaml - - yml + - port: null # use this port for resolving DRS URIs; set to `null` to use default (443) + base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) + use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) + file_types: # extensions of files to scan for DRS URI resolution + - cwl + - yaml + - yml runs_id: length: 6 charset: string.ascii_uppercase + string.digits service_info: - contact_info: 'https://github.com/elixir-cloud-aai/cwl-WES' - auth_instructions_url: 'https://www.elixir-europe.org/services/compute/aai' + contact_info: 
"https://github.com/elixir-cloud-aai/cwl-WES" + auth_instructions_url: "https://www.elixir-europe.org/services/compute/aai" supported_filesystem_protocols: - ftp - https @@ -158,6 +158,6 @@ custom: - type: string default_value: some_string - type: int - default_value: '5' + default_value: "5" tags: - known_tes_endpoints: 'https://tes.tsi.ebi.ac.uk/|https://tes-dev.tsi.ebi.ac.uk/|https://csc-tesk.c03.k8s-popup.csc.fi/|https://tesk.c01.k8s-popup.csc.fi/' + known_tes_endpoints: "https://csc-tesk-noauth.rahtiapp.fi/swagger-ui.html|https://tesk-na.cloud.e-infra.cz/swagger-ui.html" diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index 940192a..aee9fb4 100644 --- a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -202,11 +202,11 @@ class ServiceInfoConfig(FOCABaseConfig): default_workflow_engine_parameters: List[ DefaultWorkflowEngineParameterConfig ] = [] - tags: TagsConfig = TagsConfig() + tags: TagsConfig class TesServerConfig(FOCABaseConfig): - """Model for tes server configuration. + """Model for TES server configuration. Args: url: TES Endpoint URL. 
@@ -328,8 +328,8 @@ class ControllerConfig(FOCABaseConfig): default_page_size: int = 5 timeout_cancel_run: int = 60 timeout_run_workflow: Optional[int] = None - tes_server: TesServerConfig = TesServerConfig() - drs_server: DRSServerConfig = DRSServerConfig() + tes_server: List[TesServerConfig] = [] + drs_server: List[DRSServerConfig] = [] runs_id: IdConfig = IdConfig() diff --git a/requirements.txt b/requirements.txt index bf2f80c..396497f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ cwl-tes @ git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb drs-cli~=0.2.3 gunicorn~=19.9.0 py-tes~=0.4.2 +importlib-metadata==4.13.0 From 9cc40f0da29b09b73dd4eb599841b4038647309c Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 15:17:03 +0100 Subject: [PATCH 18/29] app should be starting now --- .github/workflows/checks.yml | 76 ++++++ .github/workflows/docker-image.yml | 56 ---- .github/workflows/pr-test.yaml | 30 --- Dockerfile | 2 + cwl_wes/app.py | 11 +- cwl_wes/config.yaml | 248 +++++++++--------- cwl_wes/custom_config.py | 2 +- .../ga4gh/wes/endpoints/get_service_info.py | 106 -------- .../ga4gh/wes/{ => endpoints}/service_info.py | 56 ++-- cwl_wes/ga4gh/wes/server.py | 10 +- 10 files changed, 237 insertions(+), 360 deletions(-) create mode 100644 .github/workflows/checks.yml delete mode 100644 .github/workflows/docker-image.yml delete mode 100644 .github/workflows/pr-test.yaml delete mode 100644 cwl_wes/ga4gh/wes/endpoints/get_service_info.py rename cwl_wes/ga4gh/wes/{ => endpoints}/service_info.py (58%) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml new file mode 100644 index 0000000..33f74aa --- /dev/null +++ b/.github/workflows/checks.yml @@ -0,0 +1,76 @@ +name: Docker Image CI + +on: + push: + branches: [dev] + +env: + DOCKER_REPO_NAME: elixircloud/cwl-wes + +jobs: + lint: + name: Run linting + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v3 + - 
name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install requirements + run: pip install -r requirements.txt -r requirements_dev.txt + - name: Lint with Flake8 + run: flake8 cwl_wes/ setup.py + - name: Lint with Pylint + run: pylint cwl_wes/ setup.py + test: + name: Run tests + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Test build + run: docker-compose up -d --build + - name: Sleep + shell: bash + run: sleep 30; + - name: Test endpoint + shell: bash + run: bash test-http-call.bash + - name: End test + run: docker-compose down + publish: + name: Build and publish app image + runs-on: ubuntu-latest + if: ${{ github.event_name == 'push' }} + needs: [lint, test] + env: + DOCKER_REPO_NAME: elixircloud/cwl-wes + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Generate tag + run: | + echo "TAG=$(date '+%Y%m%d')" >> $GITHUB_ENV + - name: Build and publish image + id: docker + uses: philips-software/docker-ci-scripts@v5.0.0 + with: + dockerfile: . 
+ image-name: "protes" + tags: "latest ${{ env.TAG }}" + push-branches: "${{ github.event.repository.default_branch }}" + env: + REGISTRY_USERNAME: ${{ secrets.DOCKERHUB_LOGIN }} + REGISTRY_TOKEN: "${{ secrets.DOCKERHUB_TOKEN }}" + DOCKER_ORGANIZATION: ${{ secrets.DOCKERHUB_ORG }} + GITHUB_ORGANIZATION: ${{ github.repository_owner }} + - name: Verify that image was pushed + run: | + echo "Push indicator: ${{ steps.docker.outputs.push-indicator }}" + echo "# Set to 'true' if image was pushed, empty string otherwise" + test "${{ steps.docker.outputs.push-indicator }}" == "true" diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml deleted file mode 100644 index bf8d389..0000000 --- a/.github/workflows/docker-image.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Docker Image CI - -on: - push: - branches: [ dev ] - -env: - DOCKER_REPO_NAME: elixircloud/cwl-wes - -jobs: - test: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - name: Test build - run: docker-compose up -d - - name: Sleep - shell: bash - run: sleep 30; - - name: Test endpoint - shell: bash - run: bash test-http-call.bash - - name: End test - run: docker-compose down - - build: - - runs-on: ubuntu-latest - env: - DOCKER_REPO_NAME: elixircloud/cwl-wes - steps: - - uses: actions/checkout@v3 - - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${DOCKER_REPO_NAME}:$(date +%Y%m%d) --tag ${DOCKER_REPO_NAME}:latest - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Set today env variable - run: | - echo "today=$(date +%Y%m%d)" >> $GITHUB_ENV - - name: Build and push - uses: docker/build-push-action@v2 - if: github.ref == 'refs/heads/dev' - with: - context: . 
- push: true - tags: | - ${{ env.DOCKER_REPO_NAME }}:${{ env.today }} - ${{ env.DOCKER_REPO_NAME }}:latest diff --git a/.github/workflows/pr-test.yaml b/.github/workflows/pr-test.yaml deleted file mode 100644 index b4271d3..0000000 --- a/.github/workflows/pr-test.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: Test - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -on: - pull_request: - branches: [ dev ] - -jobs: - test: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - name: Test build - run: docker-compose up -d - - name: Sleep - shell: bash - run: sleep 30; - - name: Test endpoint - shell: bash - run: bash test-http-call.bash - - name: End test - run: docker-compose down \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index bd03711..05153ec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,8 @@ ENV LOGNAME=ipython ENV USER=ipython WORKDIR /app +COPY ./requirements.txt /app/requirements.txt +RUN pip install -r requirements.txt COPY ./ . RUN pip install -e . 
diff --git a/cwl_wes/app.py b/cwl_wes/app.py index 949da1c..2ce5661 100644 --- a/cwl_wes/app.py +++ b/cwl_wes/app.py @@ -3,11 +3,9 @@ from pathlib import Path from connexion import App -from flask import current_app from foca import Foca -from cwl_wes.ga4gh.wes.service_info import ServiceInfo -from cwl_wes.exceptions import NotFound +from cwl_wes.ga4gh.wes.endpoints.service_info import ServiceInfo def init_app() -> App: @@ -23,12 +21,7 @@ def init_app() -> App: app = foca.create_app() with app.app.app_context(): service_info = ServiceInfo() - try: - service_info.get_service_info() - except NotFound: - service_info.set_service_info( - data=current_app.config.foca.custom.service_info.dict() - ) + service_info.init_service_info_from_config() return app diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index 5320ce2..ed16f04 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -6,158 +6,152 @@ # Server configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ServerConfig server: - host: "0.0.0.0" - port: 8080 - debug: True - environment: development - testing: False - use_reloader: True + host: "0.0.0.0" + port: 8080 + debug: True + environment: development + testing: False + use_reloader: True # Security configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.SecurityConfig security: - auth: - required: False - add_key_to_claims: True - algorithms: - - RS256 - allow_expired: False - audience: null - validation_methods: - - userinfo - - public_key - validation_checks: all + auth: + required: False + add_key_to_claims: True + algorithms: + - RS256 + allow_expired: False + audience: null + validation_methods: + - userinfo + - public_key + validation_checks: all # Database configuration # Cf. 
https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.DBConfig db: - host: mongodb - port: 27017 - dbs: - cwl-wes-db: - collections: - runs: - indexes: - - keys: - run_id: 1 - task_id: 1 - options: - "unique": True - "sparse": True - service_info: - indexes: - - keys: - id: 1 - options: - "unique": True + host: mongodb + port: 27017 + dbs: + cwl-wes-db: + collections: + runs: + indexes: + - keys: + run_id: 1 + task_id: 1 + options: + "unique": True + "sparse": True + service_info: [] # API configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.APIConfig api: - specs: - - path: - - api/20181010.be85140.workflow_execution_service.swagger.yaml - add_security_fields: - x-apikeyInfoFunc: app.validate_token - add_operation_fields: - x-swagger-router-controller: ga4gh.wes.server - disable_auth: True - connexion: - strict_validation: True - validate_responses: False - options: - swagger_ui: True - serve_spec: True + specs: + - path: + - api/20181010.be85140.workflow_execution_service.swagger.yaml + add_security_fields: + x-apikeyInfoFunc: app.validate_token + add_operation_fields: + x-swagger-router-controller: ga4gh.wes.server + disable_auth: True + connexion: + strict_validation: True + validate_responses: False + options: + swagger_ui: True + serve_spec: True # Logging configuration # Cf. 
https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.LogConfig log: - version: 1 - disable_existing_loggers: False - formatters: - standard: - class: logging.Formatter - style: "{" - format: "[{asctime}: {levelname:<8}] {message} [{name}]" - long: - class: logging.Formatter - style: "{" - format: "[{asctime}: {levelname:<8}] {message} [{name}]" - handlers: - console: - class: logging.StreamHandler - level: 20 - formatter: standard - stream: ext://sys.stderr - root: - level: 10 - handlers: [console] + version: 1 + disable_existing_loggers: False + formatters: + standard: + class: logging.Formatter + style: "{" + format: "[{asctime}: {levelname:<8}] {message} [{name}]" + long: + class: logging.Formatter + style: "{" + format: "[{asctime}: {levelname:<8}] {message} [{name}]" + handlers: + console: + class: logging.StreamHandler + level: 20 + formatter: standard + stream: ext://sys.stderr + root: + level: 10 + handlers: [console] # Background job configuration # Cf. https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.JobsConfig jobs: - host: rabbitmq - port: 5672 - backend: "rpc://" - include: - - cwl_wes.tasks.run_workflow - - cwl_wes.tasks.cancel_run + host: rabbitmq + port: 5672 + backend: "rpc://" + include: + - cwl_wes.tasks.run_workflow + - cwl_wes.tasks.cancel_run # Exception configuration # Cf. 
https://foca.readthedocs.io/en/latest/modules/foca.models.html#foca.models.config.ExceptionConfig exceptions: - required_members: [["message"], ["code"]] - status_member: ["code"] - exceptions: cwl_wes.exceptions.exceptions + required_members: [["message"], ["code"]] + status_member: ["code"] + exceptions: cwl_wes.exceptions.exceptions # Custom configuration # Available in app context as attributes of `current_app.config.foca` custom: - storage: - permanent_dir: "/data/output" - tmp_dir: "/data/tmp" - remote_storage_url: "ftp://ftp-private.ebi.ac.uk/upload/foivos" - celery: - timeout: 0.1 - message_maxsize: 16777216 - controller: - default_page_size: 5 - timeout_cancel_run: 60 - timeout_run_workflow: null - tes_server: - - url: "http://62.217.122.249:31567/" - timeout: 5 - status_query_params: "FULL" - drs_server: - - port: null # use this port for resolving DRS URIs; set to `null` to use default (443) - base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) - use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) - file_types: # extensions of files to scan for DRS URI resolution - - cwl - - yaml - - yml - runs_id: - length: 6 - charset: string.ascii_uppercase + string.digits - service_info: - contact_info: "https://github.com/elixir-cloud-aai/cwl-WES" - auth_instructions_url: "https://www.elixir-europe.org/services/compute/aai" - supported_filesystem_protocols: - - ftp - - https - - local - supported_wes_versions: - - 1.0.0 - workflow_type_versions: - CWL: - workflow_type_version: - - v1.0 - workflow_engine_versions: - cwl-tes: 0.2.0 - default_workflow_engine_parameters: - - type: string - default_value: some_string - - type: int - default_value: "5" - tags: - known_tes_endpoints: "https://csc-tesk-noauth.rahtiapp.fi/swagger-ui.html|https://tesk-na.cloud.e-infra.cz/swagger-ui.html" + storage: + permanent_dir: "/data/output" + tmp_dir: "/data/tmp" + remote_storage_url: 
"ftp://ftp-private.ebi.ac.uk/upload/foivos" + celery: + timeout: 0.1 + message_maxsize: 16777216 + controller: + default_page_size: 5 + timeout_cancel_run: 60 + timeout_run_workflow: null + tes_server: + - url: "http://62.217.122.249:31567/" + timeout: 5 + status_query_params: "FULL" + drs_server: + - port: null # use this port for resolving DRS URIs; set to `null` to use default (443) + base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) + use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) + file_types: # extensions of files to scan for DRS URI resolution + - cwl + - yaml + - yml + runs_id: + length: 6 + charset: string.ascii_uppercase + string.digits + service_info: + contact_info: "https://github.com/elixir-cloud-aai/cwl-WES" + auth_instructions_url: "https://github.com/elixir-cloud-aai/cwl-WES" + supported_filesystem_protocols: + - ftp + - https + - local + supported_wes_versions: + - 1.0.0 + - 1.0.1 + workflow_type_versions: + CWL: + workflow_type_version: + - v1.0 + - v1.1 + - v1.2 + workflow_engine_versions: + cwl-tes: 0.3.0, commit 7b44cb1 + default_workflow_engine_parameters: [] + tags: + known_tes_endpoints: "https://csc-tesk-noauth.rahtiapp.fi/swagger-ui.html|https://tesk-na.cloud.e-infra.cz/swagger-ui.html" diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index aee9fb4..c1a47b1 100644 --- a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -352,4 +352,4 @@ class CustomConfig(FOCABaseConfig): storage: StorageConfig = StorageConfig() celery: CeleryConfig = CeleryConfig() controller: ControllerConfig = ControllerConfig() - service_info: ServiceInfoConfig = ServiceInfoConfig() + service_info: ServiceInfoConfig diff --git a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py b/cwl_wes/ga4gh/wes/endpoints/get_service_info.py deleted file mode 100644 index 6ef16dd..0000000 --- a/cwl_wes/ga4gh/wes/endpoints/get_service_info.py +++ /dev/null @@ 
-1,106 +0,0 @@ -"""Utility functions for GET /service-info endpoint.""" - -from copy import deepcopy -from datetime import datetime -import logging -from typing import Any, Dict - -from flask import Config -from foca.utils.db import find_id_latest, find_one_latest -from pymongo import collection as Collection - -from cwl_wes.ga4gh.wes.states import States - -# pragma pylint: disable=unused-argument - -# Get logger instance -logger = logging.getLogger(__name__) - - -# Helper function GET /service-info -def get_service_info( - config: Config, - *args: Any, - silent: bool = False, - **kwarg: Any, -): - """Get formatted service info. - - Creates service info database document if it does not exist. - - Args: - config: App configuration. - *args: Variable length argument list. - silent: Whether to return service info or `None` (in silent mode). - **kwargs: Arbitrary keyword arguments. - - Returns: - Readily formatted service info, or `None` (in silent mode); - """ - collection_service_info: Collection.Collection = ( - config.foca.db.dbs["cwl-wes-db"].collections["service_info"].client - ) - collection_runs: Collection.Collection = ( - config.foca.db.dbs["cwl-wes-db"].collections["runs"].client - ) - service_info = deepcopy(config.foca.custom.service_info.dict()) - - # Write current service info to database if absent or different from latest - if not service_info == find_one_latest(collection_service_info): - collection_service_info.insert(service_info) - logger.info(f"Updated service info: {service_info}") - else: - logger.debug("No change in service info. 
Not updated.") - - # Return None when called in silent mode: - if silent: - return None - - # Add current system state counts - service_info["system_state_counts"] = __get_system_state_counts( - collection_runs - ) - - # Add timestamps - _id = find_id_latest(collection_service_info) - if _id: - service_info["tags"]["last_service_info_update"] = _id.generation_time - service_info["tags"]["current_time"] = datetime.utcnow().isoformat() - - return service_info - - -def __get_system_state_counts(collection: Collection) -> Dict[str, int]: - """Get current system state counts. - - Args: - collection: MongoDB collection object. - - Returns: - Dictionary of counts per state. - """ - current_counts = __init_system_state_counts() - - # Query database for workflow run states - cursor = collection.find( - filter={}, - projection={ - "api.state": True, - "_id": False, - }, - ) - - # Iterate over states and increase counter - for record in cursor: - current_counts[record["api"]["state"]] += 1 - - return current_counts - - -def __init_system_state_counts() -> Dict[str, int]: - """Initialize system state counts. - - Returns: - Dictionary of state counts, inititalized to zero. - """ - return {state: 0 for state in States.ALL} diff --git a/cwl_wes/ga4gh/wes/service_info.py b/cwl_wes/ga4gh/wes/endpoints/service_info.py similarity index 58% rename from cwl_wes/ga4gh/wes/service_info.py rename to cwl_wes/ga4gh/wes/endpoints/service_info.py index 5d73591..ed26a34 100644 --- a/cwl_wes/ga4gh/wes/service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/service_info.py @@ -4,7 +4,6 @@ from typing import Dict from bson.objectid import ObjectId -from foca.models.config import Config from flask import current_app from pymongo.collection import Collection @@ -22,26 +21,17 @@ class ServiceInfo: Creates service info upon first request, if it does not exist. Attributes: - config: App configuration. - foca_config: FOCA configuration. 
- db_client_service_info: Database collection storing service info - objects. - db_client_runs: Database collection storing workflow run objects. + db_collections: FOCA MongoDB collections. + db_client: Database collection storing service info objects. object_id: Database identifier for service info. """ def __init__(self) -> None: """Construct class instance.""" - self.config: Dict = current_app.config - self.foca_config: Config = self.config.foca - self.db_client_service_info: Collection = ( - self.foca_config.db.dbs["cwl-wes-db"] - .collections["service_info"] - .client - ) - self.db_client_runs: Collection = ( - self.foca_config.db.dbs["cwl-wes-db"].collections["runs"].client - ) + self.db_collections = current_app.config.foca.db.dbs[ + "cwl-wes-db" + ].collections + self.db_client: Collection = self.db_collections["service_info"].client self.object_id: str = "000000000000000000000000" def get_service_info(self, get_counts: bool = True) -> Dict: @@ -56,7 +46,7 @@ def get_service_info(self, get_counts: bool = True) -> Dict: Raises: NotFound: Service info was not found. """ - service_info = self.db_client_service_info.find_one( + service_info = self.db_client.find_one( {"_id": ObjectId(self.object_id)}, {"_id": False}, ) @@ -66,26 +56,44 @@ def get_service_info(self, get_counts: bool = True) -> Dict: service_info["system_state_counts"] = self._get_state_counts() return service_info - def set_service_info( - self, - data: Dict, - ) -> None: + def set_service_info(self, data: Dict) -> None: """Create or update service info. Arguments: data: Dictionary of service info values. Cf. """ - self.db_client_service_info.replace_one( + self.db_client.replace_one( filter={"_id": ObjectId(self.object_id)}, replacement=data, upsert=True, ) - logger.info("Service info set.") + logger.info(f"Service info set: {data}") + + def init_service_info_from_config(self) -> None: + """Initialize service info from config. + + Set service info only if it does not yet exist. 
+ """ + service_info_conf = current_app.config.foca.custom.service_info.dict() + try: + service_info_db = self.get_service_info(get_counts=False) + except NotFound: + logger.info("Initializing service info.") + self.set_service_info(data=service_info_conf) + return + if service_info_db != service_info_conf: + logger.info( + "Service info configuration changed. Updating service info." + ) + self.set_service_info(data=service_info_conf) + return + logger.debug("Service info already initialized and up to date.") def _get_state_counts(self) -> Dict[str, int]: """Get current system state counts.""" current_counts = {state: 0 for state in States.ALL} - cursor = self.db_client_runs.find( + db_client_runs: Collection = self.db_collections["runs"].client + cursor = db_client_runs.find( filter={}, projection={ "run_log.state": True, diff --git a/cwl_wes/ga4gh/wes/server.py b/cwl_wes/ga4gh/wes/server.py index 0e06676..370aea3 100644 --- a/cwl_wes/ga4gh/wes/server.py +++ b/cwl_wes/ga4gh/wes/server.py @@ -12,7 +12,7 @@ from foca.utils.logging import log_traffic from cwl_wes.ga4gh.wes.endpoints.run_workflow import run_workflow -from cwl_wes.ga4gh.wes.endpoints.get_service_info import get_service_info +from cwl_wes.ga4gh.wes.endpoints.service_info import ServiceInfo from cwl_wes.ga4gh.wes.states import States from cwl_wes.tasks.cancel_run import task__cancel_run from cwl_wes.utils.controllers import get_document_if_allowed @@ -118,12 +118,8 @@ def GetServiceInfo(*args, **kwargs) -> Optional[Dict]: Returns: Service info object. 
""" - response = get_service_info( - config=current_app.config, - *args, - **kwargs, - ) - return response + service_info = ServiceInfo() + return service_info.get_service_info() # GET /runs From 9774f31ec5b2b3bb6bb4a4b1490eb95f70a5e9ba Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 15:36:03 +0100 Subject: [PATCH 19/29] run GH Actions on PR --- .github/workflows/checks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 33f74aa..97762c4 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -3,6 +3,8 @@ name: Docker Image CI on: push: branches: [dev] + pull_request: + branches: [dev] env: DOCKER_REPO_NAME: elixircloud/cwl-wes From 3e099bc41024ffe7570d0cb642075a943bcc02dd Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 15:40:50 +0100 Subject: [PATCH 20/29] fix GH Actions workflow --- .github/workflows/checks.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 97762c4..2e33929 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -21,7 +21,9 @@ jobs: with: python-version: "3.10" - name: Install requirements - run: pip install -r requirements.txt -r requirements_dev.txt + run: | + pip install . 
+ pip install -r requirements_dev.txt - name: Lint with Flake8 run: flake8 cwl_wes/ setup.py - name: Lint with Pylint From d9c99da3f68a2a9e4612ebf3946f7337494d7c83 Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 15:51:56 +0100 Subject: [PATCH 21/29] fix GH Actions workflow --- .github/workflows/checks.yml | 2 +- cwl_wes/custom_config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 2e33929..c5d5819 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.7" - name: Install requirements run: | pip install . diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index c1a47b1..85e6ba4 100644 --- a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -329,7 +329,7 @@ class ControllerConfig(FOCABaseConfig): timeout_cancel_run: int = 60 timeout_run_workflow: Optional[int] = None tes_server: List[TesServerConfig] = [] - drs_server: List[DRSServerConfig] = [] + drs_server: DRSServerConfig = DRSServerConfig() runs_id: IdConfig = IdConfig() From 7b8aca2aa22604c46a39c6ae69e3ebb3304c83c1 Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 15:57:55 +0100 Subject: [PATCH 22/29] fix app --- cwl_wes/config.yaml | 14 +++++++------- docker-compose.yaml | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index ed16f04..b7225db 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -124,13 +124,13 @@ custom: timeout: 5 status_query_params: "FULL" drs_server: - - port: null # use this port for resolving DRS URIs; set to `null` to use default (443) - base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) - use_http: False # use `http` for resolving DRS URIs; set to `False` to use 
default (`https`) - file_types: # extensions of files to scan for DRS URI resolution - - cwl - - yaml - - yml + port: null # use this port for resolving DRS URIs; set to `null` to use default (443) + base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) + use_http: False # use `http` for resolving DRS URIs; set to `False` to use default (`https`) + file_types: # extensions of files to scan for DRS URI resolution + - cwl + - yaml + - yml runs_id: length: 6 charset: string.ascii_uppercase + string.digits diff --git a/docker-compose.yaml b/docker-compose.yaml index 7a3699d..723bcea 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,7 +13,7 @@ services: volumes: - ../data/cwl_wes:/data ports: - - "8080:8080" + - "8079:8080" wes-worker: image: elixircloud/cwl-wes:latest @@ -34,7 +34,7 @@ services: links: - mongodb ports: - - "5672:5672" + - "5671:5672" mongodb: image: mongo:3.6 @@ -42,7 +42,7 @@ services: volumes: - ../data/cwl_wes/db:/data/db ports: - - "27017:27017" + - "27016:27017" flower: image: mher/flower:0.9.7 @@ -51,5 +51,5 @@ services: - wes-worker command: flower --broker=amqp://guest:guest@rabbitmq:5672// --port=5555 ports: - - "5555:5555" + - "5554:5555" From 2c468b5f275fa5a5de10839f09c8879d29af925d Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 16:06:44 +0100 Subject: [PATCH 23/29] fix accidental docker compose changes --- docker-compose.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 723bcea..7a3699d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -13,7 +13,7 @@ services: volumes: - ../data/cwl_wes:/data ports: - - "8079:8080" + - "8080:8080" wes-worker: image: elixircloud/cwl-wes:latest @@ -34,7 +34,7 @@ services: links: - mongodb ports: - - "5671:5672" + - "5672:5672" mongodb: image: mongo:3.6 @@ -42,7 +42,7 @@ services: volumes: - ../data/cwl_wes/db:/data/db ports: - - 
"27016:27017" + - "27017:27017" flower: image: mher/flower:0.9.7 @@ -51,5 +51,5 @@ services: - wes-worker command: flower --broker=amqp://guest:guest@rabbitmq:5672// --port=5555 ports: - - "5554:5555" + - "5555:5555" From 9cca437830d76deb3b174dc05ef32938f830f87c Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 18:05:43 +0100 Subject: [PATCH 24/29] fix app; fix service info bug; more tests --- .github/workflows/checks.yml | 12 +- cwl_wes/config.yaml | 6 +- cwl_wes/custom_config.py | 4 +- cwl_wes/ga4gh/wes/endpoints/service_info.py | 4 +- test-http-call.bash | 5 - tests/integration_tests.sh | 139 ++++++++++++++++++++ tests/run_tests.sh | 13 -- 7 files changed, 152 insertions(+), 31 deletions(-) delete mode 100644 test-http-call.bash create mode 100755 tests/integration_tests.sh delete mode 100755 tests/run_tests.sh diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index c5d5819..5d568f2 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -37,15 +37,15 @@ jobs: steps: - name: Check out repository uses: actions/checkout@v3 - - name: Test build + - name: Deploy app run: docker-compose up -d --build - - name: Sleep + - name: Wait for app startup shell: bash - run: sleep 30; - - name: Test endpoint + run: sleep 20 + - name: Run integration tests shell: bash - run: bash test-http-call.bash - - name: End test + run: bash tests/integration_tests.sh + - name: Tear down app run: docker-compose down publish: name: Build and publish app image diff --git a/cwl_wes/config.yaml b/cwl_wes/config.yaml index b7225db..c518afe 100644 --- a/cwl_wes/config.yaml +++ b/cwl_wes/config.yaml @@ -120,9 +120,9 @@ custom: timeout_cancel_run: 60 timeout_run_workflow: null tes_server: - - url: "http://62.217.122.249:31567/" - timeout: 5 - status_query_params: "FULL" + url: "http://62.217.122.249:31567/" + timeout: 5 + status_query_params: "FULL" drs_server: port: null # use this port for resolving DRS URIs; set to `null` to use 
default (443) base_path: null # use this base path for resolving DRS URIs; set to `null` to use default (`ga4gh/drs/v1`) diff --git a/cwl_wes/custom_config.py b/cwl_wes/custom_config.py index 85e6ba4..3453ce3 100644 --- a/cwl_wes/custom_config.py +++ b/cwl_wes/custom_config.py @@ -328,7 +328,7 @@ class ControllerConfig(FOCABaseConfig): default_page_size: int = 5 timeout_cancel_run: int = 60 timeout_run_workflow: Optional[int] = None - tes_server: List[TesServerConfig] = [] + tes_server: TesServerConfig drs_server: DRSServerConfig = DRSServerConfig() runs_id: IdConfig = IdConfig() @@ -351,5 +351,5 @@ class CustomConfig(FOCABaseConfig): storage: StorageConfig = StorageConfig() celery: CeleryConfig = CeleryConfig() - controller: ControllerConfig = ControllerConfig() + controller: ControllerConfig service_info: ServiceInfoConfig diff --git a/cwl_wes/ga4gh/wes/endpoints/service_info.py b/cwl_wes/ga4gh/wes/endpoints/service_info.py index ed26a34..b62f15d 100644 --- a/cwl_wes/ga4gh/wes/endpoints/service_info.py +++ b/cwl_wes/ga4gh/wes/endpoints/service_info.py @@ -96,10 +96,10 @@ def _get_state_counts(self) -> Dict[str, int]: cursor = db_client_runs.find( filter={}, projection={ - "run_log.state": True, + "api.state": True, "_id": False, }, ) for record in cursor: - current_counts[record["run_log"]["state"]] += 1 + current_counts[record["api"]["state"]] += 1 return current_counts diff --git a/test-http-call.bash b/test-http-call.bash deleted file mode 100644 index d676c8e..0000000 --- a/test-http-call.bash +++ /dev/null @@ -1,5 +0,0 @@ -CODE=$(curl --write-out '%{http_code}' --output /dev/null --silent localhost:8080/ga4gh/wes/v1/runs) -if [ $CODE != "200" ] -then - exit 1; -fi \ No newline at end of file diff --git a/tests/integration_tests.sh b/tests/integration_tests.sh new file mode 100755 index 0000000..a2d9aed --- /dev/null +++ b/tests/integration_tests.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +set -euo pipefail + +WES_ROOT="http://localhost:8079/ga4gh/wes/v1" + 
+# GET /service-info +ENDPOINT="/service-info" +METHOD="GET" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs +ENDPOINT="/runs" +METHOD="GET" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs/{run_id} 404 +RUN_ID="INVALID_ID" +ENDPOINT="/runs/$RUN_ID" +METHOD="GET" +EXPECTED_CODE="404" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs/{run_id}/status 404 +RUN_ID="INVALID_ID" +ENDPOINT="/runs/$RUN_ID/status" +METHOD="GET" +EXPECTED_CODE="404" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# POST /runs 200 +ENDPOINT="/runs" +METHOD="POST" +EXPECTED_CODE="200" 
+echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out '%{http_code}' \ + --output /dev/null \ + --request "$METHOD" \ + --header "Accept: application/json" \ + --header "Content-Type: multipart/form-data" \ + --form workflow_params='{"input":{"class":"File","path":"https://raw.githubusercontent.com/uniqueg/cwl-example-workflows/master/hashsplitter-workflow.cwl"}}' \ + --form workflow_type="CWL" \ + --form workflow_type_version="v1.0" \ + --form workflow_url="https://github.com/uniqueg/cwl-example-workflows/blob/master/hashsplitter-workflow.cwl" \ + "${WES_ROOT}${ENDPOINT}" +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs; fetch latest run identifier +ENDPOINT="/runs" +METHOD="GET" +echo -n "Fetching run identifier | Identifier: " +RUN_ID=$(curl \ + --silent \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ + | jq .runs[1].run_id \ + | tr -d '"' \ +) +echo -n "$RUN_ID | Result: " +test $RUN_ID != "null" && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs/{run_id}/status 200 +ENDPOINT="/runs/$RUN_ID" +METHOD="GET" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# GET /runs/{run_id}/status 200 +ENDPOINT="/runs/$RUN_ID/status" +METHOD="GET" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out "%{http_code}" \ + --output "/dev/null" \ + --request "$METHOD" \ + --header "Accept: application/json" \ + 
"${WES_ROOT}${ENDPOINT}" \ +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) diff --git a/tests/run_tests.sh b/tests/run_tests.sh deleted file mode 100755 index f2b90af..0000000 --- a/tests/run_tests.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -cwl-tes --tes https://tes-dev.tsi.ebi.ac.uk/ cwl/tools/echo.cwl cwl/tools/echo-job.json - -cwl-tes --tes https://tes-dev.tsi.ebi.ac.uk/ cwl/tools/sleep.cwl cwl/tools/sleep-job.json - -## Post tests - -# Post: sleep command -#curl -X POST --header 'Content-Type: multipart/form-data' --header 'Accept: application/json' -F workflow_params=tests%2Fcwl%2Ftools%2Fsleep-job.yml -F workflow_type=cwl -F workflow_type_version=v1.0 -F tags=empty -F workflow_engine_parameters=empty -F workflow_url=tests%2Fcwl%2Ftools%2Fsleep.cwl -F workflow_attachment=empty 'http://localhost:7777/ga4gh/wes/v1/runs' - -# Post: echo command -#curl -X POST --header 'Content-Type: multipart/form-data' --header 'Accept: application/json' -F workflow_params=tests%2Fcwl%2Ftools%2Fecho-job.yml -F workflow_type=cwl -F workflow_type_version=v1.0 -F tags=empty -F workflow_engine_parameters=empty -F workflow_url=tests%2Fcwl%2Ftools%2Fecho.cwl -F workflow_attachment=empty 'http://localhost:7777/ga4gh/wes/v1/runs' From f2ca8f5df4949be183c286dd3fd533b8bd89eaae Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 18:13:14 +0100 Subject: [PATCH 25/29] fix tests --- tests/integration_tests.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests.sh b/tests/integration_tests.sh index a2d9aed..00db97d 100755 --- a/tests/integration_tests.sh +++ b/tests/integration_tests.sh @@ -2,7 +2,7 @@ set -euo pipefail -WES_ROOT="http://localhost:8079/ga4gh/wes/v1" +WES_ROOT="http://localhost:8080/ga4gh/wes/v1" # GET /service-info ENDPOINT="/service-info" @@ -137,3 +137,12 @@ RESPONSE_CODE=$(curl \ ) echo -n "$RESPONSE_CODE | Result: " test 
$RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# TODO +# POST /runs 200 (as above) +# Fetch identifier (as above) +# CANCEL /runs/{run_id} 200 +# Check that status changed to CANCELING +# Sleep 3-5 min +# Check that second run has status CANCELED +# Check that second run has status COMPLETE From 4b44b17dcd27e7037732dcdfc170859371b21d99 Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Mon, 2 Jan 2023 18:20:04 +0100 Subject: [PATCH 26/29] fix tests --- .github/workflows/checks.yml | 2 +- tests/integration_tests.sh | 64 +++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 5d568f2..9c6fea9 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -1,4 +1,4 @@ -name: Docker Image CI +name: cwl-WES checks on: push: diff --git a/tests/integration_tests.sh b/tests/integration_tests.sh index 00db97d..b4f5f0a 100755 --- a/tests/integration_tests.sh +++ b/tests/integration_tests.sh @@ -37,8 +37,8 @@ echo -n "$RESPONSE_CODE | Result: " test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) # GET /runs/{run_id} 404 -RUN_ID="INVALID_ID" -ENDPOINT="/runs/$RUN_ID" +RUN_ID_INVALID="INVALID_ID" +ENDPOINT="/runs/$RUN_ID_INVALID" METHOD="GET" EXPECTED_CODE="404" echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " @@ -54,8 +54,8 @@ echo -n "$RESPONSE_CODE | Result: " test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) # GET /runs/{run_id}/status 404 -RUN_ID="INVALID_ID" -ENDPOINT="/runs/$RUN_ID/status" +RUN_ID_INVALID="INVALID_ID" +ENDPOINT="/runs/$RUN_ID_INVALID/status" METHOD="GET" EXPECTED_CODE="404" echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " @@ -91,23 +91,23 @@ RESPONSE_CODE=$(curl \ echo -n "$RESPONSE_CODE | Result: " test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) -# 
GET /runs; fetch latest run identifier +# Fetch latest run identifier ENDPOINT="/runs" METHOD="GET" echo -n "Fetching run identifier | Identifier: " -RUN_ID=$(curl \ +RUN_ID_COMPLETE=$(curl \ --silent \ --request "$METHOD" \ --header "Accept: application/json" \ "${WES_ROOT}${ENDPOINT}" \ - | jq .runs[1].run_id \ + | jq .runs[0].run_id \ | tr -d '"' \ ) -echo -n "$RUN_ID | Result: " -test $RUN_ID != "null" && echo "PASSED" || (echo "FAILED" && exit 1) +echo -n "$RUN_ID_COMPLETE | Result: " +test $RUN_ID_COMPLETE != "null" && echo "PASSED" || (echo "FAILED" && exit 1) # GET /runs/{run_id}/status 200 -ENDPOINT="/runs/$RUN_ID" +ENDPOINT="/runs/$RUN_ID_COMPLETE" METHOD="GET" EXPECTED_CODE="200" echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " @@ -123,7 +123,7 @@ echo -n "$RESPONSE_CODE | Result: " test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) # GET /runs/{run_id}/status 200 -ENDPOINT="/runs/$RUN_ID/status" +ENDPOINT="/runs/$RUN_ID_COMPLETE/status" METHOD="GET" EXPECTED_CODE="200" echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " @@ -138,11 +138,45 @@ RESPONSE_CODE=$(curl \ echo -n "$RESPONSE_CODE | Result: " test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) +# POST /runs 200 +ENDPOINT="/runs" +METHOD="POST" +EXPECTED_CODE="200" +echo -n "Testing '$METHOD $ENDPOINT' | Expecting: $EXPECTED_CODE | Got: " +RESPONSE_CODE=$(curl \ + --silent \ + --write-out '%{http_code}' \ + --output /dev/null \ + --request "$METHOD" \ + --header "Accept: application/json" \ + --header "Content-Type: multipart/form-data" \ + --form workflow_params='{"input":{"class":"File","path":"https://raw.githubusercontent.com/uniqueg/cwl-example-workflows/master/hashsplitter-workflow.cwl"}}' \ + --form workflow_type="CWL" \ + --form workflow_type_version="v1.0" \ + --form workflow_url="https://github.com/uniqueg/cwl-example-workflows/blob/master/hashsplitter-workflow.cwl" \ + 
"${WES_ROOT}${ENDPOINT}" +) +echo -n "$RESPONSE_CODE | Result: " +test $RESPONSE_CODE = $EXPECTED_CODE && echo "PASSED" || (echo "FAILED" && exit 1) + +# Fetch latest run identifier +ENDPOINT="/runs" +METHOD="GET" +echo -n "Fetching run identifier | Identifier: " +RUN_ID_CANCEL=$(curl \ + --silent \ + --request "$METHOD" \ + --header "Accept: application/json" \ + "${WES_ROOT}${ENDPOINT}" \ + | jq .runs[0].run_id \ + | tr -d '"' \ +) +echo -n "$RUN_ID_CANCEL | Result: " +test $RUN_ID_CANCEL != "null" && echo "PASSED" || (echo "FAILED" && exit 1) + # TODO -# POST /runs 200 (as above) -# Fetch identifier (as above) # CANCEL /runs/{run_id} 200 # Check that status changed to CANCELING # Sleep 3-5 min -# Check that second run has status CANCELED -# Check that second run has status COMPLETE +# Check that run with $RUN_ID_COMPLETE has status COMPLETE +# Check that run with $RUN_ID_CANCEL has status CANCELED From 2e4be9728b53688d3f650da6ae5a17a3c8a844fa Mon Sep 17 00:00:00 2001 From: Alex Kanitz Date: Tue, 3 Jan 2023 23:59:31 +0100 Subject: [PATCH 27/29] fix GitHub Actions publish --- .github/workflows/checks.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 9c6fea9..01b7762 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -6,9 +6,6 @@ on: pull_request: branches: [dev] -env: - DOCKER_REPO_NAME: elixircloud/cwl-wes - jobs: lint: name: Run linting @@ -52,8 +49,6 @@ jobs: runs-on: ubuntu-latest if: ${{ github.event_name == 'push' }} needs: [lint, test] - env: - DOCKER_REPO_NAME: elixircloud/cwl-wes steps: - name: Check out repository uses: actions/checkout@v3 @@ -65,7 +60,7 @@ jobs: uses: philips-software/docker-ci-scripts@v5.0.0 with: dockerfile: . 
- image-name: "protes" + image-name: "cwl-wes" tags: "latest ${{ env.TAG }}" push-branches: "${{ github.event.repository.default_branch }}" env: From 08efbbc2af69c4273f61b1c21ea0141ef90e6af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Gonz=C3=A1lez?= Date: Fri, 22 Dec 2023 15:01:05 +0200 Subject: [PATCH 28/29] Upgrade to v3.8 --- .github/workflows/checks.yml | 2 +- Dockerfile | 2 +- docker-compose.yaml | 4 ++-- requirements.txt | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 01b7762..d16499e 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.8" - name: Install requirements run: | pip install . diff --git a/Dockerfile b/Dockerfile index 05153ec..b343527 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM elixircloud/foca:20221110-py3.7 +FROM docker.io/elixircloud/foca:20231219-py3.11 LABEL version="1.1" LABEL software="cwl-WES" diff --git a/docker-compose.yaml b/docker-compose.yaml index 7a3699d..e52b483 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3.6' services: wes: - image: elixircloud/cwl-wes:latest + image: docker.io/elixircloud/cwl-wes:latest build: context: . 
dockerfile: Dockerfile @@ -16,7 +16,7 @@ services: - "8080:8080" wes-worker: - image: elixircloud/cwl-wes:latest + image: docker.io/elixircloud/cwl-wes:latest restart: unless-stopped depends_on: - wes diff --git a/requirements.txt b/requirements.txt index 396497f..48f48e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ foca~=0.12.0 -cwl-tes @ git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes +#cwl-tes @ git+https://github.com/ohsu-comp-bio/cwl-tes.git@7b44cb1825a302bb7eccb3f2d91dc233adc0e32f#egg=cwl-tes +cwl-tes==0.3.0 drs-cli~=0.2.3 gunicorn~=19.9.0 py-tes~=0.4.2 From cd3bcfc44caaeaf7880c42e530c50e00d707851e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Gonz=C3=A1lez?= Date: Fri, 22 Dec 2023 15:59:33 +0200 Subject: [PATCH 29/29] Upgrade python setup in actions --- .github/workflows/checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index d16499e..2782534 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -14,7 +14,7 @@ jobs: - name: Check out repository uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install requirements