diff --git a/datalad_registry/__init__.py b/datalad_registry/__init__.py
index 64c58c44..beff4215 100644
--- a/datalad_registry/__init__.py
+++ b/datalad_registry/__init__.py
@@ -1,7 +1,9 @@
+import logging
 from pathlib import Path
 import sys
 
 from celery import Celery, Task
+import datalad
 from flask import Flask, request
 from flask_openapi3 import Info, OpenAPI
 from kombu.serialization import register
@@ -19,12 +21,24 @@
 
 __version__ = version("datalad-registry")
 
+datalad.enable_librarymode()
+
+# === A temporary solution for removing the custom log handlers set
+# by the datalad package
+# (until https://github.com/datalad/datalad/pull/7521 is resolved and released) ===
+datalad_lgr = logging.getLogger("datalad")
+# Iterate over a snapshot of the handler list: `removeHandler()` deletes from
+# `datalad_lgr.handlers` in place, so looping over the live list would skip
+# every other handler and leave some of them attached.
+for h in list(datalad_lgr.handlers):
+    datalad_lgr.removeHandler(h)
+# === End of temporary solution ===
+
 
 def create_app() -> Flask:
     """
     Factory function for producing Flask app
     """
-
     config = compile_config_from_env()
 
     app = OpenAPI(
diff --git a/datalad_registry/make_celery.py b/datalad_registry/make_celery.py
index e6de1eb0..98ecd869 100644
--- a/datalad_registry/make_celery.py
+++ b/datalad_registry/make_celery.py
@@ -1,9 +1,69 @@
 # This file provides Celery commands access to the Celery app created through
 # the factory functions in datalad_registry/__init__.py
+import logging
+import re
 
 from celery import Celery
 
 from . import create_app
 
+
+# === Code for suppressing known git progress reports ===
+class SuppressKnownGitProgressReport(logging.Filter):
+    """Logging filter that drops git progress reports of known types"""
+
+    # Known git progress report types
+    # These types can be found in the definition of
+    # `datalad.support.gitrepo.GitProgress`
+    known_git_progress_report_types = {
+        "Counting objects",
+        "Compressing objects",
+        "Writing objects",
+        "Receiving objects",
+        "Resolving deltas",
+        "Finding sources",
+        "Checking out files",
+        "Enumerating objects",
+    }
+
+    re_op_absolute = re.compile(r"(?:remote: )?([\w\s]+):\s+\d+.*")
+    re_op_relative = re.compile(r"(?:remote: )?([\w\s]+):\s+\d+% \(\d+/\d+\).*")
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """
+        Decide whether a log record is to be emitted
+
+        :param record: The log record to examine
+        :return: `False` if the message of the record is a git progress report
+            of a known type (the record is dropped), `True` otherwise
+        """
+        # The following logic is based on the logic in
+        # `datalad.support.gitrepo.GitProgress._parse_progress_line`
+
+        msg = record.getMessage()
+
+        match = self.re_op_relative.match(msg)
+        if match is None:
+            match = self.re_op_absolute.match(msg)
+
+        if match is None:
+            # === msg does not match the pattern of a git progress report ===
+            return True
+
+        op_name = match.group(1)
+
+        # Return False (filtering out the log message) only
+        # if the message matches the pattern of a git progress report and
+        # is of a known git progress report type
+        return op_name not in self.known_git_progress_report_types
+
+
+# Retrieve a reference to the "datalad.gitrepo" logger
+dl_gitrepo_lgr = logging.getLogger("datalad.gitrepo")
+
+# Add a filter to the "datalad.gitrepo" logger to suppress known git progress reports
+dl_gitrepo_lgr.addFilter(SuppressKnownGitProgressReport())
+# === End of code for suppressing known git progress reports ===
+
 flask_app = create_app()
 celery_app: Celery = flask_app.extensions["celery"]
diff --git a/datalad_registry/tests/test_make_celery.py b/datalad_registry/tests/test_make_celery.py
new file mode 100644
index 00000000..108eacbd
--- /dev/null
+++ b/datalad_registry/tests/test_make_celery.py
@@ -0,0 +1,63 @@
+import logging
+
+from celery import Celery
+import pytest
+
+
+@pytest.mark.usefixtures("set_test_env")
+def test_celery_app_instantiation():
+    """
+    Test that the Celery app is instantiated correctly.
+    """
+    from datalad_registry.make_celery import celery_app, flask_app
+
+    assert celery_app is flask_app.extensions["celery"]
+    assert isinstance(celery_app, Celery)
+
+
+class TestSuppressKnownGitProgressReport:
+    # Set needed environment to instantiate a flask app in datalad_registry.make_celery
+    @pytest.mark.usefixtures("set_test_env")
+    @pytest.mark.parametrize(
+        "msg, expected_result",
+        [
+            ("hello", True),
+            ("Start receiving objects", True),
+            ("Start counting objects", True),
+            (
+                "Failed to get 'sourcedata/templateflow/tpl-NKI.path', "
+                "skipping this submodule",
+                True,
+            ),
+            (
+                "Failed to get 'sourcedata/templateflow/tpl-WHS.path', "
+                "skipping this submodule",
+                True,
+            ),
+            ("Finished enumerating objects", True),
+            ("Finished compressing objects", True),
+            ("Resolving deltas: 65% (19502/30003)", False),
+            ("Receiving objects: 2% (8943/447132)", False),
+            ("Receiving objects: 57% (194348/340960), 9.80 MiB | 9.79 MiB/s", False),
+            ("remote: Compressing objects: 100% (111150/111150), done.", False),
+            ("remote: Compressing objects: 89% (98924/111150)", False),
+            ("Resolving deltas: 90% (27003/30003)", False),
+        ],
+    )
+    def test_filter(self, msg, expected_result):
+        """
+        Test the filter method of the SuppressKnownGitProgressReport class
+        """
+        from datalad_registry.make_celery import SuppressKnownGitProgressReport
+
+        record = logging.LogRecord(
+            name="logger",
+            level=logging.DEBUG,
+            pathname="",
+            lineno=1,
+            msg=msg,
+            args=(),
+            exc_info=None,
+        )
+
+        assert SuppressKnownGitProgressReport().filter(record) is expected_result