Skip to content

Commit e82029b

Browse files
committed
Add benchmark tests
Adopt new error handling patterns done elsewhere. Properly parameterize the worker number. Move event trigger to drain_queue method. Fix changed event meanings. Add artifacting of benchmark data. Add benchmark test for control task. Add some control message benchmarks. Combine with existing test methods module. Update unit test. Update to new config problems. Avoid retyping where no longer necessary. Do some modernization. Combine test_pool files. Update test to new pattern. Use better start_working call.
1 parent b40708e commit e82029b

File tree

11 files changed

+345
-6
lines changed

11 files changed

+345
-6
lines changed

.github/workflows/ci.yml

+30-1
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,12 @@ jobs:
2727
run: pip install -e .[pg_notify]
2828
- run: make postgres
2929
- run: pip install pytest pytest-asyncio
30-
- run: pytest tests/unit tests/integration -vv -s
30+
- run: pytest tests/unit tests/integration -vv -s -m "not integration"
3131

3232
black:
3333
name: Run black
3434
runs-on: ubuntu-latest
35+
timeout-minutes: 3
3536
steps:
3637
- uses: actions/checkout@v4
3738
with:
@@ -43,6 +44,7 @@ jobs:
4344
isort:
4445
name: Run isort
4546
runs-on: ubuntu-latest
47+
timeout-minutes: 3
4648
steps:
4749
- uses: actions/checkout@v4
4850
with:
@@ -54,6 +56,7 @@ jobs:
5456
mypy:
5557
name: Run mypy
5658
runs-on: ubuntu-latest
59+
timeout-minutes: 3
5760
steps:
5861
- uses: actions/checkout@v4
5962
with:
@@ -67,10 +70,36 @@ jobs:
6770
flake8:
6871
name: Run flake8
6972
runs-on: ubuntu-latest
73+
timeout-minutes: 3
7074
steps:
7175
- uses: actions/checkout@v4
7276
with:
7377
show-progress: false
7478

7579
- run: pip install flake8
7680
- run: flake8 dispatcher
81+
82+
benchmark:
83+
name: Run benchmark tests
84+
runs-on: ubuntu-latest
85+
timeout-minutes: 15
86+
steps:
87+
- uses: actions/checkout@v4
88+
with:
89+
show-progress: false
90+
91+
- uses: actions/setup-python@v5
92+
with:
93+
python-version: '3.13'
94+
95+
- name: Install dispatcher
96+
run: pip install -e .[pg_notify]
97+
- run: make postgres
98+
- run: pip install pytest pytest-benchmark pytest-asyncio
99+
- run: make benchmark
100+
101+
- name: Save benchmark results as artifact
102+
uses: actions/upload-artifact@v4
103+
with:
104+
name: benchmark-results
105+
path: benchmark_data.json

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,6 @@ tools/generated/*
4848

4949
# Gets created when testing sonar-scanner locally
5050
.scannerwork
51+
52+
# benchmark tests output
53+
benchmark_data.json

Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,6 @@ linters:
2020
isort dispatcher/
2121
flake8 dispatcher/
2222
mypy dispatcher
23+
24+
benchmark:
25+
py.test tests/ --benchmark-columns=mean,min,max,stddev,rounds --benchmark-json=benchmark_data.json --benchmark-only

dispatcher/worker/task.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ def work_loop(worker_id: int, settings: dict, finished_queue: multiprocessing.Qu
250250
result = worker.perform_work(message)
251251

252252
# Indicate that the task is finished by putting a message in the finished_queue
253-
finished_queue.put(worker.get_finished_message(result, message, time_started))
253+
to_send = worker.get_finished_message(result, message, time_started)
254+
finished_queue.put(to_send)
254255

255256
finished_queue.put(worker.get_shutdown_message())
256257
logger.debug(f'Worker {worker_id} informed the pool manager that we have exited')

tests/benchmark/conftest.py

+181
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import asyncio
2+
import contextlib
3+
import multiprocessing
4+
import time
5+
from copy import deepcopy
6+
7+
import pytest
8+
9+
from dispatcher.brokers.pg_notify import create_connection
10+
from dispatcher.config import DispatcherSettings
11+
from dispatcher.factories import from_settings
12+
13+
14+
class PoolServer:
    """Run a dispatcher worker pool in a background process for benchmarking.

    Before you read more, know there are 3 contexts involved:
    - the pytest process, which calls run_benchmark_test via pytest-benchmark
    - a background process started by start_server, running the asyncio pool
    - the worker subprocesses that the pool itself manages

    The first two communicate over a pair of multiprocessing queues:
    queue_in (test -> server) and queue_out (server -> test).
    run_benchmark_test must be called inside the with_server() context manager.
    """

    def __init__(self, config):
        # Raw dispatcher config dict; copied per run so tests do not interfere.
        self.config = config

    def run_benchmark_test(self, queue_in, queue_out, times):
        """Submit one round of `times` tasks to the server process and block
        until it reports the work has been cleared.

        Raises if the server process replied 'error' (its traceback is in the
        captured subprocess output).
        """
        print(f'submitting message to pool server {times}')
        queue_in.put(str(times))
        print('waiting for reply message from pool server')
        message_in = queue_out.get()
        print(f'finished running round with {times} messages, got: {message_in}')
        if message_in == 'error':
            raise Exception('Test subprocess runner exception, look back in logs')

    @classmethod
    async def run_pool(cls, config, queue_in, queue_out, workers, function='lambda: __import__("time").sleep(0.01)'):
        """Async server loop for the background process.

        Starts the pool, signals 'ready', then for every numeric message on
        queue_in dispatches that many tasks and replies 'done' once the pool's
        work_cleared event fires. A 'stop' message shuts the pool down.
        """
        # deepcopy, not dict.copy(): the config holds nested dicts and the
        # shallow copy would let the pool_kwargs mutation below leak back
        # into the caller's config object.
        this_config = deepcopy(config)
        this_config['service']['pool_kwargs']['max_workers'] = workers
        dispatcher = from_settings(DispatcherSettings(this_config))
        pool = dispatcher.pool
        await pool.start_working(dispatcher)
        queue_out.put('ready')

        print('waiting for message to start test')
        loop = asyncio.get_event_loop()
        while True:
            print('pool server listening on queue_in')
            # queue_in.get() blocks, so run it in an executor to keep the
            # event loop (and the pool it drives) responsive.
            message = await loop.run_in_executor(None, queue_in.get)
            print(f'pool server got message {message}')
            if message == 'stop':
                print('shutting down pool server')
                pool.shutdown()
                break
            times = int(message.strip())
            print('creating cleared event task')
            # Start waiting on work_cleared BEFORE dispatching, so the event
            # cannot be set and missed between submission and the wait.
            cleared_event = asyncio.create_task(pool.events.work_cleared.wait())
            print('creating tasks for submissions')
            submissions = [pool.dispatch_task({'task': function, 'uuid': str(i)}) for i in range(times)]
            print('awaiting submission task')
            await asyncio.gather(*submissions)
            print('waiting for cleared event')
            await cleared_event
            pool.events.work_cleared.clear()
            await loop.run_in_executor(None, queue_out.put, 'done')
        print('exited forever loop of pool server')

    @classmethod
    def run_pool_loop(cls, config, queue_in, queue_out, workers, **kwargs):
        """Synchronous entry point for multiprocessing.Process: drive run_pool."""
        # Create a fresh loop explicitly: this runs in a newly started process
        # and asyncio.get_event_loop() is deprecated when no loop is running.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(cls.run_pool(config, queue_in, queue_out, workers, **kwargs))
        except Exception:
            import traceback

            traceback.print_exc()
            # We are in a subprocess here, so even if we handle the exception
            # the main process will not know and still wait forever
            # so give them a kick on our way out
            print('sending error message after error')
            queue_out.put('error')
        finally:
            print('closing asyncio loop')
            loop.close()
            print('finished closing async loop')

    def start_server(self, workers, **kwargs):
        """Create the queues and launch the background pool server process."""
        self.queue_in = multiprocessing.Queue()
        self.queue_out = multiprocessing.Queue()
        process = multiprocessing.Process(target=self.run_pool_loop, args=(self.config, self.queue_in, self.queue_out, workers), kwargs=kwargs)
        process.start()
        return process

    @contextlib.contextmanager
    def with_server(self, *args, **kwargs):
        """Context manager: run the server process for the duration of a test."""
        process = self.start_server(*args, **kwargs)
        msg = self.queue_out.get()
        if msg != 'ready':
            raise RuntimeError('never got ready message from subprocess')
        try:
            yield self
        finally:
            self.queue_in.put('stop')
            process.terminate()  # SIGTERM
            # Poll to close process resources, due to race condition where it is not still running
            for i in range(3):
                time.sleep(0.1)
                try:
                    process.close()
                    break
                except Exception:
                    if i == 2:
                        raise
113+
114+
115+
class FullServer(PoolServer):
    """Like PoolServer, but runs the whole dispatcher (producers included) and
    submits work through real pg_notify messages instead of direct dispatch."""

    def run_benchmark_test(self, queue_in, queue_out, times):
        """Send `times` pg_notify task messages and block until the server
        reports its queue has been cleared."""
        print('sending wakeup message to set new clear event')
        queue_in.put('wake')
        print('sending pg_notify messages')
        function = 'lambda: __import__("time").sleep(0.01)'
        conn = create_connection(**self.config['brokers']['pg_notify']['config'])
        with conn.cursor() as cur:
            for i in range(times):
                cur.execute(f"SELECT pg_notify('test_channel', '{function}');")
        print('waiting for reply message from pool server')
        message_in = queue_out.get()
        print(f'finished running round with {times} messages, got: {message_in}')
        # Consistent with PoolServer.run_benchmark_test: surface subprocess
        # failures instead of silently continuing past an 'error' reply.
        if message_in == 'error':
            raise Exception('Test subprocess runner exception, look back in logs')

    @classmethod
    async def run_pool(cls, config, queue_in, queue_out, workers):
        """Async server loop: run the full dispatcher and report queue_cleared
        after each 'wake' message from the test process."""
        # deepcopy, not dict.copy(): the pool_kwargs mutations below must not
        # leak into the caller's shared config dict.
        this_config = deepcopy(config)
        this_config['service']['pool_kwargs']['max_workers'] = workers
        this_config['service']['pool_kwargs']['min_workers'] = workers
        dispatcher = from_settings(DispatcherSettings(this_config))
        await dispatcher.start_working()
        # Make sure the dispatcher is listening before starting the tests which will submit messages
        for producer in dispatcher.producers:
            await producer.events.ready_event.wait()
        queue_out.put('ready')

        print('waiting for message to start test')
        loop = asyncio.get_event_loop()
        while True:
            print('pool server listening on queue_in')
            # Blocking queue read happens in an executor so the dispatcher
            # keeps processing pg_notify traffic meanwhile.
            message = await loop.run_in_executor(None, queue_in.get)
            print(f'pool server got message {message}')
            if message == 'stop':
                print('shutting down server')
                dispatcher.shutdown()
                break
            print('creating cleared event task')
            cleared_event = asyncio.create_task(dispatcher.pool.events.queue_cleared.wait())
            print('waiting for cleared event')
            await cleared_event
            dispatcher.pool.events.queue_cleared.clear()
            await loop.run_in_executor(None, queue_out.put, 'done')
        print('exited forever loop of pool server')
158+
159+
160+
@pytest.fixture
def benchmark_config(test_config):
    """A private copy of the shared test config, tagged with the benchmark node id."""
    cfg = deepcopy(test_config)
    cfg['service']['main_kwargs']['node_id'] = 'benchmark-server'
    return cfg
165+
166+
167+
@pytest.fixture
def benchmark_settings(benchmark_config):
    """DispatcherSettings built from the benchmark config."""
    settings = DispatcherSettings(benchmark_config)
    return settings
170+
171+
172+
@pytest.fixture
def with_pool_server(benchmark_config):
    """Context manager that runs a pool-only benchmark server in the background."""
    return PoolServer(benchmark_config).with_server
176+
177+
178+
@pytest.fixture
def with_full_server(benchmark_config):
    """Context manager that runs a full-dispatcher benchmark server in the background."""
    return FullServer(benchmark_config).with_server
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import os
2+
import sys
3+
4+
import pytest
5+
6+
7+
@pytest.mark.benchmark(group="by_task")
@pytest.mark.parametrize('times', [1, 10, 100, 1000])
def test_clear_sleep_by_task_number(benchmark, times, with_pool_server):
    """Benchmark clearing `times` short-sleep tasks on a 4-worker pool."""
    sleep_task = 'lambda: __import__("time").sleep(0.01)'
    with with_pool_server(4, function=sleep_task) as server:
        benchmark(server.run_benchmark_test, server.queue_in, server.queue_out, times)
12+
13+
14+
@pytest.mark.benchmark(group="by_task")
@pytest.mark.parametrize('times', [1, 10, 100, 1000])
def test_clear_no_op_by_task_number(benchmark, times, with_pool_server):
    """Benchmark clearing `times` no-op tasks, isolating dispatch overhead."""
    noop_task = 'lambda: None'
    with with_pool_server(4, function=noop_task) as server:
        benchmark(server.run_benchmark_test, server.queue_in, server.queue_out, times)
19+
20+
21+
@pytest.mark.benchmark(group="by_worker_sleep")
@pytest.mark.parametrize('workers', [1, 4, 12, 24, 50, 75])
def test_clear_sleep_by_worker_count(benchmark, workers, with_pool_server):
    """Benchmark clearing a fixed 100 sleep tasks across varying worker counts."""
    sleep_task = 'lambda: __import__("time").sleep(0.01)'
    with with_pool_server(workers, function=sleep_task) as server:
        benchmark(server.run_benchmark_test, server.queue_in, server.queue_out, 100)
26+
27+
28+
@pytest.mark.benchmark(group="by_worker_math")
@pytest.mark.parametrize('workers', [1, 4, 12, 24, 50, 75])
def test_clear_math_by_worker_count(benchmark, workers, with_pool_server):
    """Benchmark 100 CPU-bound fibonacci tasks across varying worker counts."""
    # Make the repository root importable so workers can import tests.data.methods.
    # NOTE(review): the original climbed 5 dirname levels; from tests/benchmark/
    # the repo root is 3 levels up — confirm against the actual file location.
    root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    if root_dir not in sys.path:
        sys.path.append(root_dir)

    # __import__('tests.data.methods') returns the top-level 'tests' package,
    # so .fibonacci would raise AttributeError; fromlist makes __import__
    # return the submodule itself so the attribute resolves.
    function = 'lambda: __import__("tests.data.methods", fromlist=["fibonacci"]).fibonacci(26)'
    with with_pool_server(workers, function=function) as pool_server:
        benchmark(pool_server.run_benchmark_test, pool_server.queue_in, pool_server.queue_out, 100)

tests/benchmark/test_control.py

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import asyncio
2+
3+
import pytest
4+
5+
from dispatcher.factories import get_control_from_settings, get_publisher_from_settings
6+
7+
8+
@pytest.mark.benchmark(group="control")
def test_alive_benchmark(benchmark, with_full_server, test_settings):
    """Benchmark the round-trip time of an 'alive' control message on an idle server."""
    control = get_control_from_settings(settings=test_settings)

    def check_alive():
        reply = control.control_with_reply('alive')
        assert reply == [{'node_id': 'benchmark-server'}]

    with with_full_server(4):
        benchmark(check_alive)
18+
19+
20+
@pytest.mark.benchmark(group="control")
@pytest.mark.parametrize('messages', [0, 3, 4, 5, 10, 100])
def test_alive_benchmark_while_busy(benchmark, with_full_server, benchmark_settings, messages):
    """Benchmark the 'alive' round-trip while the server is loaded with tasks."""
    control = get_control_from_settings(settings=benchmark_settings)
    broker = get_publisher_from_settings('pg_notify', settings=benchmark_settings)
    broker.get_connection()  # warm connection saver

    def check_alive():
        task = 'lambda: __import__("time").sleep(0.01)'
        # Load the server with `messages` tasks before timing the control reply.
        for _ in range(messages):
            broker.publish_message(channel='test_channel', message=task)
        reply = control.control_with_reply('alive', timeout=2)
        assert reply == [{'node_id': 'benchmark-server'}]

    with with_full_server(4):
        benchmark(check_alive)

tests/benchmark/test_full_server.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import pytest
2+
3+
4+
@pytest.mark.benchmark(group="by_system")
def test_clear_time_with_full_server(benchmark, with_full_server):
    """Benchmark clearing 100 tasks submitted through the full pg_notify path."""
    with with_full_server(4) as srv:
        benchmark(srv.run_benchmark_test, srv.queue_in, srv.queue_out, 100)
8+
9+
10+
@pytest.mark.benchmark(group="by_system")
def test_clear_time_with_only_pool(benchmark, with_pool_server):
    """Benchmark clearing 100 tasks when dispatching straight to the pool."""
    with with_pool_server(4) as srv:
        benchmark(srv.run_benchmark_test, srv.queue_in, srv.queue_out, 100)

0 commit comments

Comments
 (0)