diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e85249e..404827e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -11,6 +11,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y gcc-12 g++-12 libnuma-dev
+          sudo update-alternatives \
+            --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
+            --slave /usr/bin/g++ g++ /usr/bin/g++-12
       - name: Set up Python 3.11
         uses: actions/setup-python@v4
         with:
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f95e8ec
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,119 @@
+"""
+Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server
+"""
+
+# Future
+from __future__ import annotations
+
+# Standard
+from collections.abc import Generator
+import argparse
+import asyncio
+import signal
+import sys
+import threading
+import traceback
+
+# Third Party
+from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
+from vllm.utils import FlexibleArgumentParser
+import pytest
+import requests
+
+# Local
+from .utils import TaskFailedError, get_random_port, wait_until
+from vllm_detector_adapter.api_server import add_chat_detection_params, run_server
+from vllm_detector_adapter.utils import LocalEnvVarArgumentParser
+
+
+@pytest.fixture(scope="session")
+def http_server_port() -> int:
+    """Port for the http server"""
+    return get_random_port()
+
+
+@pytest.fixture(scope="session")
+def http_server_url(http_server_port: int) -> str:
+    """URL for the http server"""
+    return f"http://localhost:{http_server_port}"
+
+
+@pytest.fixture
+def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
+    """Mimic: python -m vllm_detector_adapter.api_server --model …"""
+    # Use a 'tiny' model for test purposes
+    model_name = "facebook/opt-125m"
+
+    mock_argv = [
+        "__main__.py",
+        "--model",
+        model_name,
+        f"--port={http_server_port}",
+        "--host=localhost",
+        "--dtype=float32",
+        "--device=cpu",
+        "--disable-frontend-multiprocessing",
+        "--disable-async-output-proc",
+        "--enforce-eager",
+    ]
+    monkeypatch.setattr(sys, "argv", mock_argv, raising=False)
+
+    # Build the parser the same way __main__ does in api_server.py
+    base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
+    parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
+    parser = add_chat_detection_params(parser)
+    args = parser.parse_args()
+    validate_parsed_serve_args(args)
+    return args
+
+
+@pytest.fixture
+def _servers(
+    args: argparse.Namespace,
+    http_server_url: str,
+    monkeypatch,
+) -> Generator[None, None, None]:
+    """Start the server in a background thread"""
+    loop = asyncio.new_event_loop()
+    task: asyncio.Task | None = None
+
+    # Patch signal handling so child threads don't touch the OS handler table
+    monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
+    monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)
+
+    def target() -> None:
+        nonlocal task
+        task = loop.create_task(run_server(args))
+        try:
+            print("[conftest] starting run_server...", flush=True)
+            loop.run_until_complete(task)
+        except Exception as e:
+            print("[conftest] server failed to start:", e, flush=True)
+            traceback.print_exc()
+            raise
+        finally:
+            loop.close()
+
+    t = threading.Thread(target=target, name="vllm-detector-server")
+    t.start()
+
+    def _health() -> bool:
+        if task and task.done():
+            raise TaskFailedError(task.exception())
+        requests.get(f"{http_server_url}/health", timeout=1).raise_for_status()
+        return True
+
+    try:
+        wait_until(_health, timeout=120.0, interval=1.0)
+        # tests execute against the live server
+        yield
+    finally:
+        if task:
+            task.cancel()
+        t.join()
+
+
+@pytest.fixture
+def api_base_url(_servers, http_server_url: str) -> str:
+    """Starts the server and returns its URL to tests"""
+    return http_server_url
diff --git a/tests/test_http_server.py b/tests/test_http_server.py
new file mode 100644
index 0000000..1f885a3
--- /dev/null
+++ b/tests/test_http_server.py
@@ -0,0 +1,8 @@
+# Third Party
+import requests
+
+
+def test_startup(api_base_url):
+    """Smoke test: the server starts and the health endpoint returns a 200 status code"""
+    r = requests.get(f"{api_base_url}/health", timeout=5)
+    assert r.status_code == 200
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..b1129c3
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,48 @@
+"""Utility helpers shared by the test suite."""
+
+# Future
+from __future__ import annotations
+
+# Standard
+from typing import Callable, TypeVar
+import socket
+import time
+
+__all__ = ["get_random_port", "wait_until", "TaskFailedError"]
+
+T = TypeVar("T")
+Predicate = Callable[[], bool]
+
+
+class TaskFailedError(RuntimeError):
+    """Raised when the background server task exits unexpectedly."""
+
+
+def get_random_port() -> int:
+    """Get an unused TCP port"""
+    with socket.socket() as s:
+        s.bind(("localhost", 0))
+        return s.getsockname()[1]
+
+
+def wait_until(
+    predicate: Predicate,
+    *,
+    timeout: float = 30.0,
+    interval: float = 0.5,
+) -> None:
+    """
+    Poll predicate until it returns True or timeout seconds elapse.
+    """
+    deadline = time.monotonic() + timeout
+    while True:
+        try:
+            if predicate():
+                return
+        except Exception:
+            pass
+
+        if time.monotonic() >= deadline:
+            raise TimeoutError("Timed out waiting for condition")
+
+        time.sleep(interval)
diff --git a/tox.ini b/tox.ini
index 79447ad..8b8111f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,6 @@ description = run tests with pytest with coverage
 extras =
     all
     dev-test
-    vllm
 passenv =
     LOG_LEVEL
     LOG_FILTERS
@@ -15,10 +14,35 @@ passenv =
     LOG_CHANNEL_WIDTH
 setenv =
     DFTYPE = pandas_all
+    VLLM_LOGGING_LEVEL = DEBUG
+    VLLM_TARGET_DEVICE=cpu
 
-commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
+allowlist_externals =
+    git
+    rm
+    sh
+
+# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ──
+commands_pre =
+    # 1) clone exactly vLLM v0.8.4
+    rm -rf {envtmpdir}/vllm_source
+    git clone --branch v0.8.4 \
+        https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source
+
+    # 2) install its Python build deps
+    {envpython} -m pip install --upgrade pip
+    {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
+    {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \
+        --extra-index-url https://download.pytorch.org/whl/cpu
+
+    # 3) build & install vLLM in CPU mode
+    sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install"
+    #{envpython} -m pip install {envtmpdir}/vllm_source
+
+commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
 ; -W ignore::DeprecationWarning
+
 
 ; Unclear: We probably want to test wheel packaging
 ; But! tox will fail when this is set and _any_ interpreter is missing
 ; Without this, sdist packaging is tested so that's a start.