7 changes: 7 additions & 0 deletions .github/workflows/build.yml
@@ -11,6 +11,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install system dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y gcc-12 g++-12 libnuma-dev
sudo update-alternatives \
--install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
--slave /usr/bin/g++ g++ /usr/bin/g++-12
- name: Set up Python 3.11
uses: actions/setup-python@v4
with:
119 changes: 119 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,119 @@
"""
Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server
"""

# Future
from __future__ import annotations

# Standard
from collections.abc import Generator
import argparse
import asyncio
import signal
import sys
import threading
import traceback

# Third Party
from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
from vllm.utils import FlexibleArgumentParser
import pytest
import requests

# Local
from .utils import TaskFailedError, get_random_port, wait_until
from vllm_detector_adapter.api_server import add_chat_detection_params, run_server
from vllm_detector_adapter.utils import LocalEnvVarArgumentParser


@pytest.fixture(scope="session")
def http_server_port() -> int:
"""Port for the http server"""
return get_random_port()


@pytest.fixture(scope="session")
def http_server_url(http_server_port: int) -> str:
"""Url for the http server"""
return f"http://localhost:{http_server_port}"


@pytest.fixture
def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
"""Mimic: python -m vllm_detector_adapter.api_server --model <MODEL> …"""
# Use a 'tiny' model for test purposes
model_name = "facebook/opt-125m"

mock_argv = [
"__main__.py",
"--model",
model_name,
f"--port={http_server_port}",
"--host=localhost",
"--dtype=float32",
"--device=cpu",
"--disable-frontend-multiprocessing",
"--disable-async-output-proc",
"--enforce-eager",
]
monkeypatch.setattr(sys, "argv", mock_argv, raising=False)

    # Build the parser the same way __main__ does in api_server.py
base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
parser = add_chat_detection_params(parser)
args = parser.parse_args()
validate_parsed_serve_args(args)
return args


@pytest.fixture
def _servers(
args: argparse.Namespace,
http_server_url: str,
monkeypatch,
) -> Generator[None, None, None]:
"""Start server in background thread"""
loop = asyncio.new_event_loop()
task: asyncio.Task | None = None

# Patch signal handling so child threads don’t touch the OS handler table
monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)

def target() -> None:
nonlocal task
task = loop.create_task(run_server(args))
try:
print("[conftest] starting run server...", flush=True)
loop.run_until_complete(task)
except Exception as e:
print("[conftest] server failed to start:", e, flush=True)
            traceback.print_exc()
raise
finally:
loop.close()

t = threading.Thread(target=target, name="vllm-detector-server")
t.start()

def _health() -> bool:
if task and task.done():
raise TaskFailedError(task.exception())
requests.get(f"{http_server_url}/health", timeout=1).raise_for_status()
return True

try:
wait_until(_health, timeout=120.0, interval=1.0)
# tests execute with live server
yield
finally:
        if task and not task.done():
            # Task.cancel() is not thread-safe; schedule it on the server's loop
            loop.call_soon_threadsafe(task.cancel)
        t.join()


@pytest.fixture
def api_base_url(_servers, http_server_url: str) -> str:
"""Pulls up the server and returns the URL to tests"""
return http_server_url
8 changes: 8 additions & 0 deletions tests/test_http_server.py
@@ -0,0 +1,8 @@
# Third Party
import requests


def test_startup(api_base_url):
"""Smoke-test: test that the servers starts and health endpoint returns a 200 status code"""
r = requests.get(f"{api_base_url}/health", timeout=5)
assert r.status_code == 200
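
A natural follow-up would exercise one of the OpenAI-compatible routes the adapter inherits from vLLM. A minimal sketch, assuming the standard /v1/models listing is exposed and that the fixture model is reported under its Hugging Face id (test name and assertions are hypothetical, not part of this PR):

def test_models_endpoint(api_base_url):
    """Hypothetical smoke test: the models listing should include the fixture model."""
    r = requests.get(f"{api_base_url}/v1/models", timeout=5)
    assert r.status_code == 200
    served_ids = [m["id"] for m in r.json()["data"]]
    assert "facebook/opt-125m" in served_ids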
48 changes: 48 additions & 0 deletions tests/utils.py
@@ -0,0 +1,48 @@
"""Utility helpers shared by the test suite."""

# Future
from __future__ import annotations

# Standard
from typing import Callable, TypeVar
import socket
import time

__all__ = ["get_random_port", "wait_until", "TaskFailedError"]

T = TypeVar("T")
Predicate = Callable[[], bool]


class TaskFailedError(RuntimeError):
"""Raised when the background server task exits unexpectedly."""


def get_random_port() -> int:
"""Get an unused TCP port"""
with socket.socket() as s:
s.bind(("localhost", 0))
return s.getsockname()[1]


def wait_until(
predicate: Predicate,
*,
timeout: float = 30.0,
interval: float = 0.5,
) -> None:
"""
Poll predicate until it returns True or timeout seconds elapse.
"""
deadline = time.monotonic() + timeout
while True:
try:
if predicate():
return
except Exception:
pass

if time.monotonic() >= deadline:
raise TimeoutError("Timed out waiting for condition")

time.sleep(interval)
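
These helpers are not pytest-specific. A minimal usage sketch, assuming some server has already been launched on the chosen port, that polls with wait_until until the port accepts TCP connections:

import socket

from tests.utils import get_random_port, wait_until

port = get_random_port()
# ... launch a server bound to ("localhost", port) here ...

def _port_open() -> bool:
    # connect_ex returns 0 once something is listening on the port
    with socket.socket() as s:
        return s.connect_ex(("localhost", port)) == 0

wait_until(_port_open, timeout=30.0, interval=0.5)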
28 changes: 26 additions & 2 deletions tox.ini
@@ -6,7 +6,6 @@ description = run tests with pytest with coverage
extras =
all
dev-test
vllm
passenv =
LOG_LEVEL
LOG_FILTERS
@@ -15,10 +14,35 @@ passenv =
LOG_CHANNEL_WIDTH
setenv =
DFTYPE = pandas_all
VLLM_LOGGING_LEVEL = DEBUG
VLLM_TARGET_DEVICE=cpu

commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
allowlist_externals =
git
rm
sh

# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ──
commands_pre =
# 1) clone exactly vLLM v0.8.4
rm -rf {envtmpdir}/vllm_source
git clone --branch v0.8.4 \
https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source

# 2) install its Python build deps
{envpython} -m pip install --upgrade pip
{envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
{envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \
--extra-index-url https://download.pytorch.org/whl/cpu

# 3) build & install vLLM in CPU mode
sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install"
#{envpython} -m pip install {envtmpdir}/vllm_source

commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
; -W ignore::DeprecationWarning


; Unclear: We probably want to test wheel packaging
; But! tox will fail when this is set and _any_ interpreter is missing
; Without this, sdist packaging is tested so that's a start.