diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e85249e..404827e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -11,6 +11,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y gcc-12 g++-12 libnuma-dev
+          sudo update-alternatives \
+            --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 \
+            --slave /usr/bin/g++ g++ /usr/bin/g++-12
       - name: Set up Python 3.11
         uses: actions/setup-python@v4
         with:
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..f95e8ec
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,119 @@
+"""
+Pytest fixtures for spinning up a live vllm-detector-adapter HTTP server
+"""
+
+# Future
+from __future__ import annotations
+
+# Standard
+from collections.abc import Generator
+import argparse
+import asyncio
+import signal
+import sys
+import threading
+import traceback
+
+# Third Party
+from vllm.entrypoints.openai.cli_args import make_arg_parser, validate_parsed_serve_args
+from vllm.utils import FlexibleArgumentParser
+import pytest
+import requests
+
+# Local
+from .utils import TaskFailedError, get_random_port, wait_until
+from vllm_detector_adapter.api_server import add_chat_detection_params, run_server
+from vllm_detector_adapter.utils import LocalEnvVarArgumentParser
+
+
+@pytest.fixture(scope="session")
+def http_server_port() -> int:
+    """Port for the http server"""
+    return get_random_port()
+
+
+@pytest.fixture(scope="session")
+def http_server_url(http_server_port: int) -> str:
+    """URL for the http server"""
+    return f"http://localhost:{http_server_port}"
+
+
+@pytest.fixture
+def args(monkeypatch, http_server_port: int) -> argparse.Namespace:
+    """Mimic: python -m vllm_detector_adapter.api_server --model …"""
+    # Use a 'tiny' model for test purposes
+    model_name = "facebook/opt-125m"
+
+    mock_argv = [
+        "__main__.py",
+        "--model",
+        model_name,
+        f"--port={http_server_port}",
+        "--host=localhost",
+        "--dtype=float32",
+        "--device=cpu",
+        "--disable-frontend-multiprocessing",
+        "--disable-async-output-proc",
+        "--enforce-eager",
+    ]
+    monkeypatch.setattr(sys, "argv", mock_argv, raising=False)
+
+    # Build the parser the same way __main__ does in api_server.py
+    base_parser = FlexibleArgumentParser(description="vLLM server setup for pytest.")
+    parser = LocalEnvVarArgumentParser(parser=make_arg_parser(base_parser))
+    parser = add_chat_detection_params(parser)
+    args = parser.parse_args()
+    validate_parsed_serve_args(args)
+    return args
+
+
+@pytest.fixture
+def _servers(
+    args: argparse.Namespace,
+    http_server_url: str,
+    monkeypatch,
+) -> Generator[None, None, None]:
+    """Start the server in a background thread"""
+    loop = asyncio.new_event_loop()
+    task: asyncio.Task | None = None
+
+    # Patch signal handling so child threads don't touch the OS handler table
+    monkeypatch.setattr(loop, "add_signal_handler", lambda *args, **kwargs: None)
+    monkeypatch.setattr(signal, "signal", lambda *args, **kwargs: None)
+
+    def target() -> None:
+        nonlocal task
+        task = loop.create_task(run_server(args))
+        try:
+            print("[conftest] starting run_server...", flush=True)
+            loop.run_until_complete(task)
+        except Exception as e:
+            print("[conftest] server failed to start:", e, flush=True)
+            traceback.print_exc()
+            raise
+        finally:
+            loop.close()
+
+    t = threading.Thread(target=target, name="vllm-detector-server")
+    t.start()
+
+    def _health() -> bool:
+        if task and task.done():
+            raise TaskFailedError(task.exception())
+        requests.get(f"{http_server_url}/health", timeout=1).raise_for_status()
+        return True
+
+    try:
+        wait_until(_health, timeout=120.0, interval=1.0)
+        # tests execute against the live server
+        yield
+    finally:
+        if task:
+            task.cancel()
+        t.join()
+
+
+@pytest.fixture
+def api_base_url(_servers, http_server_url: str) -> str:
+    """Starts the server and returns its URL to tests"""
+    return http_server_url
diff --git a/tests/test_http_server.py b/tests/test_http_server.py
new file mode 100644
index 0000000..1f885a3
--- /dev/null
+++ b/tests/test_http_server.py
@@ -0,0 +1,8 @@
+# Third Party
+import requests
+
+
+def test_startup(api_base_url):
+    """Smoke test: the server starts and the health endpoint returns a 200 status code"""
+    r = requests.get(f"{api_base_url}/health", timeout=5)
+    assert r.status_code == 200
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..b1129c3
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,48 @@
+"""Utility helpers shared by the test suite."""
+
+# Future
+from __future__ import annotations
+
+# Standard
+from typing import Callable, TypeVar
+import socket
+import time
+
+__all__ = ["get_random_port", "wait_until", "TaskFailedError"]
+
+T = TypeVar("T")
+Predicate = Callable[[], bool]
+
+
+class TaskFailedError(RuntimeError):
+    """Raised when the background server task exits unexpectedly."""
+
+
+def get_random_port() -> int:
+    """Get an unused TCP port"""
+    with socket.socket() as s:
+        s.bind(("localhost", 0))
+        return s.getsockname()[1]
+
+
+def wait_until(
+    predicate: Predicate,
+    *,
+    timeout: float = 30.0,
+    interval: float = 0.5,
+) -> None:
+    """
+    Poll predicate until it returns True or timeout seconds elapse.
+    """
+    deadline = time.monotonic() + timeout
+    while True:
+        try:
+            if predicate():
+                return
+        except Exception:
+            pass
+
+        if time.monotonic() >= deadline:
+            raise TimeoutError("Timed out waiting for condition")
+
+        time.sleep(interval)
diff --git a/tox.ini b/tox.ini
index 79447ad..8b8111f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,6 @@ description = run tests with pytest with coverage
 extras =
     all
     dev-test
-    vllm
 passenv =
     LOG_LEVEL
     LOG_FILTERS
@@ -15,10 +14,35 @@ passenv =
     LOG_CHANNEL_WIDTH
 setenv =
     DFTYPE = pandas_all
+    VLLM_LOGGING_LEVEL = DEBUG
+    VLLM_TARGET_DEVICE=cpu
 
-commands = pytest --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
+allowlist_externals =
+    git
+    rm
+    sh
+
+# ── BEFORE running pytest, build & install vLLM v0.8.4 CPU-only from source ──
+commands_pre =
+    # 1) clone exactly vLLM v0.8.4
+    rm -rf {envtmpdir}/vllm_source
+    git clone --branch v0.8.4 \
+        https://github.com/vllm-project/vllm.git {envtmpdir}/vllm_source
+
+    # 2) install its Python build deps
+    {envpython} -m pip install --upgrade pip
+    {envpython} -m pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
+    {envpython} -m pip install -v -r {envtmpdir}/vllm_source/requirements/cpu.txt \
+        --extra-index-url https://download.pytorch.org/whl/cpu
+
+    # 3) build & install vLLM in CPU mode
+    sh -c "cd {envtmpdir}/vllm_source && VLLM_TARGET_DEVICE=cpu {envpython} setup.py install"
+    #{envpython} -m pip install {envtmpdir}/vllm_source
+
+commands = pytest -s --cov=vllm_detector_adapter --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests} -W error::UserWarning
 ; -W ignore::DeprecationWarning
+
 
 ; Unclear: We probably want to test wheel packaging
 ; But! tox will fail when this is set and _any_ interpreter is missing
 ; Without this, sdist packaging is tested so that's a start.