Skip to content

Commit aaa42ad

Browse files
Support ray/dynamo nightly + vLLM 0.22 (cu129) across all extras
Enable the Ray 3.0 nightly + ai-dynamo nightly + vLLM 0.22 inference stack on the CUDA-12.9 image while keeping the full Curator dependency set (`uv sync --all-extras --all-groups`) resolvable and buildable. pyproject.toml: - ray: track the 3.0.0.dev0 nightly wheel (rolling /latest/ URL) - ai-dynamo and ai-dynamo-runtime >=1.3.0.dev0, both first-party so prerelease="if-necessary-or-explicit" enables the newest nightly without blanket prereleases (runtime is a transitive with stable releases, so it needs an explicit marker or uv backtracks to an older dynamo dev) - vLLM 0.22.0+cu129 via a dedicated cu129 wheel index + tool.uv.sources (default vLLM is now cu130; keep torch/vllm on CUDA 12.9) - drop nixl-cu13: ray[llm]/nixl hard-pin the CUDA-13 NIXL backend, whose eager `import nixl_ep` dlopens the absent libcudart.so.13 on cu12.9; keep the nixl meta + nixl-cu12 backend - opencv-python -> opencv-python-headless (no libGL/GPL GUI/FFmpeg bundling; matches vllm/mistral_common/albumentations) - bump torch/torchvision/torchaudio/torchcodec to the 2.11 cu129 line dynamo actor venv runtime_env (vllm.py): Ray builds it via a bare `uv pip install ai-dynamo[vllm]` that ignores pyproject, so force cu129 the way uv/vLLM document: --torch-backend cu129, unsafe-best-match (needed for nixl's split index resolution), and a per-version cu129 vllm index derived from ai-dynamo's own pin; the --override file pins ray== and drops nixl-cu13. Signed-off-by: Praateek <praateekm@gmail.com>
1 parent 8457b78 commit aaa42ad

4 files changed

Lines changed: 1911 additions & 664 deletions

File tree

nemo_curator/core/serve/dynamo/vllm.py

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from __future__ import annotations
1818

19+
import importlib.metadata
1920
import json
2021
import tempfile
2122
from functools import reduce
@@ -24,6 +25,7 @@
2425

2526
import ray
2627
from loguru import logger
28+
from packaging.requirements import Requirement
2729

2830
from nemo_curator.core.serve.base import BaseModelConfig
2931
from nemo_curator.core.serve.dynamo.infra import (
@@ -50,19 +52,80 @@
5052
from nemo_curator.core.serve.placement import ReplicaBundleSpec
5153

5254

53-
# ai-dynamo[vllm]'s [vllm] extra carries a hard ray pin, but Ray refuses
54-
# actor venvs whose ray version differs from the cluster head's. uv has no
55-
# inline override syntax — only ``--override <file>`` — so we materialize a
56-
# tiny constraints file at a fixed path on every node via
57-
# ``ensure_actor_overrides_on_all_nodes``; the content is derived from the
58-
# driver's ``ray.__version__`` at fan-out time so a future Curator ray bump
59-
# doesn't need a code change here.
55+
# The actor venv ``uv pip install`` needs overrides that pyproject's ``[tool.uv]``
56+
# can't reach (Ray runs it in an empty cwd). uv has no inline override syntax —
57+
# only ``--override <file>`` — so we materialize a constraints file at a fixed path
58+
# on every node via ``ensure_actor_overrides_on_all_nodes``. It carries:
59+
# * ``ray==<driver version>`` — ai-dynamo[vllm]'s [vllm] extra has a hard ray pin,
60+
# but Ray refuses actor venvs whose ray differs from the cluster head's. Derived
61+
# from the driver's ``ray.__version__`` so a future Curator ray bump needs no edit.
62+
# * ``nixl-cu13`` dropped — ai-dynamo[vllm] pulls the CUDA-13 NIXL backend, whose
63+
# eagerly-imported ``nixl_ep_cpp.so`` dlopens libcudart.so.13 (absent on this
64+
# CUDA-12.9 image). The base image excludes it via pyproject, but that override
65+
# doesn't reach this standalone install; re-apply it here so the cu12 backend wins.
6066
_ACTOR_VENV_OVERRIDES_PATH = Path(tempfile.gettempdir()) / "nemo_curator_dynamo_actor_overrides.txt"
67+
_ACTOR_VENV_NIXL_CU13_EXCLUSION = "nixl-cu13 ; sys_platform == 'never'"
68+
# The CUDA build the actor venv must match (torch ecosystem + vllm wheel variant).
69+
_ACTOR_VENV_CUDA_TAG = "cu129"
70+
71+
72+
def _vllm_cu129_index_url() -> str | None:
    """Return the vLLM cu129 wheel index for the exact version ai-dynamo[vllm] pins.

    ai-dynamo's [vllm] extra pins an exact vllm (e.g. ``==0.22.1``) that may
    differ from Curator's base vllm — the base installs ai-dynamo WITHOUT its
    [vllm] extra, so its vllm comes from Curator's own pin, while the actor
    venv installs ``ai-dynamo[vllm]`` and must honor ai-dynamo's pin. vLLM
    publishes a per-version cu129 wheel index at ``wheels.vllm.ai/<v>/cu129``;
    pointing at the pinned version means its ``+cu129`` local build sorts above
    the default cu130 wheel under unsafe-best-match. Derived from ai-dynamo's
    own metadata so a nightly bump (which changes the vllm pin) needs no edit.

    This helper is evaluated while the module is being imported, so it never
    raises on odd metadata: requirement strings that cannot be parsed are
    skipped, and wildcard pins (``==0.22.*``) are ignored because they do not
    name a single version directory on the wheel index.

    Returns:
        The index URL for the first exact vllm pin found, or ``None`` if
        ai-dynamo (or an exact vllm pin) can't be found — only expected when
        the dynamo backend isn't actually installed, where this is unused.
    """
    # Local import: only this helper needs the exception type.
    from packaging.requirements import InvalidRequirement

    try:
        requirements = importlib.metadata.requires("ai-dynamo") or []
    except importlib.metadata.PackageNotFoundError:
        return None

    for raw in requirements:
        try:
            req = Requirement(raw)
        except InvalidRequirement:
            # A malformed Requires-Dist entry must not break importing this module.
            logger.debug(f"Skipping unparsable ai-dynamo requirement: {raw!r}")
            continue
        # Distribution names are case-insensitive; compare on the lowered form.
        if req.name.lower() != "vllm":
            continue
        for spec in req.specifier:
            # Only an exact, non-wildcard pin maps onto a per-version index path.
            if spec.operator in ("==", "===") and not spec.version.endswith(".*"):
                return f"https://wheels.vllm.ai/{spec.version}/{_ACTOR_VENV_CUDA_TAG}"
    return None
99+
100+
101+
# Flags for the ``uv pip install`` that Ray runs to build the actor venv. That
# install happens in an empty cwd, so none of the project's ``[tool.uv]``
# index/source/prerelease settings apply — everything has to be passed here.
#   * ``--override``: the per-node overrides file at ``_ACTOR_VENV_OVERRIDES_PATH``.
#   * ``--torch-backend``: routes the torch ecosystem to the cu129 index, the way
#     vLLM documents forcing a CUDA build with uv.
#   * ``--index-strategy unsafe-best-match``: REQUIRED so nixl resolves; its
#     versions are split across pypi.nvidia.com and PyPI, which the default
#     first-match strategy can't combine.
#   * ``--prerelease if-necessary-or-explicit``: the prerelease mode used for
#     this install.
#   * ``--extra-index-url``: one pair per extra index — NVIDIA's, plus the
#     per-version cu129 vllm index when ``_vllm_cu129_index_url`` finds one.
_ACTOR_VENV_UV_OPTIONS = [
    "--override",
    str(_ACTOR_VENV_OVERRIDES_PATH),
    "--torch-backend",
    _ACTOR_VENV_CUDA_TAG,
    "--index-strategy",
    "unsafe-best-match",
    "--prerelease",
    "if-necessary-or-explicit",
] + [
    token
    for index_url in ("https://pypi.nvidia.com", _vllm_cu129_index_url())
    if index_url is not None
    for token in ("--extra-index-url", index_url)
]
61124

62125
DYNAMO_VLLM_RUNTIME_ENV: dict[str, Any] = {
63126
"uv": {
64127
"packages": ["ai-dynamo[vllm]"],
65-
"uv_pip_install_options": ["--override", str(_ACTOR_VENV_OVERRIDES_PATH)],
128+
"uv_pip_install_options": _ACTOR_VENV_UV_OPTIONS,
66129
},
67130
"config": {"setup_timeout_seconds": 600},
68131
}
@@ -78,7 +141,8 @@ def ensure_actor_overrides_on_all_nodes(*, ignore_head_node: bool = False) -> No
78141
79142
The file pins ``ray=={ray.__version__}`` (read from the driver) so the
80143
actor venv keeps the same ray patch as the cluster head — Ray rejects
81-
any mismatch.
144+
any mismatch — and drops ``nixl-cu13`` so the cu12 NIXL backend is used
145+
(see module comment on :data:`_ACTOR_VENV_OVERRIDES_PATH`).
82146
83147
Must run inside an active Ray context, before any worker spawned with
84148
:data:`DYNAMO_VLLM_RUNTIME_ENV` lands. The runtime_env_agent on each
@@ -91,7 +155,7 @@ def ensure_actor_overrides_on_all_nodes(*, ignore_head_node: bool = False) -> No
91155
run_on_each_node(
92156
_write_actor_overrides_file,
93157
str(_ACTOR_VENV_OVERRIDES_PATH),
94-
f"ray=={ray.__version__}\n",
158+
f"ray=={ray.__version__}\n{_ACTOR_VENV_NIXL_CU13_EXCLUSION}\n",
95159
ignore_head_node=ignore_head_node,
96160
)
97161

pyproject.toml

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ dependencies = [
6666
"openai>=1.0.0",
6767
"pandas>=2.1.0",
6868
"pyarrow",
69-
"ray[default,data]>=2.55.1",
69+
"ray[default,data] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp313-cp313-manylinux2014_x86_64.whl ; python_version == '3.13' and platform_machine == 'x86_64' and platform_system != 'Darwin'",
7070
"torch",
7171
"transformers",
7272
]
@@ -76,14 +76,18 @@ cuda12 = [
7676
"gpustat",
7777
"nvidia-ml-py",
7878
]
79-
vllm = ["vllm>=0.14.1; (platform_machine == 'x86_64' and platform_system != 'Darwin')"]
79+
vllm = ["vllm[flashinfer,runai,otel]==0.22.0+cu129; (platform_machine == 'x86_64' and platform_system != 'Darwin')"]
8080

8181
# Inference Server (Ray Serve + vLLM) - for serving LLMs alongside Curator pipelines
8282
inference_server = [
8383
"nemo_curator[cuda12]",
8484
"nemo_curator[vllm]",
85-
"vllm<0.19; (platform_machine == 'x86_64' and platform_system != 'Darwin')", # Ray Serve LLM 2.55.1 isn't compatible with vllm 0.19+
86-
"ai-dynamo==1.1.0; (platform_machine == 'x86_64' and platform_system != 'Darwin')", # pin so the Dynamo actor venv resolves to the same release we test against; gated to x86_64 since vllm wheels are x86_64-only
85+
"ai-dynamo>=1.3.0.dev0; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
86+
# First-party + explicit .dev0 marker so prerelease="if-necessary-or-explicit" enables
87+
# nightlies for ai-dynamo-runtime too. ai-dynamo pins it (==<its dev>), but it's a
88+
# transitive with stable releases, so without this the newest dynamo nightly can't
89+
# resolve (its runtime pin is a disallowed prerelease) and uv falls back to an older dev.
90+
"ai-dynamo-runtime>=1.3.0.dev0; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
8791
"boto3>=1.35", # Get rid once https://github.com/ray-project/ray/issues/61269 is fixed
8892
"nixl-cu12>=0.10.0; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
8993
"ray[serve,llm]>=2.55.1",
@@ -216,7 +220,7 @@ text_cuda12 = [
216220
# Video Curation Dependencies
217221
video_cpu = [
218222
"av==13.1.0",
219-
"opencv-python",
223+
"opencv-python-headless", # headless: no GUI/FFmpeg (GPL) bundling or libGL system dep; identical for pipeline use and matches vllm/mistral_common/albumentations
220224
"torchvision",
221225
"einops",
222226
"easydict",
@@ -230,7 +234,7 @@ video_cuda12 = [
230234
"flash-attn<=2.8.3; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
231235
"pycuda",
232236
"PyNvVideoCodec==2.0.2; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
233-
"torch<=2.10.0",
237+
"torch<=2.11.0",
234238
"torchaudio",
235239
]
236240

@@ -252,7 +256,7 @@ interleaved_cpu = [
252256
"albumentations",
253257
"matplotlib",
254258
"open_clip_torch",
255-
"opencv-python",
259+
"opencv-python-headless", # headless: no GUI/FFmpeg (GPL) bundling or libGL system dep; identical for pipeline use and matches vllm/mistral_common/albumentations
256260
"Pillow",
257261
"pypdfium2",
258262
"s3fs>=2024.12.0",
@@ -290,7 +294,7 @@ all = [
290294
]
291295

292296
[dependency-groups]
293-
build = ["setuptools", "torch<=2.10.0", "Cython", "packaging"]
297+
build = ["setuptools", "torch<=2.11.0", "Cython", "packaging"]
294298
dev = ["jupyter"]
295299
linting = ["pre-commit", "ruff==0.14.10"]
296300
test = [
@@ -317,6 +321,10 @@ package = true
317321
managed = true
318322
default-groups = ["dev", "test"]
319323
index-strategy = "unsafe-best-match"
324+
# Default mode: only pick a prerelease when a requirement carries an explicit
325+
# prerelease marker (ai-dynamo>=1.3.0.dev0) or when every version in range is a
326+
# prerelease (ray nightly's otel transitives). Avoids blanket prereleases elsewhere.
327+
prerelease = "if-necessary-or-explicit"
320328
no-build-isolation-package = ["flash-attn"]
321329
constraint-dependencies = [
322330
"aiohttp>=3.13.3", # Addresses CVE GHSA-6mq8-rvhq-8wgg
@@ -340,13 +348,15 @@ override-dependencies = [
340348
"kaldiio; sys_platform == 'never'",
341349
"levenshtein; sys_platform == 'never'",
342350
"numpy>=2.0.0,<=2.2.0", # Override nemo-toolkits constraint of <2.0.0, upperbounds for Numba compatibility
351+
"numba==0.65.0", # Override RAPIDS/legacy caps for the inference image; vLLM 0.22 requires numba 0.65.0
343352
"protobuf>=5.29.5,<7.0", # Override nemo-toolkits constraint of ~=5.29.5; <7.0 due to ray serve FieldDescriptor API breakage
344353
"setuptools>=80.10.1", # Override setuptools range in other dependencies to address CVE GHSA-58pv-8j8x-9vj2
345-
"torch==2.10.0", # Override whisperx's <2.9 cap to match cu129 / vllm 0.18.x
346-
"torchaudio==2.10.0", # Override whisperx's <2.9 cap to match cu129 / vllm 0.18.x
347-
"torchvision==0.25.0", # Match torch==2.10.0
348-
"torchcodec~=0.10.0; platform_machine == 'x86_64' and platform_system != 'Darwin'", # pin to torchcodec 0.10.x for torch 2.10 ABI compatibilitytorchcodec doesn't declare a torch dep, so the resolver can't enforce the match; satisfies pyannote-audio's >=0.7.0 floor; x86_64-only since aarch64 lacks wheels
354+
"torch==2.11.0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", # Match vLLM's CUDA requirements; Linux resolves to cu129 via tool.uv.sources
355+
"torchaudio==2.11.0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", # Match torch==2.11.0
356+
"torchvision==0.26.0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", # Match torch==2.11.0
357+
"torchcodec~=0.11.0; platform_machine == 'x86_64' and platform_system != 'Darwin'", # pin to torchcodec 0.11.x for torch 2.11 ABI compatibility; torchcodec does not declare a torch dep, so the resolver cannot enforce the match; satisfies pyannote-audio's >=0.7.0 floor; x86_64-only since aarch64 lacks wheels
349358
"nixl-cu12>=0.10.0; (platform_machine == 'x86_64' and platform_system != 'Darwin')", # Override ray[llm]'s unconditional nixl dep for ARM
359+
"nixl-cu13; sys_platform == 'never'", # ray[llm]/nixl hard-pin the CUDA-13 NIXL backend. On this CUDA-12.9 image vLLM's eager `import nixl_ep` would load cu13's nixl_ep_cpp.so and dlopen the absent libcudart.so.13. Drop it; the nixl meta + nixl-cu12 backend (nixl's own default) remain.
350360
"xgrammar>=0.1.32", # Override vllm's ==0.1.29 pin to address CVE GHSA-7rgv-gqhr-fxg3 (DoS via multi-layer nesting)
351361
]
352362

@@ -365,6 +375,11 @@ name = "pytorch"
365375
url = "https://download.pytorch.org/whl/cu129"
366376
explicit = true
367377

378+
[[tool.uv.index]]
379+
name = "vllm-cu129"
380+
url = "https://wheels.vllm.ai/0.22.0/cu129"
381+
explicit = true
382+
368383
[tool.uv.sources]
369384
torch = [
370385
{ index = "pytorch", marker = "sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')" },
@@ -382,6 +397,12 @@ torchcodec = [
382397
{ index = "pytorch", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
383398
{ index = "pypi", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'" },
384399
]
400+
ai-dynamo = { index = "nvidia" }
401+
ai-dynamo-runtime = { index = "nvidia" }
402+
vllm = [
403+
{ index = "vllm-cu129", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
404+
{ index = "pypi", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'" },
405+
]
385406
nixl = { index = "pypi" }
386407
nixl-cu12 = { index = "pypi" }
387408

tests/core/serve/dynamo/test_vllm.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -346,12 +346,13 @@ class TestEnsureActorOverridesOnAllNodes:
346346
``--override`` constraints file before workers are spawned."""
347347

348348
def test_writes_current_ray_version_at_path(self, shared_ray_client: None, tmp_path: Path) -> None:
349-
"""The fan-out writes ``ray=={ray.__version__}`` at the configured
350-
path on every alive node. Catches regressions where the content is
351-
hardcoded and silently drifts after a Curator ray bump.
349+
"""The fan-out writes ``ray=={ray.__version__}`` plus the nixl-cu13
350+
exclusion at the configured path on every alive node. Catches
351+
regressions where the content is hardcoded and silently drifts after
352+
a Curator ray bump.
352353
"""
353354
override_path = tmp_path / "override.txt"
354355
with mock.patch.object(dynamo_vllm, "_ACTOR_VENV_OVERRIDES_PATH", override_path):
355356
dynamo_vllm.ensure_actor_overrides_on_all_nodes()
356357

357-
assert override_path.read_text() == f"ray=={ray.__version__}\n"
358+
assert override_path.read_text() == f"ray=={ray.__version__}\n{dynamo_vllm._ACTOR_VENV_NIXL_CU13_EXCLUSION}\n"

0 commit comments

Comments
 (0)