You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Support ray/dynamo nightly + vLLM 0.22 (cu129) across all extras
Enable the Ray 3.0 nightly + ai-dynamo nightly + vLLM 0.22 inference stack
on the CUDA-12.9 image while keeping the full Curator dependency set
(`uv sync --all-extras --all-groups`) resolvable and buildable.
pyproject.toml:
- ray: track the 3.0.0.dev0 nightly wheel (rolling /latest/ URL)
- ai-dynamo and ai-dynamo-runtime >=1.3.0.dev0, both declared first-party so
prerelease="if-necessary-or-explicit" enables the newest nightly without
enabling prereleases globally (ai-dynamo-runtime is a transitive dependency
that also has stable releases, so it needs an explicit .dev0 specifier;
otherwise uv backtracks to an older ai-dynamo dev build)
- vLLM 0.22.0+cu129 via a dedicated cu129 wheel index + tool.uv.sources
(default vLLM is now cu130; keep torch/vllm on CUDA 12.9)
- drop nixl-cu13: ray[llm]/nixl hard-pin the CUDA-13 NIXL backend, whose
eager `import nixl_ep` dlopens the absent libcudart.so.13 on cu12.9; keep
the nixl meta + nixl-cu12 backend
- opencv-python -> opencv-python-headless (no libGL/GPL GUI/FFmpeg bundling;
matches vllm/mistral_common/albumentations)
- bump torch/torchaudio to 2.11.0, torchvision to 0.26.0, and torchcodec to
0.11.x (the torch 2.11 cu129 line)
dynamo actor venv runtime_env (vllm.py): Ray builds it via a bare
`uv pip install ai-dynamo[vllm]` that ignores pyproject, so force cu129 the
way uv/vLLM document: --torch-backend cu129, --index-strategy
unsafe-best-match (needed for nixl's split index resolution), and a
per-version cu129 vllm index derived from ai-dynamo's own pin. The --override
file pins ray== and drops nixl-cu13.
Signed-off-by: Praateek <praateekm@gmail.com>
Copy file name to clipboard | Expand all lines: pyproject.toml
+33 -12 | Lines changed: 33 additions & 12 deletions
Original file line number
Diff line number
Diff line change
@@ -66,7 +66,7 @@ dependencies = [
66
66
"openai>=1.0.0",
67
67
"pandas>=2.1.0",
68
68
"pyarrow",
69
-
"ray[default,data]>=2.55.1",
69
+
"ray[default,data] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp313-cp313-manylinux2014_x86_64.whl ; python_version == '3.13' and platform_machine == 'x86_64' and platform_system != 'Darwin'",
70
70
"torch",
71
71
"transformers",
72
72
]
@@ -76,14 +76,18 @@ cuda12 = [
76
76
"gpustat",
77
77
"nvidia-ml-py",
78
78
]
79
-
vllm = ["vllm>=0.14.1; (platform_machine == 'x86_64' and platform_system != 'Darwin')"]
79
+
vllm = ["vllm[flashinfer,runai,otel]==0.22.0+cu129; (platform_machine == 'x86_64' and platform_system != 'Darwin')"]
80
80
81
81
# Inference Server (Ray Serve + vLLM) - for serving LLMs alongside Curator pipelines
82
82
inference_server = [
83
83
"nemo_curator[cuda12]",
84
84
"nemo_curator[vllm]",
85
-
"vllm<0.19; (platform_machine == 'x86_64' and platform_system != 'Darwin')", # Ray Serve LLM 2.55.1 isn't compatible with vllm 0.19+
86
-
"ai-dynamo==1.1.0; (platform_machine == 'x86_64' and platform_system != 'Darwin')", # pin so the Dynamo actor venv resolves to the same release we test against; gated to x86_64 since vllm wheels are x86_64-only
85
+
"ai-dynamo>=1.3.0.dev0; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
86
+
# First-party + explicit .dev0 marker so prerelease="if-necessary-or-explicit" enables
87
+
# nightlies for ai-dynamo-runtime too. ai-dynamo pins it (==<its dev>), but it's a
88
+
# transitive with stable releases, so without this the newest dynamo nightly can't
89
+
# resolve (its runtime pin is a disallowed prerelease) and uv falls back to an older dev.
90
+
"ai-dynamo-runtime>=1.3.0.dev0; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
87
91
"boto3>=1.35", # Get rid once https://github.com/ray-project/ray/issues/61269 is fixed
88
92
"nixl-cu12>=0.10.0; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
89
93
"ray[serve,llm]>=2.55.1",
@@ -216,7 +220,7 @@ text_cuda12 = [
216
220
# Video Curation Dependencies
217
221
video_cpu = [
218
222
"av==13.1.0",
219
-
"opencv-python",
223
+
"opencv-python-headless", # headless: no GUI/FFmpeg (GPL) bundling or libGL system dep; identical for pipeline use and matches vllm/mistral_common/albumentations
220
224
"torchvision",
221
225
"einops",
222
226
"easydict",
@@ -230,7 +234,7 @@ video_cuda12 = [
230
234
"flash-attn<=2.8.3; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
231
235
"pycuda",
232
236
"PyNvVideoCodec==2.0.2; (platform_machine == 'x86_64' and platform_system != 'Darwin')",
233
-
"torch<=2.10.0",
237
+
"torch<=2.11.0",
234
238
"torchaudio",
235
239
]
236
240
@@ -252,7 +256,7 @@ interleaved_cpu = [
252
256
"albumentations",
253
257
"matplotlib",
254
258
"open_clip_torch",
255
-
"opencv-python",
259
+
"opencv-python-headless", # headless: no GUI/FFmpeg (GPL) bundling or libGL system dep; identical for pipeline use and matches vllm/mistral_common/albumentations
"numpy>=2.0.0,<=2.2.0", # Override nemo-toolkits constraint of <2.0.0, upperbounds for Numba compatibility
351
+
"numba==0.65.0", # Override RAPIDS/legacy caps for the inference image; vLLM 0.22 requires numba 0.65.0
343
352
"protobuf>=5.29.5,<7.0", # Override nemo-toolkits constraint of ~=5.29.5; <7.0 due to ray serve FieldDescriptor API breakage
344
353
"setuptools>=80.10.1", # Override setuptools range in other dependencies to address CVE GHSA-58pv-8j8x-9vj2
345
-
"torch==2.10.0", #Override whisperx's <2.9 cap to match cu129 / vllm 0.18.x
346
-
"torchaudio==2.10.0", # Override whisperx's <2.9 cap to match cu129 / vllm 0.18.x
347
-
"torchvision==0.25.0", # Match torch==2.10.0
348
-
"torchcodec~=0.10.0; platform_machine == 'x86_64' and platform_system != 'Darwin'", # pin to torchcodec 0.10.x for torch 2.10 ABI compatibility — torchcodec doesn't declare a torch dep, so the resolver can't enforce the match; satisfies pyannote-audio's >=0.7.0 floor; x86_64-only since aarch64 lacks wheels
354
+
"torch==2.11.0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", #Match vLLM's CUDA requirements; Linux resolves to cu129 via tool.uv.sources
355
+
"torchaudio==2.11.0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", # Match torch==2.11.0
356
+
"torchvision==0.26.0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", # Match torch==2.11.0
357
+
"torchcodec~=0.11.0; platform_machine == 'x86_64' and platform_system != 'Darwin'", # pin to torchcodec 0.11.x for torch 2.11 ABI compatibility; torchcodec does not declare a torch dep, so the resolver cannot enforce the match; satisfies pyannote-audio's >=0.7.0 floor; x86_64-only since aarch64 lacks wheels
349
358
"nixl-cu12>=0.10.0; (platform_machine == 'x86_64' and platform_system != 'Darwin')", # Override ray[llm]'s unconditional nixl dep for ARM
359
+
"nixl-cu13; sys_platform == 'never'", # ray[llm]/nixl hard-pin the CUDA-13 NIXL backend. On this CUDA-12.9 image vLLM's eager `import nixl_ep` would load cu13's nixl_ep_cpp.so and dlopen the absent libcudart.so.13. Drop it; the nixl meta + nixl-cu12 backend (nixl's own default) remain.
350
360
"xgrammar>=0.1.32", # Override vllm's ==0.1.29 pin to address CVE GHSA-7rgv-gqhr-fxg3 (DoS via multi-layer nesting)
351
361
]
352
362
@@ -365,6 +375,11 @@ name = "pytorch"
365
375
url = "https://download.pytorch.org/whl/cu129"
366
376
explicit = true
367
377
378
+
[[tool.uv.index]]
379
+
name = "vllm-cu129"
380
+
url = "https://wheels.vllm.ai/0.22.0/cu129"
381
+
explicit = true
382
+
368
383
[tool.uv.sources]
369
384
torch = [
370
385
{ index = "pytorch", marker = "sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')" },
@@ -382,6 +397,12 @@ torchcodec = [
382
397
{ index = "pytorch", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
383
398
{ index = "pypi", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'" },
384
399
]
400
+
ai-dynamo = { index = "nvidia" }
401
+
ai-dynamo-runtime = { index = "nvidia" }
402
+
vllm = [
403
+
{ index = "vllm-cu129", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
404
+
{ index = "pypi", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'" },
0 commit comments