NVIDIA-NeMo · praateekmahajan · Jun 10, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 16, 2026
@@ -252,7 +252,7 @@ entries:
       --input-path={dataset:tinystories,parquet}
       --dataset-size-gb=10
       --model-identifier=google/embeddinggemma-300m
-      --model-variation=vllm_text
+      --model-variation=vllm_text_pretokenized
       --cache-dir={dataset:text_models_hf_cache,files}
     timeout_s: 800
     sink_data:
@@ -278,7 +278,7 @@ entries:
       --input-path={dataset:tinystories,parquet}
       --dataset-size-gb=10
       --model-identifier=google/embeddinggemma-300m
-      --model-variation=vllm_text
+      --model-variation=vllm_text_pretokenized
       --cache-dir={dataset:text_models_hf_cache,files}
     timeout_s: 800
     sink_data:
@@ -875,7 +875,7 @@ entries:
       --video-dir={dataset:videos,mp4}
       --model-dir={dataset:videos_model_weights,files}
       --video-limit=1000
-    timeout_s: 400
+    timeout_s: 900
     sink_data:
       - name: slack
         ping_on_failure:

@@ -72,11 +72,9 @@ COPY docker/common/install_etcd_nats.sh .
 RUN bash install_etcd_nats.sh && \
     rm install_etcd_nats.sh
 
-# Install HAProxy so Ray Serve's HAProxy ingress mode (RAY_SERVE_ENABLE_HA_PROXY=1) has the binary on PATH
-COPY docker/common/install_haproxy.sh .
-RUN bash install_haproxy.sh && \
-    rm install_haproxy.sh
-ENV RAY_SERVE_HAPROXY_BINARY_PATH=/usr/local/bin/haproxy
+# Use the ray-haproxy wheel so Ray Serve's HAProxy ingress mode can discover
+# the packaged binary without compiling HAProxy in the image.
+ENV RAY_SERVE_EXPERIMENTAL_PIP_HAPROXY=1
 
 
 FROM nemo_curator_dep AS nemo_curator

@@ -37,6 +37,35 @@
 os.environ["RAY_MAX_LIMIT_FROM_API_SERVER"] = str(API_LIMIT)
 os.environ["RAY_MAX_LIMIT_FROM_DATA_SOURCE"] = str(API_LIMIT)
 
+
+def _ensure_ray_dashboard_frontend() -> None:
+    """Stub Ray's dashboard frontend dir once (nightly ray only), before any cluster starts.
+
+    Ray *nightly* wheels omit the prebuilt dashboard frontend (``dashboard/client/build``
+    is an npm artifact built only for releases), so the dashboard process dies with
+    ``FrontendNotFoundError`` and its state API server never registers — which breaks
+    every ``ray.util.state`` call (Xenna drives pipelines through it) with "Could not
+    read 'dashboard' from GCS". Creating the dir (relative to the installed ``ray``, so
+    it works in any venv) lets the dashboard start; the web UI itself is unused.
+
+    Gated to dev/nightly builds so published releases (which ship the frontend) are
+    untouched. Best-effort: a read-only install must not break ``import``.
+    """
+    import contextlib
+    from pathlib import Path
+
+    import ray
+    from packaging.version import Version
+
+    if not Version(ray.__version__).is_devrelease:
+        return
+    # Best-effort: a read-only install must not break ``import nemo_curator``.
+    with contextlib.suppress(OSError):
+        (Path(ray.__file__).parent / "dashboard" / "client" / "build" / "static").mkdir(parents=True, exist_ok=True)
+
+
+_ensure_ray_dashboard_frontend()
+
 # Raise an informative error early to users on unsupported systems
 if sys.platform != "linux":
     _msg = (

@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import importlib.metadata
 import json
 import tempfile
 from functools import reduce
@@ -24,6 +25,7 @@
 
 import ray
 from loguru import logger
+from packaging.requirements import InvalidRequirement, Requirement
 
 from nemo_curator.core.serve.base import BaseModelConfig
 from nemo_curator.core.serve.dynamo.infra import (
@@ -50,19 +52,91 @@
     from nemo_curator.core.serve.placement import ReplicaBundleSpec
 
 
-# ai-dynamo[vllm]'s [vllm] extra carries a hard ray pin, but Ray refuses
-# actor venvs whose ray version differs from the cluster head's. uv has no
-# inline override syntax — only ``--override <file>`` — so we materialize a
-# tiny constraints file at a fixed path on every node via
-# ``ensure_actor_overrides_on_all_nodes``; the content is derived from the
-# driver's ``ray.__version__`` at fan-out time so a future Curator ray bump
-# doesn't need a code change here.
+# The actor venv ``uv pip install`` needs overrides that pyproject's ``[tool.uv]``
+# can't reach (Ray runs it in an empty cwd). uv has no inline override syntax —
+# only ``--override <file>`` — so we materialize a constraints file at a fixed path
+# on every node via ``ensure_actor_overrides_on_all_nodes``. It carries:
+#   * ``ray==<driver version>`` — ai-dynamo[vllm]'s [vllm] extra has a hard ray pin,
+#     but Ray refuses actor venvs whose ray differs from the cluster head's. Derived
+#     from the driver's ``ray.__version__`` so a future Curator ray bump needs no edit.
+#   * ``nixl-cu13`` dropped — ai-dynamo[vllm] pulls the CUDA-13 NIXL backend, whose
+#     eagerly-imported ``nixl_ep_cpp.so`` dlopens libcudart.so.13 (absent on this
+#     CUDA-12.9 image). The base image excludes it via pyproject, but that override
+#     doesn't reach this standalone install; re-apply it here so the cu12 backend wins.
 _ACTOR_VENV_OVERRIDES_PATH = Path(tempfile.gettempdir()) / "nemo_curator_dynamo_actor_overrides.txt"
+_ACTOR_VENV_NIXL_CU13_EXCLUSION = "nixl-cu13 ; sys_platform == 'never'"
+# The CUDA build the actor venv must match (torch ecosystem + vllm wheel variant).
+_ACTOR_VENV_CUDA_TAG = "cu129"
+# Latest known nightly that includes ai-dynamo/dynamo#10380 while ai-dynamo[vllm]
+# still pins vLLM 0.22.x. Newer 1.3.0 nightlies moved to vLLM 0.23.0.
+_DYNAMO_NIGHTLY_VERSION = "1.3.0.dev20260615"
+
+
+def _vllm_cu129_index_url() -> str | None:
+    """The vLLM cu129 wheel index for the exact version ai-dynamo[vllm] pins.
+
+    ai-dynamo's [vllm] extra pins an exact vllm (e.g. ``==0.22.1``) that may
+    differ from Curator's base vllm — the base installs ai-dynamo WITHOUT its
+    [vllm] extra, so its vllm comes from Curator's own pin, while the actor
+    venv installs ``ai-dynamo[vllm]`` and must honor ai-dynamo's pin. vLLM
+    publishes a per-version cu129 wheel index at ``wheels.vllm.ai/<v>/cu129``;
+    pointing at the pinned version means its ``+cu129`` local build sorts above
+    the default cu130 wheel under unsafe-best-match. Derived from ai-dynamo's
+    own metadata so a nightly bump (which changes the vllm pin) needs no edit.
+
+    Returns None if ai-dynamo (or its vllm pin) can't be found — only happens
+    when the dynamo backend isn't actually installed, where this is unused.
+    """
+    try:
+        requirements = importlib.metadata.requires("ai-dynamo") or []
+    except importlib.metadata.PackageNotFoundError:
+        return None
+    for raw in requirements:
+        try:
+            req = Requirement(raw)
+        except InvalidRequirement:
+            continue  # a malformed Requires-Dist line must not break module import
+        # Match vllm only as it applies under the [vllm] extra we install (skip a vllm
+        # pin that some other ai-dynamo extra might add under a different marker).
+        if req.name != "vllm" or (req.marker is not None and not req.marker.evaluate({"extra": "vllm"})):
+            continue
+        pinned = next((spec.version for spec in req.specifier if spec.operator in ("==", "===")), None)
+        if pinned:
+            return f"https://wheels.vllm.ai/{pinned}/{_ACTOR_VENV_CUDA_TAG}"
+    return None
+
+
+# Ray builds the actor venv with a bare ``uv pip install`` in an empty cwd, so it
+# inherits none of the project's ``[tool.uv]`` index/source/prerelease config — only
+# what we pass here. Force CUDA 12.9 the way vLLM documents for uv: --torch-backend
+# routes the torch ecosystem to the cu129 index, and the per-version cu129 vllm index
+# (see ``_vllm_cu129_index_url``) keeps vllm on cu129. ``unsafe-best-match`` is REQUIRED
+# so nixl resolves (its version is split across pypi.nvidia.com and PyPI, which the
+# default first-match strategy can't combine).
+_ACTOR_VENV_UV_OPTIONS = [
+    "--override",
+    str(_ACTOR_VENV_OVERRIDES_PATH),
+    "--torch-backend",
+    _ACTOR_VENV_CUDA_TAG,
+    "--index-strategy",
+    "unsafe-best-match",
+    "--prerelease",
+    "if-necessary-or-explicit",
+    *(
+        arg
+        for url in ("https://pypi.nvidia.com", _vllm_cu129_index_url())
+        if url is not None
+        for arg in ("--extra-index-url", url)
+    ),
+]
 
 DYNAMO_VLLM_RUNTIME_ENV: dict[str, Any] = {
     "uv": {
-        "packages": ["ai-dynamo[vllm]"],
-        "uv_pip_install_options": ["--override", str(_ACTOR_VENV_OVERRIDES_PATH)],
+        "packages": [
+            f"ai-dynamo[vllm]=={_DYNAMO_NIGHTLY_VERSION}",
+            f"ai-dynamo-runtime=={_DYNAMO_NIGHTLY_VERSION}",
+        ],
+        "uv_pip_install_options": _ACTOR_VENV_UV_OPTIONS,
     },
     "config": {"setup_timeout_seconds": 600},
 }
@@ -78,7 +152,8 @@ def ensure_actor_overrides_on_all_nodes(*, ignore_head_node: bool = False) -> No
 
     The file pins ``ray=={ray.__version__}`` (read from the driver) so the
     actor venv keeps the same ray patch as the cluster head — Ray rejects
-    any mismatch.
+    any mismatch — and drops ``nixl-cu13`` so the cu12 NIXL backend is used
+    (see module comment on :data:`_ACTOR_VENV_OVERRIDES_PATH`).
 
     Must run inside an active Ray context, before any worker spawned with
     :data:`DYNAMO_VLLM_RUNTIME_ENV` lands. The runtime_env_agent on each
@@ -91,7 +166,7 @@ def ensure_actor_overrides_on_all_nodes(*, ignore_head_node: bool = False) -> No
     run_on_each_node(
         _write_actor_overrides_file,
         str(_ACTOR_VENV_OVERRIDES_PATH),
-        f"ray=={ray.__version__}\n",
+        f"ray=={ray.__version__}\n{_ACTOR_VENV_NIXL_CU13_EXCLUSION}\n",
         ignore_head_node=ignore_head_node,
     )
 

@@ -36,7 +36,7 @@ def __init__(  # noqa: PLR0913
         model_identifier: str,
         vllm_init_kwargs: dict[str, Any] | None = None,
         text_field: str = "text",
-        pretokenize: bool = False,
+        pretokenize: bool = True,
         embedding_field: str = "embeddings",
         max_chars: int | None = None,
         cache_dir: str | None = None,