Merge branch 'main' into fix/tpu7x-chip-counting

burbajr · web-flow · commit f70ba2e27d98 · 2025-12-08T17:37:24.000-07:00
diff --git a/README.md b/README.md
@@ -11,14 +11,12 @@
 
 ---
 
-_Upcoming Events_ 🔥
-
-- Join us at the [PyTorch Conference, October 22-23](https://events.linuxfoundation.org/pytorch-conference/) in San Francisco!
-- Join us at [Ray Summit, November 3-5](https://www.anyscale.com/ray-summit/2025) in San Francisco!
-- Join us at [JAX DevLab on November 18th](https://rsvp.withgoogle.com/events/devlab-fall-2025) in Sunnyvale!
-
 _Latest News_ 🔥
 
+- [PyTorch Conference](https://pytorchconference.sched.com/event/27QCh/sponsored-session-everything-everywhere-all-at-once-vllm-hardware-optionality-with-spotify-and-google-brittany-rockwell-google-shireen-kheradpey-spotify): Learn how Spotify uses vLLM with both GPUs and TPUs to drive down costs and improve user experience.
+- Check back soon for a recording of our session at [Ray Summit, November 3-5](https://www.anyscale.com/ray-summit/2025) in San Francisco!
+- Check back soon for a recording of our session at [JAX DevLab on November 18th](https://rsvp.withgoogle.com/events/devlab-fall-2025) in Sunnyvale!
+
 - [2025/10] [vLLM TPU: A New Unified Backend Supporting PyTorch and JAX on TPU](https://blog.vllm.ai/2025/10/16/vllm-tpu.html)
 
 <details>
diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -87,6 +87,77 @@ def test_boolean_env_vars(monkeypatch: pytest.MonkeyPatch):
     assert envs.USE_MOE_EP_KERNEL is True
 
 
+def test_boolean_env_vars_string_values(monkeypatch: pytest.MonkeyPatch):
+    """Test that boolean env vars accept string values like 'True' and 'False'"""
+
+    # Test NEW_MODEL_DESIGN with string "True"
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "True")
+    assert envs.NEW_MODEL_DESIGN is True
+
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "true")
+    assert envs.NEW_MODEL_DESIGN is True
+
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "False")
+    assert envs.NEW_MODEL_DESIGN is False
+
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "false")
+    assert envs.NEW_MODEL_DESIGN is False
+
+    # Test SKIP_JAX_PRECOMPILE with string values
+    monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "True")
+    assert envs.SKIP_JAX_PRECOMPILE is True
+
+    monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "false")
+    assert envs.SKIP_JAX_PRECOMPILE is False
+
+    # Test VLLM_XLA_CHECK_RECOMPILATION with string values
+    monkeypatch.setenv("VLLM_XLA_CHECK_RECOMPILATION", "TRUE")
+    assert envs.VLLM_XLA_CHECK_RECOMPILATION is True
+
+    monkeypatch.setenv("VLLM_XLA_CHECK_RECOMPILATION", "FALSE")
+    assert envs.VLLM_XLA_CHECK_RECOMPILATION is False
+
+    # Test USE_MOE_EP_KERNEL with string values
+    monkeypatch.setenv("USE_MOE_EP_KERNEL", "true")
+    assert envs.USE_MOE_EP_KERNEL is True
+
+    monkeypatch.setenv("USE_MOE_EP_KERNEL", "False")
+    assert envs.USE_MOE_EP_KERNEL is False
+
+
+def test_boolean_env_vars_invalid_values(monkeypatch: pytest.MonkeyPatch):
+    """Test that boolean env vars raise errors for invalid values"""
+
+    # Test invalid value for NEW_MODEL_DESIGN
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "yes")
+    with pytest.raises(
+            ValueError,
+            match="Invalid boolean value 'yes' for NEW_MODEL_DESIGN"):
+        _ = envs.NEW_MODEL_DESIGN
+
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "2")
+    with pytest.raises(ValueError,
+                       match="Invalid boolean value '2' for NEW_MODEL_DESIGN"):
+        _ = envs.NEW_MODEL_DESIGN
+
+    # Test invalid value for SKIP_JAX_PRECOMPILE
+    monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "invalid")
+    with pytest.raises(
+            ValueError,
+            match="Invalid boolean value 'invalid' for SKIP_JAX_PRECOMPILE"):
+        _ = envs.SKIP_JAX_PRECOMPILE
+
+
+def test_boolean_env_vars_empty_string(monkeypatch: pytest.MonkeyPatch):
+    """Test that empty string returns default value"""
+
+    monkeypatch.setenv("NEW_MODEL_DESIGN", "")
+    assert envs.NEW_MODEL_DESIGN is False  # Should return default
+
+    monkeypatch.setenv("SKIP_JAX_PRECOMPILE", "")
+    assert envs.SKIP_JAX_PRECOMPILE is False  # Should return default
+
+
 def test_integer_env_vars(monkeypatch: pytest.MonkeyPatch):
     # Ensure clean environment for integer vars by setting to defaults
     monkeypatch.setenv("PYTHON_TRACER_LEVEL", "1")
diff --git a/tpu_inference/envs.py b/tpu_inference/envs.py
@@ -69,6 +69,34 @@ def _get_validated_env() -> str | None:
     return _get_validated_env
 
 
+def env_bool(env_name: str, default: bool = False) -> Callable[[], bool]:
+    """
+    Build a getter that parses the variable as a boolean: "0"/"1" or
+    "true"/"false" (case-insensitive); other non-empty values raise ValueError.
+
+    Args:
+        env_name: Name of the environment variable
+        default: Value returned when the variable is unset or empty
+    """
+
+    def _get_bool_env() -> bool:
+        value = os.getenv(env_name)
+        if value is None or value == "":
+            return default
+
+        value_lower = value.lower()
+        if value_lower in ("true", "1"):
+            return True
+        elif value_lower in ("false", "0"):
+            return False
+        else:
+            raise ValueError(
+                f"Invalid boolean value '{value}' for {env_name}. "
+                f"Valid options: '0', '1', 'true', 'false', 'True', 'False'.")
+
+    return _get_bool_env
+
+
 environment_variables: dict[str, Callable[[], Any]] = {
     # JAX platform selection (e.g., "tpu", "cpu", "proxy")
     "JAX_PLATFORMS":
@@ -93,17 +121,17 @@ def _get_validated_env() -> str | None:
     lambda: os.getenv("DECODE_SLICES", ""),
     # Skip JAX precompilation step during initialization
     "SKIP_JAX_PRECOMPILE":
-    lambda: bool(int(os.getenv("SKIP_JAX_PRECOMPILE") or "0")),
+    env_bool("SKIP_JAX_PRECOMPILE", default=False),
     # Check for XLA recompilation during execution
     "VLLM_XLA_CHECK_RECOMPILATION":
-    lambda: bool(int(os.getenv("VLLM_XLA_CHECK_RECOMPILATION") or "0")),
+    env_bool("VLLM_XLA_CHECK_RECOMPILATION", default=False),
     # Model implementation type (e.g., "flax_nnx")
     "MODEL_IMPL_TYPE":
     env_with_choices("MODEL_IMPL_TYPE", "flax_nnx",
                      ["vllm", "flax_nnx", "jetpack"]),
     # Enable new experimental model design
     "NEW_MODEL_DESIGN":
-    lambda: bool(int(os.getenv("NEW_MODEL_DESIGN") or "0")),
+    env_bool("NEW_MODEL_DESIGN", default=False),
     # Directory to store phased profiling output
     "PHASED_PROFILING_DIR":
     lambda: os.getenv("PHASED_PROFILING_DIR", ""),
@@ -112,7 +140,7 @@ def _get_validated_env() -> str | None:
     lambda: int(os.getenv("PYTHON_TRACER_LEVEL") or "1"),
     # Use custom expert-parallel kernel for MoE (Mixture of Experts)
     "USE_MOE_EP_KERNEL":
-    lambda: bool(int(os.getenv("USE_MOE_EP_KERNEL") or "0")),
+    env_bool("USE_MOE_EP_KERNEL", default=False),
     # Number of TPU slices for multi-slice mesh
     "NUM_SLICES":
     lambda: int(os.getenv("NUM_SLICES") or "1"),