Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions tests/agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Agent tests

This directory contains the CPU-only unit tests for the new `verl.agent`
packages introduced for the agent framework / gateway path.

## Naming and CI routing

All executable test modules in this directory use the `*_on_cpu.py` suffix so
they are picked up by VERL's existing `cpu_unit_tests.yml` workflow instead of
the default GPU unit-test workflow.

## Coverage inventory

### Framework

- `framework/test_assembler_on_cpu.py`
- Verifies `TrajectoryAssembler` emits the expected training batch contract.
- Checks tensor padding, masks, logprobs, routed experts, and non-tensor
metadata packing in the final `TensorDict`.
- `framework/test_openai_compatible_framework_on_cpu.py`
- Verifies `OpenAICompatibleAgentFramework` runs against a fake in-memory
session runtime without Ray, HTTP serving, or LLM backends.
- Covers session creation, finalize/abort behavior, reward assignment,
optional wait-for-completion, non-tensor field broadcast, and missing
rollout logprob handling.

### Gateway

- `gateway/test_gateway_actor_on_cpu.py`
- CPU-only FastAPI/Ray actor contract tests for `GatewayActor`.
- Uses fake tokenizer + mocked backends instead of real `LLMServer` /
rollout-serving paths.
- Covers request normalization, prefix matching, tool-schema drift,
continuation masks, per-session concurrency serialization, completion
semantics, invalid request rejection, backend failure rollback, and tool
parser response formatting.
- `gateway/test_gateway_manager_on_cpu.py`
- Verifies sticky session routing and least-active gateway selection.
- Uses lightweight fake gateway actors instead of real serving stacks.
- `gateway/test_session_runtime_on_cpu.py`
- Verifies `GatewayServingRuntime` owns gateway lifecycle and session runtime
behavior independently from `agent_loop`.
- Covers both runtime-owned fake backend injection and mocked load-balancer /
rollout-server integration.

## Mocking boundaries

- No test in this directory depends on a real `LLMServer`, model weights, or a
production serving runtime.
- `tests/agent/support.py` provides the fakes and lightweight Ray actors used by
the gateway/runtime tests.
- The only retained dependency on the old experimental tree is
`verl.experimental.agent_loop.tool_parser`, which is intentionally reused by
`GatewayActor` until the community-wide extraction lands.
154 changes: 154 additions & 0 deletions tests/agent/framework/test_assembler_on_cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import numpy as np
import torch
from tensordict import TensorDict

from verl.agent.framework.assembler import TrajectoryAssembler
from verl.agent.framework.types import Trajectory
from verl.utils import tensordict_utils as tu


def test_trajectory_assembler_matches_training_batch_contract():
    """Assemble two uneven trajectories and pin the training-batch layout.

    Sample 0 is the longest on both sides (prompt len 2, response len 3);
    sample 1 (prompt len 1, response len 2) therefore needs left prompt
    padding and right response padding, both with pad_token_id=0.
    """
    first = Trajectory(
        uid="sample-0",
        session_id="session-0",
        trajectory_id=0,
        prompt_ids=[10, 11],
        response_ids=[20, 21, 22],
        response_mask=[1, 1, 0],
        response_logprobs=[-0.1, -0.2, 0.0],
        reward_info={"score": 0.5, "label": "alpha"},
        reward_score=0.5,
        num_turns=2,
        # One entry per (prompt + response) position: 5 positions, 2 layers, top-1.
        routed_experts=torch.tensor(
            [[[1], [2]], [[3], [4]], [[5], [6]], [[7], [8]], [[9], [10]]],
            dtype=torch.int64,
        ),
    )
    second = Trajectory(
        uid="sample-1",
        session_id="session-1",
        trajectory_id=0,
        prompt_ids=[30],
        response_ids=[40, 41],
        response_mask=[1, 1],
        response_logprobs=[-0.3, -0.4],
        reward_info={"score": 1.5, "label": "beta"},
        reward_score=1.5,
        num_turns=3,
        # 3 real positions (1 prompt + 2 response) before padding.
        routed_experts=torch.tensor(
            [[[11], [12]], [[13], [14]], [[15], [16]]],
            dtype=torch.int64,
        ),
    )

    batch = TrajectoryAssembler(pad_token_id=0).assemble([first, second])

    assert isinstance(batch, TensorDict)

    # Padded shapes: prompt width 2, response width 3, full sequence 5.
    expected_shapes = {
        "prompts": (2, 2),
        "responses": (2, 3),
        "response_mask": (2, 3),
        "input_ids": (2, 5),
        "attention_mask": (2, 5),
        "position_ids": (2, 5),
        "rollout_log_probs": (2, 3),
        "routed_experts": (2, 5, 2, 1),
        "rm_scores": (2, 3),
    }
    for key, shape in expected_shapes.items():
        assert tuple(batch[key].shape) == shape

    # Integer fields: prompts are left-padded, responses right-padded; the
    # attention mask zeroes both pad regions and position ids freeze across
    # the left pad then count up through the real tokens.
    expected_long = {
        "prompts": [
            [10, 11],
            [0, 30],
        ],
        "responses": [
            [20, 21, 22],
            [40, 41, 0],
        ],
        "response_mask": [
            [1, 1, 0],
            [1, 1, 0],
        ],
        "attention_mask": [
            [1, 1, 1, 1, 1],
            [0, 1, 1, 1, 0],
        ],
        "position_ids": [
            [0, 1, 2, 3, 4],
            [0, 0, 1, 2, 2],
        ],
    }
    for key, rows in expected_long.items():
        torch.testing.assert_close(batch[key], torch.tensor(rows, dtype=torch.long))

    # Float fields: the scalar reward lands on the last valid response slot;
    # rollout logprobs are right-padded with 0.0.
    expected_float = {
        "rm_scores": [
            [0.0, 0.0, 0.5],
            [0.0, 1.5, 0.0],
        ],
        "rollout_log_probs": [
            [-0.1, -0.2, 0.0],
            [-0.3, -0.4, 0.0],
        ],
    }
    for key, rows in expected_float.items():
        torch.testing.assert_close(batch[key], torch.tensor(rows, dtype=torch.float32))

    # Sample 1's 3 real positions sit at indices 1..3 of the padded length-5
    # sequence (1 left prompt pad, 1 right response pad); pads stay zero.
    expected_experts = torch.zeros((5, 2, 1), dtype=torch.int64)
    expected_experts[1:4] = torch.tensor(
        [[[11], [12]], [[13], [14]], [[15], [16]]],
        dtype=torch.int64,
    )
    torch.testing.assert_close(batch["routed_experts"][1], expected_experts)

    # Non-tensor metadata is packed per-sample in input order.
    assert np.array_equal(np.array(tu.get(batch, "__num_turns__"), dtype=np.int32), np.array([2, 3], dtype=np.int32))
    assert np.array_equal(np.array(tu.get(batch, "label"), dtype=object), np.array(["alpha", "beta"], dtype=object))
    assert tu.get(batch, "reward_extra_keys") == ["score", "label"]
Loading