Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions tests/agent/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Agent tests

This directory contains the CPU-only unit tests for the new `verl.agent`
packages introduced for the agent framework / gateway path.

## Naming and CI routing

All executable test modules in this directory use the `*_on_cpu.py` suffix so
they are picked up by VERL's existing `cpu_unit_tests.yml` workflow instead of
the default GPU unit-test workflow.

## Coverage inventory

### Framework

- `framework/test_assembler_on_cpu.py`
- Verifies `TrajectoryAssembler` emits the expected training batch contract.
- Checks tensor padding, masks, logprobs, routed experts, and non-tensor
metadata packing in the final `TensorDict`.
- `framework/test_openai_compatible_framework_on_cpu.py`
- Verifies `OpenAICompatibleAgentFramework` runs against a fake in-memory
session runtime without Ray, HTTP serving, or LLM backends.
- Covers session creation, finalize/abort behavior, reward assignment,
optional wait-for-completion, non-tensor field broadcast, and missing
rollout logprob handling.

### Gateway

- `gateway/test_gateway_actor_on_cpu.py`
- CPU-only FastAPI/Ray actor contract tests for `GatewayActor`.
- Uses fake tokenizer + mocked backends instead of real `LLMServer` /
rollout-serving paths.
- Covers request normalization, prefix matching, tool-schema drift,
continuation masks, per-session concurrency serialization, completion
semantics, invalid request rejection, backend failure rollback, and tool
parser response formatting.
- `gateway/test_gateway_manager_on_cpu.py`
- Verifies sticky session routing and least-active gateway selection.
- Uses lightweight fake gateway actors instead of real serving stacks.
- `gateway/test_session_runtime_on_cpu.py`
- Verifies `GatewayServingRuntime` owns gateway lifecycle and session runtime
behavior independently from `agent_loop`.
- Covers both runtime-owned fake backend injection and mocked load-balancer /
rollout-server integration.

## Mocking boundaries

- No test in this directory depends on a real `LLMServer`, model weights, or a
production serving runtime.
- `tests/agent/support.py` provides the fakes and lightweight Ray actors used by
the gateway/runtime tests.
- The only retained dependency on the old experimental tree is
`verl.experimental.agent_loop.tool_parser`, which is intentionally reused by
`GatewayActor` until the community-wide extraction lands.
154 changes: 154 additions & 0 deletions tests/agent/framework/test_assembler_on_cpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import numpy as np
import torch
from tensordict import TensorDict

from verl.agent.framework.assembler import TrajectoryAssembler
from verl.agent.framework.types import Trajectory
from verl.utils import tensordict_utils as tu


def test_trajectory_assembler_matches_training_batch_contract():
    """Assemble two uneven trajectories and pin the training-batch layout.

    Sample 0 is the longest on both sides (prompt len 2, response len 3);
    sample 1 (prompt len 1, response len 2) therefore needs left prompt
    padding and right response padding, both with pad_token_id=0.
    """
    first = Trajectory(
        uid="sample-0",
        session_id="session-0",
        trajectory_id=0,
        prompt_ids=[10, 11],
        response_ids=[20, 21, 22],
        response_mask=[1, 1, 0],
        response_logprobs=[-0.1, -0.2, 0.0],
        reward_info={"score": 0.5, "label": "alpha"},
        reward_score=0.5,
        num_turns=2,
        # One entry per (prompt + response) position: 5 positions, 2 layers, top-1.
        routed_experts=torch.tensor(
            [[[1], [2]], [[3], [4]], [[5], [6]], [[7], [8]], [[9], [10]]],
            dtype=torch.int64,
        ),
    )
    second = Trajectory(
        uid="sample-1",
        session_id="session-1",
        trajectory_id=0,
        prompt_ids=[30],
        response_ids=[40, 41],
        response_mask=[1, 1],
        response_logprobs=[-0.3, -0.4],
        reward_info={"score": 1.5, "label": "beta"},
        reward_score=1.5,
        num_turns=3,
        # 3 real positions (1 prompt + 2 response) before padding.
        routed_experts=torch.tensor(
            [[[11], [12]], [[13], [14]], [[15], [16]]],
            dtype=torch.int64,
        ),
    )

    batch = TrajectoryAssembler(pad_token_id=0).assemble([first, second])

    assert isinstance(batch, TensorDict)

    # Padded shapes: prompt width 2, response width 3, full sequence 5.
    expected_shapes = {
        "prompts": (2, 2),
        "responses": (2, 3),
        "response_mask": (2, 3),
        "input_ids": (2, 5),
        "attention_mask": (2, 5),
        "position_ids": (2, 5),
        "rollout_log_probs": (2, 3),
        "routed_experts": (2, 5, 2, 1),
        "rm_scores": (2, 3),
    }
    for key, shape in expected_shapes.items():
        assert tuple(batch[key].shape) == shape

    # Integer fields: prompts are left-padded, responses right-padded; the
    # attention mask zeroes both pad regions and position ids freeze across
    # the left pad then count up through the real tokens.
    expected_long = {
        "prompts": [
            [10, 11],
            [0, 30],
        ],
        "responses": [
            [20, 21, 22],
            [40, 41, 0],
        ],
        "response_mask": [
            [1, 1, 0],
            [1, 1, 0],
        ],
        "attention_mask": [
            [1, 1, 1, 1, 1],
            [0, 1, 1, 1, 0],
        ],
        "position_ids": [
            [0, 1, 2, 3, 4],
            [0, 0, 1, 2, 2],
        ],
    }
    for key, rows in expected_long.items():
        torch.testing.assert_close(batch[key], torch.tensor(rows, dtype=torch.long))

    # Float fields: the scalar reward lands on the last valid response slot;
    # rollout logprobs are right-padded with 0.0.
    expected_float = {
        "rm_scores": [
            [0.0, 0.0, 0.5],
            [0.0, 1.5, 0.0],
        ],
        "rollout_log_probs": [
            [-0.1, -0.2, 0.0],
            [-0.3, -0.4, 0.0],
        ],
    }
    for key, rows in expected_float.items():
        torch.testing.assert_close(batch[key], torch.tensor(rows, dtype=torch.float32))

    # Sample 1's 3 real positions sit at indices 1..3 of the padded length-5
    # sequence (1 left prompt pad, 1 right response pad); pads stay zero.
    expected_experts = torch.zeros((5, 2, 1), dtype=torch.int64)
    expected_experts[1:4] = torch.tensor(
        [[[11], [12]], [[13], [14]], [[15], [16]]],
        dtype=torch.int64,
    )
    torch.testing.assert_close(batch["routed_experts"][1], expected_experts)

    # Non-tensor metadata is packed per-sample in input order.
    assert np.array_equal(np.array(tu.get(batch, "__num_turns__"), dtype=np.int32), np.array([2, 3], dtype=np.int32))
    assert np.array_equal(np.array(tu.get(batch, "label"), dtype=object), np.array(["alpha", "beta"], dtype=object))
    assert tu.get(batch, "reward_extra_keys") == ["score", "label"]
Loading