Skip to content

Commit b9bf441

Browse files
Simon FREYBURGER and claude committed
test: drop obsolete source-scan tests, add checkpoint e2e via agent.run
The three TestTokenSnapshotExtendedFields cases asserted cache_read / cache_creation fields that were removed in 620bbb2 ("fix: remove dead cache_read/cache_creation fields per review"). They have been failing ever since.

Delete test_checkpoint_extras.py -- its remaining cases were either trivial (test_store_imports_sys checks 'import sys' exists) or file-source text scans (TestCheckpointPrintsToStderr) which don't test user behavior.

Add tests/test_checkpoint_e2e.py with two real e2e scenarios:
- Drive agent.run with a mocked LLM that emits a Write tool_call; assert the checkpoint hook created a pre-edit backup of the original content.
- Same path but the file exceeds _MAX_FILE_SIZE -- assert the skip message lands on stderr only, not stdout.

This is the actual user-visible contract of PR #47 and covers the full wiring agent.run -> Write hook -> checkpoint.store.track_file_edit.

The three behavior tests in test_checkpoint_store.py stay -- they cover the store function directly via capsys.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 75aeff5 commit b9bf441

2 files changed

Lines changed: 112 additions & 62 deletions

File tree

tests/test_checkpoint_e2e.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
"""End-to-end: drive a real agent.run() conversation where the LLM calls Write,
2+
and verify the checkpoint hook intercepts the call and files a backup to disk.
3+
4+
Only the LLM provider is mocked (via monkeypatching agent.stream). The Write
5+
tool, checkpoint hooks and checkpoint store all run for real against tmp_path.
6+
"""
7+
from __future__ import annotations
8+
9+
import pytest
10+
11+
import tools as _tools_init # noqa: F401 - force built-in tool registration
12+
from agent import AgentState, run
13+
from providers import AssistantTurn
14+
from checkpoint import hooks as checkpoint_hooks
15+
from checkpoint import store as checkpoint_store
16+
17+
18+
def _scripted_stream(turns):
19+
cursor = iter(turns)
20+
21+
def fake_stream(**_kwargs):
22+
spec = next(cursor)
23+
yield AssistantTurn(
24+
text=spec.get("text", ""),
25+
tool_calls=spec.get("tool_calls") or [],
26+
in_tokens=1, out_tokens=1,
27+
)
28+
29+
return fake_stream
30+
31+
32+
@pytest.fixture
def sandboxed_checkpoints(tmp_path, monkeypatch):
    """Point the checkpoint store at ``tmp_path`` and install the tool hooks.

    Setup replaces ``checkpoint_store._checkpoints_root`` so it returns
    ``tmp_path / ".checkpoints"``, resets the store's file versions, selects
    the ``"e2e-session"`` session, resets hook tracking, and installs the
    hooks. The fixture yields ``tmp_path``; teardown resets hook tracking
    once more.
    """

    def _sandbox_root():
        # Resolved on every call, exactly like the store's own root lookup.
        return tmp_path / ".checkpoints"

    monkeypatch.setattr(checkpoint_store, "_checkpoints_root", _sandbox_root)

    # Clear state first, then wire the hooks up for this test's session.
    checkpoint_store.reset_file_versions()
    checkpoint_hooks.set_session("e2e-session")
    checkpoint_hooks.reset_tracked()
    checkpoint_hooks.install_hooks()

    yield tmp_path

    checkpoint_hooks.reset_tracked()
44+
45+
46+
def test_llm_write_triggers_checkpoint_backup(monkeypatch, sandboxed_checkpoints):
    """When the LLM calls Write, the checkpoint hook must back the pre-edit file up.

    Pre-populate a small file, then let the scripted LLM overwrite it via the
    Write tool. The test then expects a file holding the original content
    under ``.checkpoints/e2e-session/backups/`` inside the sandbox.
    """
    target = sandboxed_checkpoints / "hello.py"
    target.write_text("print('before')\n", encoding="utf-8")

    # Turn 1: the "LLM" asks for a Write; turn 2: it ends the conversation.
    turns = [
        {"tool_calls": [{
            "id": "w1",
            "name": "Write",
            "input": {"file_path": str(target), "content": "print('after')\n"},
        }]},
        {"text": "done"},
    ]
    monkeypatch.setattr("agent.stream", _scripted_stream(turns))

    state = AgentState()
    config = {"model": "test", "permission_mode": "accept-all",
              "_session_id": "e2e-session", "disabled_tools": ["Agent"]}
    # Consume everything run() yields so the conversation runs to completion.
    list(run("overwrite the file", state, config, "system prompt"))

    # After the turn: Write applied the new content
    assert target.read_text(encoding="utf-8") == "print('after')\n"

    # And the checkpoint hook filed a backup with the pre-edit content
    backups_dir = sandboxed_checkpoints / ".checkpoints" / "e2e-session" / "backups"
    # iterdir() raises if the directory is missing, which would hide the
    # intended assertion message behind a FileNotFoundError in exactly the
    # failure this test exists to report. Treat "no directory" as "no
    # backups", and keep only regular files so read_text() below cannot trip
    # over a subdirectory.
    backups = (
        [entry for entry in backups_dir.iterdir() if entry.is_file()]
        if backups_dir.is_dir()
        else []
    )
    assert backups, "checkpoint hook did not create a backup file"
    assert any(b.read_text(encoding="utf-8") == "print('before')\n" for b in backups)
79+
80+
81+
def test_oversized_write_logs_to_stderr_not_stdout(
    monkeypatch, sandboxed_checkpoints, capfd
):
    """A Write to a file above ``_MAX_FILE_SIZE`` logs its skip on stderr only.

    PR #47's user-visible contract: the checkpoint skip notice must stay off
    stdout (which carries the conversation transcript) and appear on stderr,
    where operators look.
    """
    # Shrink the threshold so a 100-character file counts as oversized.
    monkeypatch.setattr(checkpoint_store, "_MAX_FILE_SIZE", 20)
    oversized = sandboxed_checkpoints / "big.py"
    oversized.write_text("x" * 100, encoding="utf-8")

    write_call = {
        "id": "w1",
        "name": "Write",
        "input": {"file_path": str(oversized), "content": "y" * 100},
    }
    script = [{"tool_calls": [write_call]}, {"text": "ok"}]
    monkeypatch.setattr("agent.stream", _scripted_stream(script))

    state = AgentState()
    config = {
        "model": "test",
        "permission_mode": "accept-all",
        "_session_id": "e2e-session",
        "disabled_tools": ["Agent"],
    }
    # Drain the run; only its side effects on stdout/stderr matter here.
    for _event in run("rewrite", state, config, "sys"):
        pass

    captured_out, captured_err = capfd.readouterr()
    skip_notice = "[checkpoint] skipping large file"
    assert skip_notice in captured_err
    assert skip_notice not in captured_out

tests/test_checkpoint_extras.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

0 commit comments

Comments
 (0)