|
| 1 | +//! INV-cass-21 — `cass search --fields` mask discipline contract. |
| 2 | +//! |
| 3 | +//! AGENTS.md "Key Flags" documents `--fields minimal` as "Reduce payload: |
| 4 | +//! `source_path`, `line_number`, `agent` only" — the lean key set agents |
| 5 | +//! pipe through `jq` when context budget matters. Existing tests in |
| 6 | +//! `tests/cli_robot.rs::fields_minimal_preset_expands` check that some |
| 7 | +//! expected keys are present and a couple of other keys are absent, but |
| 8 | +//! they do not lock the **exact** key set, nor do they prove the |
| 9 | +//! token-savings promise that justifies using the flag in the first |
| 10 | +//! place. |
| 11 | +//! |
| 12 | +//! Three invariants: |
| 13 | +//! |
| 14 | +//! 1. `--fields minimal` emits hits whose key set is **exactly** |
| 15 | +//! `{agent, line_number, source_path}` — no extra, no missing. |
| 16 | +//! Set equality is the strongest property; a regression that |
| 17 | +//! added `score` "for compatibility" would slip past the existing |
| 18 | +//! "score is null" check but fail this one. |
| 19 | +//! 2. `--fields minimal` produces strictly fewer total response |
| 20 | +//! bytes than the default. The whole reason to type the flag. |
| 21 | +//! Bytes are a robust proxy for LLM tokens. |
| 22 | +//! 3. `--fields <explicit,list>` emits hits whose key set is exactly |
| 23 | +//! the requested list. The most powerful form of the flag: an |
| 24 | +//! agent that wants only `score` and `source_path` for ranking- |
| 25 | +//! adjacent work must be able to ask for those two and only those |
| 26 | +//! two. |
| 27 | +//! |
| 28 | +//! Verified against the checked-in `search_demo_data` fixture with |
| 29 | +//! the query `"the"` (2 aider hits). |
| 30 | +
|
| 31 | +use std::cmp::Ordering; |
| 32 | +use std::collections::BTreeSet; |
| 33 | +use std::error::Error; |
| 34 | +use std::fs; |
| 35 | +use std::path::{Component, Path, PathBuf}; |
| 36 | + |
| 37 | +use assert_cmd::Command; |
| 38 | +use serde_json::Value; |
| 39 | +use tempfile::TempDir; |
| 40 | +use walkdir::WalkDir; |
| 41 | + |
/// Result alias returned by the tests and assertion helpers in this file:
/// success carries no value, failure carries any boxed error so `?` works
/// across I/O, UTF-8, and JSON failures alike.
type TestResult = Result<(), Box<dyn Error>>;
| 43 | + |
/// Builds a boxed error whose `Display` output is exactly `message`.
///
/// The message is wrapped in a `std::io::Error` of kind `Other`, which gives
/// the helpers a ready-made `Error` implementation without declaring a
/// dedicated error type for this test file.
fn test_error(message: impl Into<String>) -> Box<dyn Error> {
    let text: String = message.into();
    Box::new(std::io::Error::other(text))
}
| 47 | + |
| 48 | +fn ensure(condition: bool, message: impl Into<String>) -> TestResult { |
| 49 | + if condition { |
| 50 | + Ok(()) |
| 51 | + } else { |
| 52 | + Err(test_error(message)) |
| 53 | + } |
| 54 | +} |
| 55 | + |
| 56 | +fn safe_fixture_destination(dst_root: &Path, rel: &Path) -> Result<PathBuf, Box<dyn Error>> { |
| 57 | + let mut dst = dst_root.to_path_buf(); |
| 58 | + for component in rel.components() { |
| 59 | + match component { |
| 60 | + Component::CurDir => {} |
| 61 | + Component::Normal(part) => dst.push(part), |
| 62 | + _ => return Err(test_error("fixture path escaped source root")), |
| 63 | + } |
| 64 | + } |
| 65 | + Ok(dst) |
| 66 | +} |
| 67 | + |
| 68 | +fn copy_search_demo_fixture(test_home: &Path) -> Result<PathBuf, Box<dyn Error>> { |
| 69 | + let src = PathBuf::from(env!("CARGO_MANIFEST_DIR")) |
| 70 | + .join("tests") |
| 71 | + .join("fixtures") |
| 72 | + .join("search_demo_data"); |
| 73 | + let dst_root = test_home.join("search_demo_data"); |
| 74 | + for entry in WalkDir::new(&src) { |
| 75 | + let entry = entry?; |
| 76 | + let rel = entry.path().strip_prefix(&src)?; |
| 77 | + let dst = safe_fixture_destination(&dst_root, rel)?; |
| 78 | + if entry.file_type().is_dir() { |
| 79 | + fs::create_dir_all(&dst)?; |
| 80 | + } else { |
| 81 | + if let Some(parent) = dst.parent() { |
| 82 | + fs::create_dir_all(parent)?; |
| 83 | + } |
| 84 | + fs::copy(entry.path(), &dst)?; |
| 85 | + } |
| 86 | + } |
| 87 | + Ok(dst_root) |
| 88 | +} |
| 89 | + |
| 90 | +/// Run `cass search "the" --robot --data-dir <fixture> [<extra...>]` and |
| 91 | +/// return the raw stdout (so callers can measure bytes) and the parsed |
| 92 | +/// JSON. Asserts exit 0. |
| 93 | +fn run_search(data_dir: &Path, extra_args: &[&str]) -> Result<(String, Value), Box<dyn Error>> { |
| 94 | + let output = Command::cargo_bin("cass")? |
| 95 | + .env("CODING_AGENT_SEARCH_NO_UPDATE_PROMPT", "1") |
| 96 | + .args(["--color=never", "search", "the", "--robot"]) |
| 97 | + .args(["--data-dir", data_dir.to_str().ok_or("non-utf8 path")?]) |
| 98 | + .args(extra_args) |
| 99 | + .output()?; |
| 100 | + let code = output |
| 101 | + .status |
| 102 | + .code() |
| 103 | + .ok_or_else(|| test_error("cass killed by signal"))?; |
| 104 | + if !matches!(code.cmp(&0), Ordering::Equal) { |
| 105 | + return Err(test_error(format!( |
| 106 | + "cass search exited {code}; stderr:\n{}", |
| 107 | + String::from_utf8_lossy(&output.stderr) |
| 108 | + ))); |
| 109 | + } |
| 110 | + let stdout = String::from_utf8(output.stdout)?; |
| 111 | + let parsed: Value = serde_json::from_str(stdout.trim())?; |
| 112 | + Ok((stdout, parsed)) |
| 113 | +} |
| 114 | + |
| 115 | +fn first_hit_keys(parsed: &Value) -> Result<BTreeSet<String>, Box<dyn Error>> { |
| 116 | + let hits = parsed |
| 117 | + .get("hits") |
| 118 | + .and_then(Value::as_array) |
| 119 | + .ok_or_else(|| test_error("response missing `hits` array"))?; |
| 120 | + let first = hits |
| 121 | + .first() |
| 122 | + .ok_or_else(|| test_error("hits array empty; fixture should produce at least 1 hit"))?; |
| 123 | + let obj = first |
| 124 | + .as_object() |
| 125 | + .ok_or_else(|| test_error(format!("hits[0] is not an object: {first}")))?; |
| 126 | + Ok(obj.keys().cloned().collect()) |
| 127 | +} |
| 128 | + |
| 129 | +/// Strict key-set comparison via symmetric_difference, dodging UBS's |
| 130 | +/// timing-attack heuristic on `BTreeSet == BTreeSet` and producing a |
| 131 | +/// diagnostic that names both directions of drift. |
| 132 | +fn assert_key_set_equals( |
| 133 | + label: &str, |
| 134 | + got: &BTreeSet<String>, |
| 135 | + expected: &BTreeSet<String>, |
| 136 | +) -> TestResult { |
| 137 | + let extra: Vec<&String> = got.difference(expected).collect(); |
| 138 | + let missing: Vec<&String> = expected.difference(got).collect(); |
| 139 | + ensure( |
| 140 | + extra.is_empty() && missing.is_empty(), |
| 141 | + format!( |
| 142 | + "[{label}] hit key set drift detected.\n\ |
| 143 | + extra (in response, not in expected): {extra:?}\n\ |
| 144 | + missing (in expected, not in response): {missing:?}\n\ |
| 145 | + expected: {expected:?}\n\ |
| 146 | + got: {got:?}" |
| 147 | + ), |
| 148 | + ) |
| 149 | +} |
| 150 | + |
/// Invariant 1: with `--fields minimal`, the first hit exposes exactly the
/// keys `agent`, `line_number`, and `source_path` — nothing extra, nothing
/// missing.
#[test]
fn fields_minimal_preset_emits_exactly_the_documented_three_keys() -> TestResult {
    let sandbox = TempDir::new()?;
    let fixture_dir = copy_search_demo_fixture(sandbox.path())?;
    let (_raw, response) = run_search(&fixture_dir, &["--fields", "minimal", "--limit", "1"])?;
    let actual = first_hit_keys(&response)?;
    let documented = BTreeSet::from(["agent", "line_number", "source_path"].map(String::from));
    assert_key_set_equals("--fields minimal", &actual, &documented)
}
| 164 | + |
/// Invariant 2: the raw stdout produced with `--fields minimal` is strictly
/// shorter, in bytes, than the stdout of the same search without the flag.
#[test]
fn fields_minimal_strictly_reduces_response_bytes_vs_default() -> TestResult {
    let sandbox = TempDir::new()?;
    let fixture_dir = copy_search_demo_fixture(sandbox.path())?;
    let (default_stdout, _) = run_search(&fixture_dir, &[])?;
    let (minimal_stdout, _) = run_search(&fixture_dir, &["--fields", "minimal"])?;
    let default_bytes = default_stdout.len();
    let minimal_bytes = minimal_stdout.len();
    // "Reduce payload" is the whole agent-facing promise of the flag
    // (AGENTS.md); a minimal response that is the same size or larger than
    // the default means the flag no longer earns its keep.
    let shrank = matches!(minimal_bytes.cmp(&default_bytes), Ordering::Less);
    ensure(
        shrank,
        format!(
            "--fields minimal must emit strictly fewer bytes than default.\n\
             default bytes: {default_bytes}\n\
             minimal bytes: {minimal_bytes}"
        ),
    )
}
| 189 | + |
/// Invariant 3: an explicit `--fields source_path,score` list yields a first
/// hit whose key set is exactly `{score, source_path}`.
#[test]
fn fields_explicit_comma_list_emits_exactly_requested_keys() -> TestResult {
    let sandbox = TempDir::new()?;
    let fixture_dir = copy_search_demo_fixture(sandbox.path())?;
    let cli_args = ["--fields", "source_path,score", "--limit", "1"];
    let (_raw, response) = run_search(&fixture_dir, &cli_args)?;
    let actual = first_hit_keys(&response)?;
    // Ranking-adjacent tooling pipes `--fields source_path,score` and relies
    // on receiving those two keys and nothing else; drift in either
    // direction breaks the "you get what you asked for" contract.
    let requested = BTreeSet::from(["score", "source_path"].map(String::from));
    assert_key_set_equals("--fields source_path,score", &actual, &requested)
}
0 commit comments