Skip to content

Commit c8079ac

Browse files
authored
signals: feature parity with the latest Signals paper. Porting logic from python repo (#903)
* signals: port to layered taxonomy with dual-emit OTel

  Made-with: Cursor

* fix: silence collapsible_match clippy lint (rustc 1.95)

  Made-with: Cursor

* test: parity harness for rust vs python signals analyzer

  Validates the brightstaff signals port against the katanemo/signals Python reference on lmsys/lmsys-chat-1m. Adds a signals_replay bin emitting python-compatible JSON, a pyarrow-based driver (bypasses the datasets loader pickle bug on python 3.14), a 3-tier comparator, and an on-demand workflow_dispatch CI job.

  Made-with: Cursor

* Remove signals test from the gitops flow

* style: format parity harness with black

  Made-with: Cursor

* signals: group summary by taxonomy, factor misalignment_ratio

  Addresses #903 review feedback from @nehcgs:

  - generate_summary() now renders explicit Interaction / Execution / Environment headers so the paper taxonomy is visible at a glance, even when no signals fired in a given layer. Quality-driving callouts (high misalignment rate, looping detected, escalation requested) are appended after the layer summary as an alerts tail.
  - repair_ratio (legacy taxonomy name) renamed to misalignment_ratio and factored into a single InteractionSignals::misalignment_ratio() helper so assess_quality and generate_summary share one source of truth instead of recomputing the same divide twice.

  Two new unit tests pin the layer headers and the (sev N) severity suffix. Parity with the python reference is preserved at the Tier-A level (per-type counts + overall_quality); only the human-readable summary string diverges, which the parity comparator already classifies as Tier-C.

  Made-with: Cursor
1 parent 6701195 commit c8079ac

31 files changed

Lines changed: 5257 additions & 3272 deletions

crates/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/brightstaff/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ name = "brightstaff"
33
version = "0.1.0"
44
edition = "2021"
55

6+
[[bin]]
7+
name = "brightstaff"
8+
path = "src/main.rs"
9+
10+
[[bin]]
11+
name = "signals_replay"
12+
path = "src/bin/signals_replay.rs"
13+
614
[dependencies]
715
async-openai = "0.30.1"
816
async-trait = "0.1"
@@ -26,6 +34,7 @@ opentelemetry-stdout = "0.31"
2634
opentelemetry_sdk = { version = "0.31", features = ["rt-tokio"] }
2735
pretty_assertions = "1.4.1"
2836
rand = "0.9.2"
37+
regex = "1.10"
2938
lru = "0.12"
3039
metrics = "0.23"
3140
metrics-exporter-prometheus = { version = "0.15", default-features = false, features = ["http-listener"] }
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
//! `signals-replay` — batch driver for the `brightstaff` signal analyzer.
2+
//!
3+
//! Reads JSONL conversations from stdin (one per line) and emits matching
4+
//! JSONL reports on stdout, one per input conversation, in the same order.
5+
//!
6+
//! Input shape (per line):
7+
//! ```json
8+
//! {"id": "convo-42", "messages": [{"from": "human", "value": "..."}, ...]}
9+
//! ```
10+
//!
11+
//! Output shape (per line, success):
12+
//! ```json
13+
//! {"id": "convo-42", "report": { ...python-compatible SignalReport dict... }}
14+
//! ```
15+
//!
16+
//! On per-line failure (parse / analyzer error), emits:
17+
//! ```json
18+
//! {"id": "convo-42", "error": "..."}
19+
//! ```
20+
//!
21+
//! The output report dict is shaped to match the Python reference's
22+
//! `SignalReport.to_dict()` byte-for-byte so the parity comparator can do a
23+
//! direct structural diff.
24+
25+
use std::io::{self, BufRead, BufWriter, Write};
26+
27+
use serde::Deserialize;
28+
use serde_json::{json, Map, Value};
29+
30+
use brightstaff::signals::{SignalAnalyzer, SignalGroup, SignalReport};
31+
32+
#[derive(Debug, Deserialize)]
33+
struct InputLine {
34+
id: Value,
35+
messages: Vec<MessageRow>,
36+
}
37+
38+
#[derive(Debug, Deserialize)]
39+
struct MessageRow {
40+
#[serde(default)]
41+
from: String,
42+
#[serde(default)]
43+
value: String,
44+
}
45+
46+
fn main() {
47+
let stdin = io::stdin();
48+
let stdout = io::stdout();
49+
let mut out = BufWriter::new(stdout.lock());
50+
let analyzer = SignalAnalyzer::default();
51+
52+
for line in stdin.lock().lines() {
53+
let line = match line {
54+
Ok(l) => l,
55+
Err(e) => {
56+
eprintln!("read error: {e}");
57+
std::process::exit(1);
58+
}
59+
};
60+
let trimmed = line.trim();
61+
if trimmed.is_empty() {
62+
continue;
63+
}
64+
let result = process_line(&analyzer, trimmed);
65+
// Always emit one line per input line so id ordering stays aligned.
66+
if let Err(e) = writeln!(out, "{result}") {
67+
eprintln!("write error: {e}");
68+
std::process::exit(1);
69+
}
70+
// Flush periodically isn't strictly needed — BufWriter handles it,
71+
// and the parent process reads the whole stream when we're done.
72+
}
73+
let _ = out.flush();
74+
}
75+
76+
fn process_line(analyzer: &SignalAnalyzer, line: &str) -> Value {
77+
let parsed: InputLine = match serde_json::from_str(line) {
78+
Ok(p) => p,
79+
Err(e) => {
80+
return json!({
81+
"id": Value::Null,
82+
"error": format!("input parse: {e}"),
83+
});
84+
}
85+
};
86+
87+
let id = parsed.id.clone();
88+
89+
let view: Vec<brightstaff::signals::analyzer::ShareGptMessage<'_>> = parsed
90+
.messages
91+
.iter()
92+
.map(|m| brightstaff::signals::analyzer::ShareGptMessage {
93+
from: m.from.as_str(),
94+
value: m.value.as_str(),
95+
})
96+
.collect();
97+
98+
let report = analyzer.analyze_sharegpt(&view);
99+
let report_dict = report_to_python_dict(&report);
100+
json!({
101+
"id": id,
102+
"report": report_dict,
103+
})
104+
}
105+
106+
/// Convert a `SignalReport` into the Python reference's `to_dict()` shape.
107+
///
108+
/// Ordering of category keys in each layer dict follows the Python source
109+
/// exactly so even string-equality comparisons behave deterministically.
110+
fn report_to_python_dict(r: &SignalReport) -> Value {
111+
let mut interaction = Map::new();
112+
interaction.insert(
113+
"misalignment".to_string(),
114+
signal_group_to_python(&r.interaction.misalignment),
115+
);
116+
interaction.insert(
117+
"stagnation".to_string(),
118+
signal_group_to_python(&r.interaction.stagnation),
119+
);
120+
interaction.insert(
121+
"disengagement".to_string(),
122+
signal_group_to_python(&r.interaction.disengagement),
123+
);
124+
interaction.insert(
125+
"satisfaction".to_string(),
126+
signal_group_to_python(&r.interaction.satisfaction),
127+
);
128+
129+
let mut execution = Map::new();
130+
execution.insert(
131+
"failure".to_string(),
132+
signal_group_to_python(&r.execution.failure),
133+
);
134+
execution.insert(
135+
"loops".to_string(),
136+
signal_group_to_python(&r.execution.loops),
137+
);
138+
139+
let mut environment = Map::new();
140+
environment.insert(
141+
"exhaustion".to_string(),
142+
signal_group_to_python(&r.environment.exhaustion),
143+
);
144+
145+
json!({
146+
"interaction_signals": Value::Object(interaction),
147+
"execution_signals": Value::Object(execution),
148+
"environment_signals": Value::Object(environment),
149+
"overall_quality": r.overall_quality.as_str(),
150+
"summary": r.summary,
151+
})
152+
}
153+
154+
fn signal_group_to_python(g: &SignalGroup) -> Value {
155+
let signals: Vec<Value> = g
156+
.signals
157+
.iter()
158+
.map(|s| {
159+
json!({
160+
"signal_type": s.signal_type.as_str(),
161+
"message_index": s.message_index,
162+
"snippet": s.snippet,
163+
"confidence": s.confidence,
164+
"metadata": s.metadata,
165+
})
166+
})
167+
.collect();
168+
169+
json!({
170+
"category": g.category,
171+
"count": g.count,
172+
"severity": g.severity,
173+
"signals": signals,
174+
})
175+
}

crates/brightstaff/src/handlers/function_calling.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,8 @@ impl ArchFunctionHandler {
441441
}
442442
}
443443
// Handle str/string conversions
444-
"str" | "string" => {
445-
if !value.is_string() {
446-
return Ok(json!(value.to_string()));
447-
}
444+
"str" | "string" if !value.is_string() => {
445+
return Ok(json!(value.to_string()));
448446
}
449447
_ => {}
450448
}

0 commit comments

Comments
 (0)