Skip to content

Commit bf7f9df

Browse files
dennys246 and claude committed
feat(research): direct campaign injection — bypass LLM for turn delivery
Campaign turns are now injected directly through the SimulationBridge, bypassing the orchestrator LLM entirely for narrative delivery. This eliminates the JSON escaping problem (unescaped quotes in dialogue) that caused 14B models to fail on verbatim turn relay.

Flow:
1. Campaign YAML is loaded with salience/novelty per turn
2. Turns are sent via bridge.send_and_wait() with progress output
3. AUT processes each turn, builds memories, responds
4. THEN the orchestrator LLM starts with an analysis-only goal (inspect_aut, record_experiment, finish_simulation)

The LLM never touches the narrative text — it only analyzes the results after the campaign is complete.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9dd2526 commit bf7f9df

2 files changed

Lines changed: 68 additions & 25 deletions

File tree

src/maxim/simulation/orchestrator.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ def start_simulation_mode(
245245
sandbox_image: str = "python:3.12-slim",
246246
sandbox_network: str = "none",
247247
aut_model: str | None = None,
248+
pre_campaign_turns: list[dict[str, Any]] | None = None,
248249
) -> SimulationResult:
249250
"""Boot simulation mode: AUT + orchestrator + stdin reader.
250251
@@ -887,6 +888,54 @@ def _aut_worker() -> None:
887888
aut_thread = threading.Thread(target=_aut_worker, name="sim.aut", daemon=True)
888889
aut_thread.start()
889890

891+
# ── Pre-campaign: inject turns directly through bridge ───────────────
892+
# When campaign turns are provided, we bypass the orchestrator LLM for
893+
# turn delivery. The bridge sends each turn to the AUT as a raw percept,
894+
# waits for the response, and records the result. This avoids JSON
895+
# escaping issues with narrative dialogue and ensures verbatim delivery.
896+
campaign_results: list[dict[str, Any]] = []
897+
if pre_campaign_turns:
898+
import time as _pc_time
899+
900+
print(f"\n Delivering {len(pre_campaign_turns)} campaign turns directly to AUT...")
901+
# Give AUT a moment to start up
902+
_pc_time.sleep(1.0)
903+
for i, turn in enumerate(pre_campaign_turns, 1):
904+
text = turn.get("text", "")
905+
phase = turn.get("phase", "")
906+
sal = turn.get("salience", 0.8)
907+
nov = turn.get("novelty", 0.7)
908+
phase_label = f" [{phase}]" if phase else ""
909+
print(f" Turn {i}/{len(pre_campaign_turns)}{phase_label}: sending ({len(text)} chars)...")
910+
try:
911+
result = bridge.send_and_wait(text, salience=sal, novelty=nov)
912+
actions = [a.tool_name for a in result.get("actions", [])]
913+
blocked = len(result.get("blocked", []))
914+
response = result.get("response", "")
915+
resp_preview = (
916+
(response[:80] + "...") if response and len(response) > 80 else (response or "(no verbal response)")
917+
)
918+
print(
919+
f" AUT: {len(actions)} action(s) {actions}, {blocked} blocked, {result.get('duration_ms', 0):.0f}ms"
920+
)
921+
print(f" Response: {resp_preview}")
922+
campaign_results.append(
923+
{
924+
"turn": i,
925+
"phase": phase,
926+
"text_len": len(text),
927+
"actions": actions,
928+
"blocked": blocked,
929+
"response": response,
930+
"timed_out": result.get("timed_out", False),
931+
"duration_ms": result.get("duration_ms", 0),
932+
}
933+
)
934+
except Exception as e:
935+
logger.warning("Campaign turn %d failed: %s", i, e)
936+
campaign_results.append({"turn": i, "phase": phase, "error": str(e)})
937+
print(f" Campaign delivery complete: {len(campaign_results)} turns delivered\n")
938+
890939
# ── Inject initial goal (or resume context) into orchestrator ────────
891940
if resume_session:
892941
resume_data = _load_resume_context(resume_session)

src/maxim/simulation/research_orchestrator.py

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -124,52 +124,46 @@ def start_research_mode(
124124
"phase": p.get("metadata", {}).get("phase", ""),
125125
"role": p.get("metadata", {}).get("experiment_role", ""),
126126
"tag": p.get("metadata", {}).get("scenario_tag", ""),
127+
"salience": p.get("salience", 0.8),
128+
"novelty": p.get("novelty", 0.7),
127129
}
128130
)
129131
print(f" Loaded {len(campaign_turns)} campaign turns from {campaign}")
130132
except Exception as e:
131133
logger.warning("Failed to load campaign YAML: %s", e)
132134

133-
# Build the researcher goal with concrete campaign steps
135+
# Build the researcher goal — campaign turns are injected directly
136+
# through the bridge (bypassing the LLM), so the orchestrator only
137+
# needs to do post-campaign analysis.
134138
researcher_goal = goal
135139
if campaign_turns:
136-
turn_lines = []
137-
for i, turn in enumerate(campaign_turns, 1):
138-
phase_label = f" [{turn['phase']}]" if turn["phase"] else ""
139-
# Show full text (up to 500 chars) so the LLM has the complete narrative
140-
turn_lines.append(f"--- TURN {i}{phase_label} ---\n{turn['text'][:500]}")
141-
turns_block = "\n\n".join(turn_lines)
142140
researcher_goal = (
143141
f"{goal}\n\n"
144-
f"CAMPAIGN PROTOCOL\n"
145-
f"=================\n"
146-
f"You MUST deliver the following narrative turns to the AUT, in order.\n"
147-
f'For each turn, call: send_message(text="<the narrative text below>")\n\n'
148-
f"IMPORTANT RULES:\n"
149-
f"- Send the narrative text VERBATIM — do NOT paraphrase, summarize, or adapt it.\n"
150-
f"- Do NOT invent your own probes or adversarial variations.\n"
151-
f"- Do NOT skip turns or reorder them.\n"
152-
f"- Wait for the AUT response after each turn before sending the next.\n\n"
153-
f"{turns_block}\n\n"
154-
f"--- END OF CAMPAIGN TURNS ---\n\n"
155-
f"After ALL {len(campaign_turns)} turns are sent and responses collected:\n"
156-
f"1. Use inspect_aut(query='memory_recall', params={{'keyword': 'Verath'}}) to check memory survival\n"
157-
f"2. Use inspect_aut(query='system_stats') for graph topology\n"
158-
f"3. Record results with record_experiment\n"
159-
f"4. Call finish_simulation with your findings"
142+
f"CAMPAIGN COMPLETE — {len(campaign_turns)} narrative turns were delivered directly to the AUT.\n"
143+
f"The campaign tested memory recall under narrative interference.\n\n"
144+
f"YOUR TASK (analysis only — do NOT send any more narrative turns):\n"
145+
f"1. Use inspect_aut(query='memory_recall', params={{'keyword': 'Verath'}}) to check if the seed memory survived\n"
146+
f"2. Use inspect_aut(query='system_stats') for graph topology and memory count\n"
147+
f"3. Use observe_actions to review the AUT's behavior during the campaign\n"
148+
f"4. Record your findings with record_experiment (hypothesis, method, result, conclusion)\n"
149+
f"5. Call finish_simulation with your analysis"
160150
)
161151
elif campaign:
162152
researcher_goal = f"{goal}\n\nCampaign file: {campaign} (failed to load — run manually with send_message)"
163153

164-
# Run the researcher via the existing simulation orchestrator
154+
# Run the researcher via the existing simulation orchestrator.
155+
# Campaign turns are injected directly through the bridge before the
156+
# orchestrator LLM starts, ensuring verbatim delivery without JSON
157+
# escaping issues.
165158
sim_result = start_simulation_mode(
166159
goal=researcher_goal,
167160
persona="researcher",
168161
max_turns=max_turns,
169162
debug=debug,
170163
sandbox_backend=sandbox_backend,
171164
aut_model=aut_model,
172-
no_sim_env=True, # Research mode doesn't need pain-triggering files
165+
no_sim_env=True,
166+
pre_campaign_turns=campaign_turns if campaign_turns else None,
173167
)
174168

175169
# The experiment log was populated by the researcher's record_experiment calls

0 commit comments

Comments
 (0)