Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ MAXIM_NAC_MIN_CONFIDENCE=0.0 # Override propose_via_substrate's min_confiden
# EC activation instrumentation (release_0_9_1.md Stage 0d, cross_modal_substrate_binding.md Stage 1)
MAXIM_EC_TRACE_ACTIVATIONS=1 # Gate per-tick `sim_ec_activation` JSONL events from EntorhinalCortex.pattern_complete_or_separate. Fields: agent_id, tick (int second bucket), active_node_id, activation_strength, modality_tag (linguistic/drive/sensor), modality, is_new. Off by default — Roy-4 sets it in the runner environment for the cross-modal binding pre-implementation validation experiment (scripts/analyze_roy_4_coactivation.py is the post-hoc analyzer). Falsy values ("0", "false", "no", "off", empty) disable. The instrumentation fires even on cold-start when active_node_id is freshly allocated, so pattern-separation events are visible in the co-activation matrix.

# Action JSONL + recommend_action telemetry (release_0_9_1.md Stages 0b + 0c) — no env var; structural
# Stage 0b: actions.jsonl gains a header line with `_format_version: "1.1"` (minor bump from pre-0b unversioned "0.x" per CC1) + per-record `agent_id` / `session_id` / `entity_class` fields populated from utils/http.py::current_context() (bound at the sim orchestrator entry on both AUT + orchestrator threads via `context_scope()`). InstrumentedExecutor derives entity_class strictly opt-in (params["entity_class"] → params["target"]/["entity"]/["object"]); the verb-prefix-strip heuristic was dropped in pre-merge review fold (too noisy on non-entity tools like `get_status` → "status"). Tool authors opt into Roy-3 attribution by passing entity_class through params; 1.1 ships declared `Tool.entity_class` field per the docstring TODO. ActionRecord is shape-frozen at 1.0 (CC3) with three optional fields appended at the end — back-compat with existing ActionSink consumers. **Reader contract:** `_record_kind == "header"` MUST be skipped before interpreting per-action fields.
# Stage 0c: NAc.recommend_action emits one `sim_log("NAc_RECOMMEND", ...)` event per call, on all FOUR return paths: the three early returns (empty available_tools, empty scores, sub-threshold) plus the success path — Roy-3 needs to distinguish "gate fired, consumer did nothing" from "consumer didn't run at all". Fields: tick (int(time.time() - sim_logger._sim_start) — ALIGNED with Stage 0d's `sim_ec_activation` tick space so Roy-3 cross-channel joins work), current_cluster_id, cluster_reward_bias_consulted (0.0 sentinel when cluster_id known but no tool scored; None when cluster_id truly absent or when available_tools was empty, i.e. nothing was consulted — distinction is load-bearing for Roy-3 H1 disambiguation), best_tool, best_score, min_confidence, passed_gate. Routes through the standard sim_log JSONL writer + the MAXIM_LOG_FILE bridge. Fail-soft on ImportError only (non-sim runtime); other exceptions propagate so a real sim_logger bug surfaces.

# Leader proxy admission control
MAXIM_PROXY_MAX_CONCURRENT=4 # Max in-flight requests to upstream (0=unlimited)
MAXIM_PROXY_RATE_LIMIT_RPM=0 # Per-peer requests/minute (0=unlimited)
Expand Down
131 changes: 131 additions & 0 deletions src/maxim/decisions/nac.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,71 @@
logger = logging.getLogger(__name__)


def _emit_recommend_action_event(
*,
agent_id: str,
current_cluster_id: str | None,
cluster_reward_bias_consulted: float | None,
best_tool: str | None,
best_score: float,
min_confidence: float,
passed_gate: bool,
) -> None:
"""Emit a ``sim_recommend_action`` event for Stage 0c telemetry.

Per release_0_9_1.md Stage 0c, every ``recommend_action`` call MUST
emit exactly one event — even the early-return paths (empty
available_tools, empty scores, sub-threshold) — so Roy-3 measurement
can distinguish "gate fired but consumer did nothing" from
"consumer ran and proposed nothing."

The event lands on the ``sim_log("NAc_RECOMMEND", ...)`` channel,
which routes through the standard sim_log JSONL writer + the
MAXIM_LOG_FILE bridge.

**Tick alignment with Stage 0d (CRITICAL):** the ``tick`` field
matches Stage 0d's ``sim_ec_activation`` tick space —
``int(time.time() - sim_logger._sim_start)``, NOT raw epoch seconds.
Without this alignment Roy-3 cannot left-join the two channels
on tick (a 1e9 offset returns zero matches every time). For
sub-second ordering use the sim_log JSONL's top-level ``t`` field,
which sim_log auto-attaches with millisecond resolution from the
same ``_sim_start`` reference.

The emission is fail-soft: ``ImportError`` (non-sim runtime where
sim_logger isn't importable at all) is swallowed silently. Any
other exception propagates — a real sim_logger bug should surface
rather than masquerade as silent annotation-off.
"""
try:
from maxim.simulation import sim_logger as _sl

tick = int(time.time() - _sl._sim_start) if _sl._sim_start > 0.0 else 0
_sl.sim_log(
"NAc_RECOMMEND",
f"recommend_action: passed_gate={passed_gate}",
{
"tick": tick,
"current_cluster_id": current_cluster_id,
"cluster_reward_bias_consulted": cluster_reward_bias_consulted,
"best_tool": best_tool,
"best_score": round(best_score, 4),
"min_confidence": min_confidence,
"passed_gate": passed_gate,
},
agent_id=agent_id,
)
except ImportError:
# Non-sim runtime: sim_logger isn't importable at all (e.g.,
# headless API without the simulation extras). Stage 0c is
# observability only, not load-bearing for correctness —
# swallow silently. Any OTHER exception (a real sim_logger
# bug, an attribute error from a broken refactor) propagates
# so we don't silently disable telemetry the Roy-3 measurement
# arm depends on.
pass


@dataclass(frozen=True)
class NACConfig:
"""Configuration for Nucleus Accumbens."""
Expand Down Expand Up @@ -1218,6 +1283,19 @@ def recommend_action(
if not agent_id:
raise ValueError("recommend_action requires non-empty agent_id")
if not available_tools:
# Stage 0c: empty available_tools is a legitimate early return
# (e.g., the scene_actor filter trimmed the executor's tool set
# to nothing). Still emit so Roy-3 can distinguish "no tools
# available" from "no tools scored above gate."
_emit_recommend_action_event(
agent_id=agent_id,
current_cluster_id=current_cluster_id,
cluster_reward_bias_consulted=None,
best_tool=None,
best_score=0.0,
min_confidence=min_confidence,
passed_gate=False,
)
return None

drives = current_drives or {}
Expand Down Expand Up @@ -1292,14 +1370,67 @@ def recommend_action(
scores[tool_name] = score
reasoning_parts[tool_name] = parts

# Stage 0c (release_0_9_1.md): emit `sim_recommend_action` for
# post-hoc Roy-3 measurement. Every recommend_action call emits
# exactly one event — even on the early-return paths (no scores,
# sub-threshold) — so Roy iterations can distinguish "gate fired
# but consumer didn't run" from "consumer ran and proposed
# nothing." Per the plan: "the event MUST emit even when
# recommend_action returns None."
if not scores:
# Bio-fidelity review fold: distinguish "cluster known, no
# tool scored" (0.0 sentinel — agent had context but nothing
# rewarded) from "cluster unknown" (None — no
# current_cluster_id at all). Roy-3 needs this distinction
# to expose the Wire-A vs recommend_action gap; collapsing
# both into None would elide the H1 signal.
_consulted_on_empty: float | None = 0.0 if current_cluster_id else None
_emit_recommend_action_event(
agent_id=agent_id,
current_cluster_id=current_cluster_id,
cluster_reward_bias_consulted=_consulted_on_empty,
best_tool=None,
best_score=0.0,
min_confidence=min_confidence,
passed_gate=False,
)
return None

best_tool = max(scores, key=lambda t: (scores[t], t))
best_score = scores[best_tool]

# Record the cluster_reward_bias consulted for the best tool —
# informative for Roy-3 because Wire-A renders aggregate biases
# across all clusters, but recommend_action only consults the
# active-cluster value. Mismatch between rendered Wire-A signal
# and consulted recommend_action signal is the failure mode the
# H1 sub-hypothesis branches (cross_modal_substrate_binding.md /
# jepa_cross_modal_alignment.md) eventually address.
consulted_bias: float | None = None
if current_cluster_id:
consulted_bias = self.cluster_reward_bias(agent_id, current_cluster_id, f"tool:{best_tool}")

if best_score < min_confidence:
_emit_recommend_action_event(
agent_id=agent_id,
current_cluster_id=current_cluster_id,
cluster_reward_bias_consulted=consulted_bias,
best_tool=best_tool,
best_score=best_score,
min_confidence=min_confidence,
passed_gate=False,
)
return None

_emit_recommend_action_event(
agent_id=agent_id,
current_cluster_id=current_cluster_id,
cluster_reward_bias_consulted=consulted_bias,
best_tool=best_tool,
best_score=best_score,
min_confidence=min_confidence,
passed_gate=True,
)
return {
"tool_name": best_tool,
"params": {},
Expand Down
81 changes: 80 additions & 1 deletion src/maxim/simulation/instrumented_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
Captures every tool execution (success, failure, and autonomy rejections)
as ActionRecords in a RecordingSink. Transparently wraps an existing
Executor without changing its interface.

Stage 0b (release_0_9_1.md) telemetry: each record carries
``agent_id`` / ``session_id`` from the ``utils/http.py::current_context``
ContextVar (bound at the sim orchestrator entry) and a best-effort
``entity_class`` derived from the action's params. The fields default
to ``None`` when context isn't bound (e.g., unit tests, headless API),
so the producer never raises.
"""

from __future__ import annotations
Expand All @@ -12,6 +19,65 @@

from maxim.simulation.sinks import ActionRecord, ActionSink
from maxim.tools.base import ToolOutput
from maxim.utils.http import current_context


def _derive_entity_class(tool_name: str, params: dict[str, Any]) -> str | None:
"""Best-effort entity-class extraction for Stage 0b telemetry.

**DO NOT consume this field from any substrate write path** (NAc,
EC, ATL, Hippocampus, PainBus). It exists for Roy-3 post-hoc
exposure-count normalization and the Roy harness's per-class
plotting. Substrate consumers must derive entity identity from
the percept text + EC pattern completion, NEVER from this field.
The bio-fidelity guardrail in the bio-lens review: this field is
walled off from the substrate so it can stay a best-effort
heuristic without contaminating the 1.0 thesis ("substrate carries
cognition; language is I/O").

**Strict opt-in derivation:** ships explicit-param-only at 0.9.1
after the pre-merge review caught the verb-strip heuristic
producing noisy buckets on non-entity tools (``get_status`` →
``"status"``, ``set_entity_sensor`` → ``"entity_sensor"``,
``do_something_clever`` → ``"something_clever"``). Roy-3
normalization explicitly skips ``None``, so being conservative is
strictly safer than producing wrong buckets — silent miscount is
worse than missing data.

Heuristics in priority order:
1. ``params["entity_class"]`` — explicit caller override.
2. ``params["target"]`` / ``params["entity"]`` / ``params["object"]`` —
the conventional param names entity-binding tools use.

Returns ``None`` when neither (1) nor (2) is present, including
for tools whose name suggests an entity binding but didn't pass
one through params (``infant_humanoid_pick_up`` with no target →
None). The field is best-effort metadata.

TODO (1.1): replace this opt-in heuristic with a declared
``Tool.entity_class: str | None`` field on the Tool ABC, so tool
authors can opt their tools into Roy-3 attribution explicitly
without participating in this derivation logic at all. Tracks
the same surface as ``feedback_two_identity_schemes.md`` — the
substrate already uses tool-name AND EC-cluster identity for one
concept; declared ``entity_class`` would be a third explicit
handle that tooling can rely on.
"""
if not isinstance(params, dict):
return None
# 1. Explicit caller override.
explicit = params.get("entity_class")
if isinstance(explicit, str) and explicit:
return explicit
# 2. Conventional param names.
for key in ("target", "entity", "object"):
val = params.get(key)
if isinstance(val, str) and val:
return val
# No verb-strip path: pre-merge review showed it produced noise
# on non-entity tools that Roy-3 normalization would silently
# mis-attribute. Future work tracked in the docstring TODO.
return None


class InstrumentedExecutor:
Expand All @@ -33,6 +99,16 @@ def __init__(self, executor: Any, sink: ActionSink) -> None:
self._executor = executor
self._sink = sink

def _telemetry_fields(self, tool_name: str, params: dict[str, Any]) -> dict[str, Any]:
    """Build the Stage 0b telemetry fields for one action record.

    ``agent_id`` and ``session_id`` are read from the object returned by
    ``current_context()``; both are ``None`` when it returns ``None``.
    ``entity_class`` is whatever ``_derive_entity_class`` yields for the
    given tool name and params.
    """
    bound = current_context()
    if bound is None:
        # No context bound: leave both identity fields unset.
        agent_id = session_id = None
    else:
        agent_id = bound.agent_id
        session_id = bound.session_id
    return {
        "agent_id": agent_id,
        "session_id": session_id,
        "entity_class": _derive_entity_class(tool_name, params),
    }

def execute(self, action: dict[str, Any]) -> ToolOutput:
"""Execute a tool action and record the result."""
tool_name = action.get("tool_name", "unknown")
Expand All @@ -54,20 +130,23 @@ def execute(self, action: dict[str, Any]) -> ToolOutput:
result_error=result.error,
blocked=is_blocked,
block_reason=result.error if is_blocked else None,
**self._telemetry_fields(tool_name, params),
)
)

return result

def record_block(self, tool_name: str, reason: str, params: dict[str, Any] | None = None) -> None:
    """Record that an action was blocked (e.g., by FearAgent or autonomy).

    Args:
        tool_name: Name of the tool whose execution was blocked.
        reason: Why it was blocked; stored as ``block_reason``.
        params: The blocked action's params. ``None`` (or any falsy
            value) is recorded as an empty dict.
    """
    # Normalise once so the recorded tool_args and the telemetry
    # derivation both see the same dict.
    params = params or {}
    self._sink.record(
        ActionRecord(
            timestamp=time.time(),
            tool_name=tool_name,
            # Passed exactly once: a second ``tool_args=`` keyword here
            # would be a "keyword argument repeated" SyntaxError.
            tool_args=params,
            blocked=True,
            block_reason=reason,
            **self._telemetry_fields(tool_name, params),
        )
    )

Expand Down
Loading
Loading