Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions src/maxim/embodiment/body.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import logging
import time
from dataclasses import dataclass, field
from collections.abc import Iterable
from typing import Any

from maxim.embodiment.sem import Entity, FailureMode, SensorReading
Expand Down Expand Up @@ -574,6 +575,161 @@ def format_body_state_for_prompt(self) -> str:
def failure_history(self) -> list[FailureEvent]:
    """Return a shallow copy of the recorded failure events.

    A new list is built on every call, so callers can reorder or
    truncate the result without touching the internal
    ``_failure_history`` container. The events themselves are shared,
    not copied.
    """
    return [*self._failure_history]

# -- Wire 3: embodiment-state → action filter (release_0_9_1.md Stage 1) -
#
# **I/O-layer boundary, not substrate contamination.** The thresholds
# gate the LLM proposer's tool surface, NOT substrate encoding.
# EC clusters, NAc reward_bias, and the natural failure → pain →
# NAc learning chain are untouched. This is the same downstream-of-
# encoding exemption Wire-A's bias-band labels operate under (per
# bio-fidelity pre-merge review).
#
# Default thresholds (also documented in
# docs/plans/bio_emergent_persona_foundations.md § Wire 3).
# The agent_loop hook reads these via the method signature so a
# non-default threshold pair can ship in a future tuning experiment
# without touching call sites.
#
# **Band semantics (pinned in tests at the strict-vs-inclusive split):**
# - ``integrity < 0.3`` → disabled (filtered from prompt)
# - ``0.3 <= integrity < 0.6`` → degraded (annotated in prompt)
# - ``integrity >= 0.6`` → healthy (no annotation)
# The bands partition [0, 1] cleanly — no overlap, no gap.

# Default lower band edge: ``get_disabled_affordances`` treats an
# affordance as disabled when its modulator integrity is STRICTLY below
# this value; ``get_degraded_affordances`` uses it as its inclusive
# lower bound.
_WIRE_3_DISABLE_THRESHOLD: float = 0.3
# Default upper band edge: ``get_degraded_affordances`` uses this as its
# exclusive upper bound, so integrity at or above it is not reported as
# degraded.
_WIRE_3_DEGRADE_THRESHOLD: float = 0.6

def _iter_modulator_affordance_pairs(
    self,
) -> Iterable[tuple[str, float]]:
    """Yield ``(base_tool_name, integrity)`` for each modulator affordance.

    Walks every entity produced by ``self.root.walk()`` and every
    modulator in ``entity.modulators``. One pair is yielded per key of
    the modulator's ``affordances`` mapping; a modulator with no
    ``affordances`` attribute yields nothing (its integrity is still
    computed first).

    Naming:
        ``base_tool_name`` is ``f"{entity.name}_{affordance_name}"``.
        Per the Wire 3 design notes this is the form
        ``tool_bridge.generate_tools_for_entity`` builds BEFORE
        ``_resolve_tool_name`` disambiguates against the registry. On
        the common single-body topology (Roy / cradle / Reachy) there
        are no collisions, so the base name equals the registered tool
        name. If a collision renamed a tool to
        ``{ancestor}_{base_name}``, the agent_loop comparison against
        the live tool list does not match and the tool stays available
        (fail-open) rather than being mis-gated.

    Integrity:
        * No ``compute_integrity`` attribute (older SpecModulator-shaped
          or capability-only modulators): ``1.0``, i.e. "not damaged".
        * ``compute_integrity()`` raises, or its result cannot be
          converted with ``float()``: a WARNING is logged and ``1.0``
          is used. This fail-open keeps the loop stable;
          treat-as-disabled is the more cautious alternative and is
          deferred to a future tuning experiment.
    """
    for entity in self.root.walk():
        for modulator in entity.modulators.values():
            # Start from "healthy"; only a successful integrity read
            # can move the value away from 1.0.
            integrity = 1.0
            if hasattr(modulator, "compute_integrity"):
                try:
                    integrity = float(modulator.compute_integrity())
                except Exception as exc:
                    # A broken integrity calculation is itself a sign
                    # that self-monitoring is failing, so make it
                    # visible to operators instead of hiding it.
                    log.warning(
                        "Wire 3: compute_integrity() raised on %s/%s — treating as healthy (1.0): %s",
                        entity.name,
                        getattr(modulator, "name", "?"),
                        exc,
                    )
                    integrity = 1.0
            if hasattr(modulator, "affordances"):
                for affordance_name in modulator.affordances.keys():
                    yield f"{entity.name}_{affordance_name}", integrity

def get_disabled_affordances(self, *, threshold: float | None = None) -> set[str]:
    """Return the base tool names routed through critically-damaged parts.

    A name (``{entity.name}_{affordance_name}``) is included when the
    integrity reported for its owning modulator is **strictly below**
    the cutoff. The agent_loop hook removes these names from the
    per-tick available-tools list before the LLM prompt is built, so a
    damaged-arm agent stops attempting arm-routed affordances without
    any prompt-injection scaffolding (see
    bio_emergent_persona_foundations.md § Wire 3).

    Names come from ``_iter_modulator_affordance_pairs``: they match
    registered tool names on a collision-free single-body topology and
    simply fail to match (tool stays available) when a collision
    renamed the tool.

    Args:
        threshold: Optional override for ``_WIRE_3_DISABLE_THRESHOLD``
            (0.3). It is converted with ``float()``; ``None`` selects
            the default.

    Returns:
        A new set of base tool names whose integrity is below the
        cutoff; empty when nothing is critically damaged.
    """
    if threshold is None:
        cutoff = self._WIRE_3_DISABLE_THRESHOLD
    else:
        cutoff = float(threshold)

    disabled: set[str] = set()
    for tool_name, integrity in self._iter_modulator_affordance_pairs():
        if integrity < cutoff:
            disabled.add(tool_name)
    return disabled

@staticmethod
def integrity_to_felt_phrase(integrity: float) -> str:
    """Translate a degraded-band integrity into a felt-sensation phrase.

    The prompt-visible annotation is worded as a proprioceptive percept
    ("feels strained", "feels weakened") rather than a system advisory
    such as "DAMAGED: integrity 0.4" (bio-fidelity pre-merge review,
    Wire 3 fold). The LLM only sees this qualitative phrase; the number
    is kept for the ``sim_log("WIRE_3_FILTER", ...)`` JSONL event.

    Two bands split the degraded range [0.3, 0.6):

    - ``0.45 <= integrity < 0.6``  → ``"feels strained"``
    - ``0.3 <= integrity < 0.45``  → ``"feels weakened, prone to failing"``

    Args:
        integrity: Modulator integrity value.

    Returns:
        The phrase for the band, or ``""`` when ``integrity`` lies
        outside [0.3, 0.6). The empty result is defensive: the
        agent_loop hook is only expected to call this for degraded
        values.
    """
    # Anything outside the degraded range has no felt annotation.
    if not (0.3 <= integrity < 0.6):
        return ""
    return "feels strained" if integrity >= 0.45 else "feels weakened, prone to failing"

def get_degraded_affordances(
    self,
    *,
    disable_threshold: float | None = None,
    degrade_threshold: float | None = None,
) -> dict[str, float]:
    """Return affordances on partially-damaged components.

    The result maps ``base_tool_name`` to integrity for every modulator
    affordance whose owning modulator's integrity lies in
    ``[disable_threshold, degrade_threshold)`` — damaged but not
    disabled.

    The agent_loop hook appends a felt-sensation phrase from
    ``integrity_to_felt_phrase`` (e.g. ``(feels strained)``) to these
    tools' descriptions, so the LLM proposer reads the cost of using
    them in proprioceptive voice. The numeric integrity is NOT shown to
    the LLM; it is kept for the ``WIRE_3_FILTER`` log event. Learning is
    post-hoc via the standard reward path (damaged-tool use → likelier
    failure → NAc credit).

    Note: ``integrity_to_felt_phrase`` only defines phrases for the
    default band [0.3, 0.6). With overridden thresholds, entries whose
    integrity falls outside that range are still returned here, but
    they receive no annotation from the hook.

    Args:
        disable_threshold: Inclusive lower bound. Below it the
            affordance belongs to ``get_disabled_affordances`` and is
            NOT in this map. Defaults to
            ``_WIRE_3_DISABLE_THRESHOLD`` (0.3).
        degrade_threshold: Exclusive upper bound. At or above it the
            affordance is healthy and NOT in this map. Defaults to
            ``_WIRE_3_DEGRADE_THRESHOLD`` (0.6).

    Returns:
        A new dict in iteration order of
        ``_iter_modulator_affordance_pairs``. If the same base name is
        yielded more than once, the last integrity wins. The dict is
        empty when the bounds describe an empty range.
    """
    lower = self._WIRE_3_DISABLE_THRESHOLD if disable_threshold is None else float(disable_threshold)
    upper = self._WIRE_3_DEGRADE_THRESHOLD if degrade_threshold is None else float(degrade_threshold)
    return {
        tool_name: integrity
        for tool_name, integrity in self._iter_modulator_affordance_pairs()
        if lower <= integrity < upper
    }

# -- failure persistence -------------------------------------------------

def export_failure_state(self) -> dict[str, Any]:
Expand Down
138 changes: 137 additions & 1 deletion src/maxim/runtime/agent_loop.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from __future__ import annotations

import itertools
import logging
import os
import re
import time
import itertools
from typing import TYPE_CHECKING, Any

from maxim.evaluation.base import Evaluator
Expand Down Expand Up @@ -48,6 +49,18 @@
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Wire 3 (release_0_9_1.md Stage 1): regex matching the felt-sensation
# annotations Embodiment.integrity_to_felt_phrase produces, in the exact
# form the agent_loop hook appends them: a leading space, the phrase in
# parentheses, anchored at the end of the description. The hook uses this
# to strip a stale annotation before re-applying the current-tick one —
# this guards against multi-tick integrity drift accumulating multiple
# suffixes (e.g., integrity 0.55 → ``(feels strained)``, then 0.40
# → ``(feels weakened, prone to failing)``; without the strip, both would
# coexist in the description). The phrases are pinned in tests, so a
# future additional band must update both this regex AND
# ``Embodiment.integrity_to_felt_phrase`` together.
_WIRE3_PHRASE_RE = re.compile(r" \((?:feels strained|feels weakened, prone to failing)\)$")


from maxim.runtime.loop_state import (
_persist_state_json,
_get_failure_strategy,
Expand Down Expand Up @@ -3031,6 +3044,85 @@ def _get_all_tools() -> set[str]:

# Get available tools for this mode
available_tools = mode_info.get_available_tools(_all_tools)

# Wire 3 (release_0_9_1.md Stage 1): filter tools
# routed through critically-damaged components and
# collect degraded-affordance annotations. Pulls
# from Embodiment.get_disabled_affordances() /
# .get_degraded_affordances() which read each
# modulator's compute_integrity(). Default
# thresholds: integrity < 0.3 → disabled
# (filtered out); 0.3 <= integrity < 0.6 →
# annotated with a felt-sensation phrase
# ("feels strained" / "feels weakened, prone to
# failing") so the LLM proposer reads the
# affordance's cost in proprioceptive voice
# rather than as a system advisor (bio-fidelity
# fold). Fail-open at the narrowed exception
# surface (no embodiment, missing methods, broken
# modulator shape) — the filter is a no-op but
# the WARNING surfaces operator-visible signal.
#
# NOTE on `last_surfaced_tools`: post-filter is
# intentional. The learned tool-relevance index
# at line ~1700 calls `record_surfaced_but_unused`
# — disabled tools weren't surfaced, so they
# don't decay. Future: if a disabled tool
# recovers, the relevance index resumes decay
# the next tick the tool surfaces again.
_wire3_embodiment = getattr(executor, "embodiment", None)
_wire3_degraded: dict[str, float] = {}
_wire3_disabled: set[str] = set()
if _wire3_embodiment is not None:
try:
_wire3_disabled = _wire3_embodiment.get_disabled_affordances()
if _wire3_disabled:
available_tools = [t for t in available_tools if t not in _wire3_disabled]
_wire3_degraded = _wire3_embodiment.get_degraded_affordances()
except (AttributeError, TypeError) as e:
# Narrowed from broad Exception per
# arch-lens A4: the inner body.py guard
# already swallows compute_integrity
# raises with a WARNING. Outer surface
# failures here are method-shape
# mismatches (non-Embodiment object
# plugged into executor.embodiment) —
# WARN so operator review catches it.
logger.warning(
"Wire 3: get_disabled/degraded_affordances shape mismatch — filter no-op: %s",
e,
)
_wire3_degraded = {}
_wire3_disabled = set()
# Emit Roy-3 disambiguator (bio-fidelity B2):
# without this, "Wire 3 hid the tool" and
# "substrate learned avoidance" are
# indistinguishable post-hoc. The event
# lists which affordances were filtered /
# annotated each LLM submission so Roy-3
# can quantify Wire 3's effect surface.
if _wire3_disabled or _wire3_degraded:
try:
from maxim.simulation import sim_logger as _sl_w3

_w3_tick = int(time.time() - _sl_w3._sim_start) if _sl_w3._sim_start > 0.0 else 0
_sl_w3.sim_log(
"WIRE_3_FILTER",
f"wire_3: disabled={len(_wire3_disabled)} degraded={len(_wire3_degraded)}",
{
"tick": _w3_tick,
"disabled_tools": sorted(_wire3_disabled),
# Pass integrity floats only here — the LLM
# sees the felt phrases above.
"degraded_integrities": {
name: round(integrity, 4) for name, integrity in _wire3_degraded.items()
},
},
)
except ImportError:
# Non-sim runtime — observability
# is optional, never load-bearing.
pass
last_surfaced_tools = list(available_tools)

# Get full tool info for prompt (description, params, example).
Expand Down Expand Up @@ -3071,6 +3163,50 @@ def _get_all_tools() -> set[str]:
except (KeyError, Exception):
pass

# Wire 3: annotate degraded tools' descriptions in
# place with a felt-sensation phrase (bio-fidelity
# fold). The annotation lives at the end of the
# description string so the LLM reads the body's
# state in proprioceptive voice without losing
# the tool's normal capability blurb. Uses the
# per-tool entry's structure (dict for dynamic
# tools, TOOL_DESCRIPTIONS dict for builtin);
# skips any tool whose description shape we don't
# recognise, fail-open.
#
# Idempotency under integrity drift (arch A1):
# if integrity ticks 0.5 → 0.4 → 0.5 across a
# session, the felt phrase changes per band.
# The regex strip removes any existing
# ``(feels …)`` Wire 3 annotation before
# appending the current one so phrases don't
# accumulate. Healthy / disabled affordances
# never enter this loop, so the only way to
# have an annotation present is via a prior
# Wire 3 pass.
if _wire3_degraded:
for name, integrity in _wire3_degraded.items():
entry = tool_descriptions.get(name)
if not isinstance(entry, dict):
continue
base_desc = entry.get("description", "")
if not isinstance(base_desc, str):
continue
phrase = _wire3_embodiment.integrity_to_felt_phrase(integrity)
if not phrase:
continue
annotation = f" ({phrase})"
# Strip any prior felt annotation pinned
# by Embodiment.integrity_to_felt_phrase
# — the two bands give two distinct
# suffixes which could otherwise stack.
stripped = _WIRE3_PHRASE_RE.sub("", base_desc)
# Copy-on-write — TOOL_DESCRIPTIONS is
# a shared module-level dict; mutating
# it would poison the description for
# future calls (and other agents).
tool_descriptions[name] = {**entry, "description": stripped + annotation}

# Get context pool text
context_pool_text = context_pool.get_context_text(
max_tokens=mode_info.context_window_tokens // 2
Expand Down
Loading
Loading