Draft
Changes from all commits (34 commits)
7a676ba
Add threshold-driven compaction strategy
nhicks00 Apr 24, 2026
1e1a530
Add compaction precision probe test
nhicks00 Apr 24, 2026
4f20efa
Add live compaction QA eval harness
nhicks00 Apr 24, 2026
69dabce
Make live compaction eval use legacy router
nhicks00 Apr 24, 2026
243a7a4
Respect context window in live compaction eval
nhicks00 Apr 24, 2026
9322b1b
Document threshold compaction live eval results
nhicks00 Apr 24, 2026
a01d89e
Rebrand threshold compaction as continuity
nhicks00 Apr 24, 2026
198c179
Make continuity fallback summarization deterministic
nhicks00 Apr 24, 2026
ecaa44f
Show continuity compaction status
nhicks00 Apr 24, 2026
2ee13d3
Add continuity task ledger memory
nhicks00 Apr 24, 2026
d8da0f4
Add semantic continuity task detection
nhicks00 Apr 24, 2026
f99bb3f
Add continuity memory v2
nhicks00 Apr 24, 2026
d1a7554
Show continuity semantic memory status
nhicks00 Apr 24, 2026
3866a3f
Reduce continuity semantic memory timeouts
nhicks00 Apr 24, 2026
f90c162
Increase continuity semantic timeout
nhicks00 Apr 24, 2026
b1627d3
Document Continuity core integration rationale
nhicks00 Apr 24, 2026
2237765
Document Continuity compaction behavior
nhicks00 Apr 24, 2026
53f672c
Use raw text request for Continuity memory
nhicks00 Apr 24, 2026
54b16c7
Fix Continuity semantic memory extraction
nhicks00 Apr 24, 2026
a26d8de
Add Continuity predictive trigger floor
nhicks00 Apr 24, 2026
f412d8c
Make Continuity compaction target adaptive
nhicks00 Apr 24, 2026
c08fa3d
Format Continuity compaction changes
nhicks00 Apr 24, 2026
81c7ba7
Trim Continuity history to displayed target
nhicks00 Apr 24, 2026
2576fb1
Document Continuity target trimming
nhicks00 Apr 24, 2026
5c09429
Use configured Continuity target
nhicks00 Apr 24, 2026
c236733
Preserve recent raw tail during Continuity trim
nhicks00 Apr 24, 2026
a7d6fc0
Move Continuity compaction into plugin hooks
nhicks00 Apr 30, 2026
e194e62
Remove unrelated Codex client change
nhicks00 Apr 30, 2026
575bd69
Add Continuity semantic model setting
nhicks00 Apr 30, 2026
e8882cc
Default Continuity semantic model to active chat model
nhicks00 Apr 30, 2026
13e7df4
Default compaction strategy to Continuity
nhicks00 Apr 30, 2026
b792859
Retry Continuity semantic memory on empty active response
nhicks00 Apr 30, 2026
6c54f3f
Preserve ChatGPT streamed text for semantic memory
nhicks00 Apr 30, 2026
cba97c0
Cover Continuity semantic model wiring
nhicks00 May 2, 2026
3 changes: 3 additions & 0 deletions AGENTS.md
@@ -40,6 +40,9 @@ That's it. The plugin loader auto-discovers `register_callbacks.py` in subdirs.
| `register_tools` | Tool registration | `() -> list[dict]` with `{"name": str, "register_func": callable}` |
| `register_agents` | Agent catalogue | `() -> list[dict]` with `{"name": str, "class": type}` |
| `register_model_type` | Custom model type | `() -> list[dict]` with `{"type": str, "handler": callable}` |
| `register_config_keys` | Config discoverability | `() -> list[str]` |
| `register_compaction_strategies` | Compaction strategy catalogue | `() -> list[str \| dict]` |
| `compact_message_history` | Message history compaction | `(strategy, agent, messages, model_max, context_overhead, model_name=None, force=False, total_tokens=None, proportion_used=None) -> dict \| None` |
| `load_model_config` | Patch model config | `(*args, **kwargs) -> Any` |
| `load_models_config` | Inject models | `() -> dict` |
| `get_model_system_prompt` | Per-model prompt | `(model_name, default_prompt, user_prompt) -> dict \| None` |
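The three new rows above give a plugin everything it needs to ship its own compaction strategy end to end. A minimal sketch of a plugin `register_callbacks.py` follows; the `continuity` strategy name, the `continuity_semantic_model` key, and the 0.85 trigger are illustrative assumptions, not a verbatim copy of the plugin added in this PR.

```python
# Hypothetical plugin register_callbacks.py -- a sketch, not the PR's actual plugin.


def register_config_keys():
    # Make plugin-owned keys discoverable to /set help; config persistence
    # already accepts arbitrary keys, so this is advisory only.
    return ["continuity_semantic_model"]


def register_compaction_strategies():
    # Extend the accepted compaction_strategy values with the plugin's own name.
    return [{"name": "continuity", "description": "Threshold-driven semantic compaction"}]


def compact_message_history(
    strategy,
    agent,
    messages,
    model_max,
    context_overhead,
    model_name=None,
    force=False,
    total_tokens=None,
    proportion_used=None,
):
    # Decline strategies this plugin does not own so core can fall back.
    if strategy != "continuity":
        return None
    # Below the (assumed) trigger, report handled-but-unchanged unless /compact forced it.
    if not force and (proportion_used or 0.0) < 0.85:
        return {"handled": True, "messages": messages, "dropped_messages": []}
    # Keep a recent tail and report everything else as dropped for hash tracking.
    kept, dropped = messages[-10:], messages[:-10]
    return {"handled": True, "messages": kept, "dropped_messages": dropped}
```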
86 changes: 80 additions & 6 deletions code_puppy/agents/_compaction.py
@@ -32,6 +32,7 @@
prune_interrupted_tool_calls,
)
from code_puppy.callbacks import (
on_compact_message_history,
on_message_history_processor_end,
on_message_history_processor_start,
)
@@ -281,6 +282,7 @@ def compact(
messages: List[ModelMessage],
model_max: int,
context_overhead: int,
force: bool = False,
) -> Tuple[List[ModelMessage], List[ModelMessage]]:
"""Unified compaction entrypoint. Replaces ``message_history_processor``.

@@ -290,6 +292,8 @@
messages: Current message history (already accumulated by the caller).
model_max: Effective model context window in tokens.
context_overhead: Estimated overhead for system prompt + tool schemas.
force: If true, run the configured compaction strategy even below its
normal trigger. Used by the manual ``/compact`` command.

Returns:
``(new_messages, dropped_messages_for_hash_tracking)``.
@@ -312,12 +316,34 @@
)
update_spinner_context(context_summary)

strategy = get_compaction_strategy()
plugin_result = _run_plugin_compaction(
strategy=strategy,
agent=agent,
messages=messages,
model_max=model_max,
context_overhead=context_overhead,
model_name=model_name,
force=force,
total_tokens=total_tokens,
proportion_used=proportion_used,
)
if plugin_result is not None:
result_messages, summarized_messages = plugin_result
_update_final_spinner(result_messages, model_name, model_max)
return result_messages, summarized_messages

if strategy not in {"summarization", "truncation"}:
emit_warning(
f"Compaction strategy '{strategy}' was not handled by any plugin; "
"falling back to truncation for this compaction cycle."
)
strategy = "truncation"

threshold = get_compaction_threshold()
if proportion_used <= threshold:
if not force and proportion_used <= threshold:
return messages, []

strategy = get_compaction_strategy()

protected_tokens = get_protected_token_count()
filtered = filter_huge_messages(messages, model_name)

@@ -360,8 +386,58 @@
filtered, protected_tokens, model_name
)

_update_final_spinner(result_messages, model_name, model_max)

return result_messages, summarized_messages


def _run_plugin_compaction(
*,
strategy: str,
agent: Any,
messages: List[ModelMessage],
model_max: int,
context_overhead: int,
model_name: Optional[str],
force: bool,
total_tokens: int,
proportion_used: float,
) -> Tuple[List[ModelMessage], List[ModelMessage]] | None:
"""Return plugin compaction output when a plugin handles the strategy."""
results = on_compact_message_history(
strategy=strategy,
agent=agent,
messages=messages,
model_max=model_max,
context_overhead=context_overhead,
model_name=model_name,
force=force,
total_tokens=total_tokens,
proportion_used=proportion_used,
)
for result in results:
if not isinstance(result, dict) or not result.get("handled"):
continue
result_messages = result.get("messages")
if not isinstance(result_messages, list):
emit_warning(
f"Compaction plugin for '{strategy}' returned no message list; ignoring."
)
continue
dropped = result.get("dropped_messages", result.get("dropped", []))
if not isinstance(dropped, list):
dropped = []
return result_messages, dropped
return None


def _update_final_spinner(
messages: List[ModelMessage],
model_name: Optional[str],
model_max: int,
) -> None:
final_token_count = sum(
estimate_tokens_for_message(m, model_name) for m in result_messages
estimate_tokens_for_message(m, model_name) for m in messages
)
final_summary = SpinnerBase.format_context_info(
final_token_count,
@@ -370,8 +446,6 @@ def compact(
)
update_spinner_context(final_summary)

return result_messages, summarized_messages


def _strip_empty_thinking_parts(
messages: List[ModelMessage],
58 changes: 58 additions & 0 deletions code_puppy/callbacks.py
@@ -36,6 +36,9 @@
"register_mcp_catalog_servers",
"register_browser_types",
"register_model_providers",
"register_config_keys",
"register_compaction_strategies",
"compact_message_history",
"message_history_processor_start",
"message_history_processor_end",
"on_message",
@@ -75,6 +78,9 @@
"register_mcp_catalog_servers": [],
"register_browser_types": [],
"register_model_providers": [],
"register_config_keys": [],
"register_compaction_strategies": [],
"compact_message_history": [],
"message_history_processor_start": [],
"message_history_processor_end": [],
"on_message": [],
@@ -703,6 +709,58 @@ def on_register_model_providers() -> List[Any]:
return _trigger_callbacks_sync("register_model_providers")


def on_register_config_keys() -> List[Any]:
"""Collect additional config keys exposed by plugins.

Each callback may return a list/tuple/set of config key strings, a single
string key, or ``None``. Core treats this as discoverability only; config
persistence still accepts arbitrary keys for backward compatibility.
"""
return _trigger_callbacks_sync("register_config_keys")


def on_register_compaction_strategies() -> List[Any]:
"""Collect plugin-provided compaction strategy declarations.

Each callback may return strings or dicts with at least ``{"name": str}``.
The names extend the accepted ``compaction_strategy`` values; actual
compaction behavior is supplied through ``compact_message_history``.
"""
return _trigger_callbacks_sync("register_compaction_strategies")


def on_compact_message_history(
strategy: str,
agent: Any,
messages: List[Any],
model_max: int,
context_overhead: int,
model_name: str | None = None,
force: bool = False,
total_tokens: int | None = None,
proportion_used: float | None = None,
) -> List[Any]:
"""Allow plugins to handle message-history compaction.

A callback should return ``None`` when it does not handle ``strategy``. To
handle compaction, return a dict with ``handled=True``, ``messages`` set to
the rebuilt message list, and ``dropped``/``dropped_messages`` set to the
source messages removed from live history for compacted-hash tracking.
"""
return _trigger_callbacks_sync(
"compact_message_history",
strategy,
agent,
messages,
model_max,
context_overhead,
model_name,
force,
total_tokens,
proportion_used,
)


def on_message_history_processor_start(
agent_name: str,
session_id: str | None,
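For quick reference, the contract in the `on_compact_message_history` docstring above reduces to two return shapes. The sketch below uses placeholder strings where real message objects would be:

```python
# Illustrative only: the two return shapes a compact_message_history callback may produce.

rebuilt_history = ["summary-message", "recent-message-1", "recent-message-2"]
removed_from_history = ["old-message-1", "old-message-2"]

# Not handling this strategy: return None so other plugins or the built-in
# summarization/truncation paths can run.
not_handled = None

# Handling it: core replaces live history with `messages` and records
# `dropped_messages` (or `dropped`) for compacted-hash tracking.
handled = {
    "handled": True,
    "messages": rebuilt_history,
    "dropped_messages": removed_from_history,
}
```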
63 changes: 39 additions & 24 deletions code_puppy/chatgpt_codex_client.py
@@ -279,38 +279,26 @@ async def _convert_stream_to_response(
f"Got final response data with keys: {list(final_response_data.keys())}"
)

# Build the final response body
collected_output = self._build_collected_output(
collected_text, collected_tool_calls
)

# Build the final response body. Some ChatGPT Codex responses stream
# output_text deltas but send `output: []` in response.completed when
# store=false. Preserve the completed response metadata, but patch in
# collected output so pydantic-ai can parse the non-streaming result.
if final_response_data:
response_body = final_response_data
response_body = dict(final_response_data)
if not response_body.get("output") and collected_output:
response_body["output"] = collected_output
else:
# Fallback: construct a minimal response from collected data
response_body = {
"id": "reconstructed",
"object": "response",
"output": [],
"output": collected_output,
}

if collected_text:
response_body["output"].append(
{
"type": "message",
"role": "assistant",
"content": [
{"type": "output_text", "text": "".join(collected_text)}
],
}
)

for tool_call in collected_tool_calls:
response_body["output"].append(
{
"type": "function_call",
"name": tool_call["name"],
"arguments": tool_call["arguments"],
"call_id": tool_call["call_id"],
}
)

# Create a new response with the complete body
body_bytes = json.dumps(response_body).encode("utf-8")
logger.debug(f"Reconstructed response body: {len(body_bytes)} bytes")
@@ -323,6 +311,33 @@
)
return new_response

@staticmethod
def _build_collected_output(
collected_text: list[str], collected_tool_calls: list[dict[str, str]]
) -> list[dict[str, Any]]:
output: list[dict[str, Any]] = []
if collected_text:
output.append(
{
"type": "message",
"role": "assistant",
"content": [
{"type": "output_text", "text": "".join(collected_text)}
],
}
)

for tool_call in collected_tool_calls:
output.append(
{
"type": "function_call",
"name": tool_call["name"],
"arguments": tool_call["arguments"],
"call_id": tool_call["call_id"],
}
)
return output


def create_codex_async_client(
headers: dict[str, str] | None = None,
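The fallback described in the comment above is easiest to see with a small illustrative payload (ids and text are made up): when `response.completed` arrives with `output: []`, the text deltas collected during streaming are patched back into the body.

```python
# Illustrative only: shape of the patched response body when the completed
# event carried no output (e.g. store=false) but output_text deltas were streamed.
collected_text = ["Hello, ", "world."]

final_response_data = {"id": "resp_123", "object": "response", "output": []}

response_body = dict(final_response_data)
if not response_body.get("output"):
    response_body["output"] = [
        {
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "".join(collected_text)}],
        }
    ]
# response_body now carries the streamed text, so the non-streaming parser
# sees a normal assistant message instead of an empty result.
```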
9 changes: 7 additions & 2 deletions code_puppy/command_line/config_commands.py
@@ -34,6 +34,7 @@ def handle_show_command(command: str) -> bool:
from code_puppy.config import (
get_auto_save_session,
get_compaction_strategy,
get_compaction_strategy_names,
get_compaction_threshold,
get_default_agent,
get_effective_temperature,
@@ -60,6 +61,7 @@
protected_tokens = get_protected_token_count()
compaction_threshold = get_compaction_threshold()
compaction_strategy = get_compaction_strategy()
compaction_strategy_names = ", ".join(sorted(get_compaction_strategy_names()))
global_temperature = get_temperature()
effective_temperature = get_effective_temperature(model)

@@ -79,7 +81,7 @@
[bold]auto_save_session:[/bold] {"[green]enabled[/green]" if auto_save else "[yellow]disabled[/yellow]"}
[bold]protected_tokens:[/bold] [cyan]{protected_tokens:,}[/cyan] recent tokens preserved
[bold]compaction_threshold:[/bold] [cyan]{compaction_threshold:.1%}[/cyan] context usage triggers compaction
[bold]compaction_strategy:[/bold] [cyan]{compaction_strategy}[/cyan] (summarization or truncation)
[bold]compaction_strategy:[/bold] [cyan]{compaction_strategy}[/cyan] ({compaction_strategy_names})
[bold]resume_message_count:[/bold] [cyan]{get_resume_message_count()}[/cyan] messages shown on /resume
[bold]reasoning_effort:[/bold] [cyan]{get_openai_reasoning_effort()}[/cyan]
[bold]verbosity:[/bold] [cyan]{get_openai_verbosity()}[/cyan]
@@ -199,9 +201,12 @@ def handle_set_command(command: str) -> bool:
key = tokens[1]
value = ""
else:
from code_puppy.config import get_compaction_strategy_names

config_keys = get_config_keys()
if "compaction_strategy" not in config_keys:
config_keys.append("compaction_strategy")
compaction_strategies = ", ".join(sorted(get_compaction_strategy_names()))
session_help = (
"\n[yellow]Session Management[/yellow]"
"\n [cyan]auto_save_session[/cyan] Auto-save chat after every response (true/false)"
@@ -212,7 +217,7 @@
)
emit_warning(
Text.from_markup(
f"Usage: /set KEY=VALUE or /set KEY VALUE\nConfig keys: {', '.join(config_keys)}\n[dim]Note: compaction_strategy can be 'summarization' or 'truncation'[/dim]{session_help}{keymap_help}"
f"Usage: /set KEY=VALUE or /set KEY VALUE\nConfig keys: {', '.join(config_keys)}\n[dim]Note: compaction_strategy can be one of: {compaction_strategies}[/dim]{session_help}{keymap_help}"
)
)
return True