5 changes: 0 additions & 5 deletions .agents/settings.local.json

This file was deleted.

5 changes: 5 additions & 0 deletions .gitignore
@@ -1,3 +1,8 @@
.env*
.mcp.json
.DS_Store
.agents/settings.local.json

# IDE
.idea/

54 changes: 54 additions & 0 deletions PLAN.md
@@ -0,0 +1,54 @@
# Memory Capability

## Summary

Implements a `Memory` capability (`AbstractCapability` subclass) that provides persistent key-value memory across agent sessions, referencing issues #30 and #31.

## Design

### Architecture

- **`Memory`** dataclass extends `AbstractCapability[AgentDepsT]`
- `get_instructions()` returns a dynamic callable that injects stored memories into the system prompt at run start
- `get_toolset()` returns a `FunctionToolset` with five tools: `save_memory`, `recall_memory`, `search_memories`, `list_memories`, `delete_memory`
- Tool functions use closures over `self.store` (no dependency on agent `deps`)

### Storage

- **`MemoryStore`** protocol: pluggable backend with `get`, `put`, `delete`, `list_all`, `search`
- **`InMemoryStore`**: dict-based, ephemeral, for testing (default)
- **`FileStore`**: JSON file on disk, reads on init, writes on every mutation
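
The pluggable backend contract can be sketched as a `Protocol`. This is a minimal sketch, not the actual interface in `src/pydantic_harness/memory.py`: the method signatures are assumptions, and entries are plain strings here for brevity where the real store works with `MemoryEntry` objects.

```python
from typing import Optional, Protocol


class MemoryStore(Protocol):
    """Pluggable storage backend (signatures are illustrative assumptions)."""

    def get(self, key: str) -> Optional[str]: ...
    def put(self, key: str, content: str) -> None: ...
    def delete(self, key: str) -> bool: ...
    def list_all(self) -> list[str]: ...
    def search(self, query: str) -> list[str]: ...


class InMemoryStore:
    """Dict-based, ephemeral store satisfying the protocol."""

    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    def get(self, key: str) -> Optional[str]:
        return self._data.get(key)

    def put(self, key: str, content: str) -> None:
        self._data[key] = content

    def delete(self, key: str) -> bool:
        # Return True only if the key actually existed
        return self._data.pop(key, None) is not None

    def list_all(self) -> list[str]:
        return list(self._data.values())

    def search(self, query: str) -> list[str]:
        # Simple case-insensitive substring match; the real backend
        # does word-boundary search with relevance scoring
        q = query.lower()
        return [v for v in self._data.values() if q in v.lower()]
```

Because `MemoryStore` is a `Protocol`, any backend (file, SQLite, Redis) satisfies it structurally without inheriting from a base class.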

### Memory Model

- **`MemoryEntry`** dataclass: `key`, `content`, `tags` (list[str]), `scope`, `expires_at`, `created_at`, `updated_at`
- **`MemoryEntryDict`** TypedDict for serialization
- Word-boundary search with relevance scoring (case-insensitive) across key, content, and tags
- Scoping/namespaces via `scope` field with filtering on search/list
- TTL/expiration via `expires_at` with `is_expired()` auto-filtering
- Dedup warning on save when keys are similar (Levenshtein distance <= 2)
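
The entry model and scoring described above can be sketched as follows. Field defaults and the exact scoring formula are assumptions, and the Levenshtein dedup check is omitted; see `src/pydantic_harness/memory.py` for the real implementation.

```python
from __future__ import annotations

import re
from dataclasses import dataclass, field
from datetime import datetime, timezone


def _now() -> datetime:
    return datetime.now(timezone.utc)


@dataclass
class MemoryEntry:
    key: str
    content: str
    tags: list[str] = field(default_factory=list)
    scope: str = 'default'
    expires_at: datetime | None = None
    created_at: datetime = field(default_factory=_now)
    updated_at: datetime = field(default_factory=_now)

    def is_expired(self) -> bool:
        # Entries with no TTL never expire
        return self.expires_at is not None and _now() >= self.expires_at

    def relevance(self, query: str) -> int:
        """Count case-insensitive word-boundary matches across key, content, and tags."""
        pattern = re.compile(rf'\b{re.escape(query)}\b', re.IGNORECASE)
        haystacks = [self.key, self.content, *self.tags]
        return sum(len(pattern.findall(h)) for h in haystacks)
```

Note that `\b` treats underscores as word characters, so a query like `python` matches the tag `python` and the word `Python` in content, but not the substring inside a key like `rule_python_fstrings`.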

### Spec Serialization

- `Memory.get_serialization_name()` returns `"Memory"`
- `Memory.from_spec(backend="file", path="...")` creates a `FileStore`-backed instance
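
A sketch of the spec-driven construction, assuming `from_spec` dispatches on a `backend` keyword (the parameter names and dispatch logic here are assumptions, not the confirmed signature):

```python
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path


@dataclass
class FileStore:
    """JSON-file-backed store (sketch; real class reads the file on init)."""

    path: Path


@dataclass
class Memory:
    store: object | None = None

    @classmethod
    def get_serialization_name(cls) -> str:
        return 'Memory'

    @classmethod
    def from_spec(cls, backend: str = 'memory', path: str | None = None) -> Memory:
        # Dispatch on backend name; default to an ephemeral store
        if backend == 'file':
            if path is None:
                raise ValueError('file backend requires a path')
            return cls(store=FileStore(Path(path)))
        return cls(store={})
```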

## Configuration

| Field | Default | Description |
|-------|---------|-------------|
| `store` | `InMemoryStore()` | Storage backend |
| `inject_memories_in_instructions` | `True` | Include memories in system prompt |
| `max_instructions_memories` | `20` | Cap on memories injected into prompt |

## Files

- `src/pydantic_harness/memory.py` - Capability, stores, entry model
- `src/pydantic_harness/__init__.py` - Re-exports
- `tests/test_memory.py` - 113 tests covering all code paths

## Future Work

- Semantic/vector search backend (e.g. embedding-based `MemoryStore`)
- Session-scoped memory isolation via `for_run()`
- SQLite / Redis backends for production persistence
101 changes: 101 additions & 0 deletions examples/memory/coding_assistant.py
@@ -0,0 +1,101 @@
"""Self-Improving Coding Assistant — procedural memory via instructions injection.

Demonstrates: instructions injection as self-modifying prompt, scoping, search, delete.
"""

from __future__ import annotations

import sys

import logfire
from pydantic_ai import Agent

from pydantic_harness.memory import InMemoryStore, Memory

logfire.configure(send_to_logfire='if-token-present')
logfire.instrument_openai()


def main() -> None:
"""Run the coding assistant example."""
store = InMemoryStore()
memory = Memory(store=store, max_instructions_memories=10)

agent = Agent(
'openai:gpt-4o-mini',
capabilities=[memory],
system_prompt=(
'You are a coding assistant that learns from user corrections. '
'When the user gives you a coding rule or correction, save it as a memory '
'with scope "rules" and tags like ["python", "style"] or ["typescript", "testing"]. '
'Use descriptive keys like "rule_python_fstrings" or "rule_ts_const". '
'When asked to write code, search your memories for relevant rules first.'
),
)

# --- Teach rules ---
with logfire.span('teach-rules'):
result1 = agent.run_sync(
'Remember these coding rules:\n'
'1. Always use f-strings in Python, never .format() or % formatting\n'
'2. In TypeScript, prefer const over let, never use var\n'
'3. Always add type hints to Python function signatures'
)
print(f'Assistant: {result1.output}')

rules = store.list_all()
print(f'\nRules stored: {len(rules)}')
for r in rules:
print(f' [{r.key}] {r.content} (scope={r.scope}, tags={r.tags})')

assert len(rules) >= 3, f'Expected at least 3 rules saved, got {len(rules)}'

# Check that search works across stored rules
python_rules = store.search('python')
print(f'Rules matching "python": {len(python_rules)}')
assert len(python_rules) >= 1, 'Expected at least 1 rule matching "python"'

# --- Verify instructions injection ---
# The built instructions should now include the rules
from unittest.mock import MagicMock

from pydantic_ai._run_context import RunContext
from pydantic_ai.usage import RunUsage

ctx: RunContext[None] = RunContext(deps=None, model=MagicMock(), usage=RunUsage())
instructions = memory.build_instructions(ctx)
print(f'\nInstructions preview (first 300 chars):\n{instructions[:300]}...')

assert 'Currently stored memories' in instructions, 'Expected memories in instructions'

# --- Ask for code, verify rules are considered ---
with logfire.span('apply-rules'):
result2 = agent.run_sync(
'Write a Python function that greets a user by name. Follow all coding rules you know.'
)
print(f'\nAssistant: {result2.output}')

# The output should use f-strings per the saved rules (type hints are not asserted)
output_lower = result2.output.lower()
assert "f'" in result2.output or 'f"' in result2.output or 'f-string' in output_lower, (
'Expected f-string usage in code output'
)

# --- Delete an obsolete rule ---
with logfire.span('delete-rule'):
result3 = agent.run_sync('Actually, the TypeScript const rule is outdated for this project. Delete it.')
print(f'\nAssistant: {result3.output}')

remaining = store.list_all()
print(f'\nRules after deletion: {len(remaining)}')
for r in remaining:
print(f' [{r.key}] {r.content}')

# Should have fewer rules now
assert len(remaining) < len(rules), 'Expected at least one rule deleted'

print('\n--- Coding Assistant example passed! ---')


if __name__ == '__main__':
sys.exit(main() or 0)
89 changes: 89 additions & 0 deletions examples/memory/personal_assistant.py
@@ -0,0 +1,89 @@
"""Personal Assistant — remembers user preferences across sessions.

Demonstrates: FileStore persistence, save/recall, instructions injection, tags, scoping.
"""

from __future__ import annotations

import sys
import tempfile
from pathlib import Path

import logfire
from pydantic_ai import Agent

from pydantic_harness.memory import FileStore, Memory

logfire.configure(send_to_logfire='if-token-present')
logfire.instrument_openai()


def main() -> None:
"""Run the personal assistant example."""
with tempfile.TemporaryDirectory() as tmpdir:
mem_path = Path(tmpdir) / 'preferences.json'
store = FileStore(mem_path)
memory = Memory(store=store)

agent = Agent(
'openai:gpt-4o-mini',
capabilities=[memory],
system_prompt=(
'You are a helpful personal assistant. '
'When the user tells you about their preferences, save each one as a memory '
'with scope "user_prefs" and appropriate tags. '
'Use descriptive keys like "preferred_name" or "theme_preference".'
),
)

# --- Session 1: user shares preferences ---
with logfire.span('session-1-save-preferences'):
result1 = agent.run_sync("Hi! My name is Alice, I prefer dark mode, and I'm vegetarian.")
print(f'Assistant: {result1.output}')

entries = store.list_all()
print(f'\nMemories after session 1: {len(entries)}')
for e in entries:
print(f' [{e.key}] {e.content} (tags={e.tags}, scope={e.scope})')

assert len(entries) >= 2, f'Expected at least 2 memories saved, got {len(entries)}'
all_content = ' '.join(e.content.lower() for e in entries)
assert 'alice' in all_content or any('alice' in e.key.lower() for e in entries), 'Expected a memory about Alice'

# --- Session 2: new agent instance loads from same file (persistence) ---
store2 = FileStore(mem_path)
memory2 = Memory(store=store2)
agent2 = Agent(
'openai:gpt-4o-mini',
capabilities=[memory2],
system_prompt='You are a helpful personal assistant.',
)

loaded_entries = store2.list_all()
print(f'\nMemories loaded in session 2: {len(loaded_entries)}')
assert len(loaded_entries) == len(entries), 'FileStore persistence failed'

with logfire.span('session-2-recall-preferences'):
result2 = agent2.run_sync('What do you know about me?')
print(f'Assistant: {result2.output}')

# The instructions injection should have included the memories
assert 'alice' in result2.output.lower() or 'dark' in result2.output.lower(), (
'Expected assistant to recall preferences from instructions injection'
)

# --- Session 3: update a preference ---
with logfire.span('session-3-update-preference'):
result3 = agent2.run_sync('Actually, I go by Ali now. Please update my name.')
print(f'\nAssistant: {result3.output}')

updated_entries = store2.list_all()
print(f'\nMemories after update: {len(updated_entries)}')
for e in updated_entries:
print(f' [{e.key}] {e.content} (tags={e.tags})')

print('\n--- Personal Assistant example passed! ---')


if __name__ == '__main__':
sys.exit(main() or 0)
76 changes: 76 additions & 0 deletions examples/memory/study_coach.py
@@ -0,0 +1,76 @@
"""Study Coach — spaced repetition with TTL.

Demonstrates: TTL/expiration, save with ttl_minutes, list/search, tags.
"""

from __future__ import annotations

import sys

import logfire
from pydantic_ai import Agent

from pydantic_harness.memory import InMemoryStore, Memory

logfire.configure(send_to_logfire='if-token-present')
logfire.instrument_openai()


def main() -> None:
"""Run the study coach example."""
store = InMemoryStore()
memory = Memory(store=store)

agent = Agent(
'openai:gpt-4o-mini',
capabilities=[memory],
system_prompt=(
'You are a study coach that helps users learn facts. '
'When the user provides a fact to learn, save it as a memory with '
'tag "study" and a ttl_minutes value: use 1 for new/hard facts, '
'60 for reviewed facts, and 1440 for mastered facts. '
'Use descriptive keys like "biology_mitochondria" or "history_magna_carta".'
),
)

# --- Learn some facts ---
with logfire.span('learn-facts'):
result1 = agent.run_sync(
'I need to learn these facts:\n'
'1. Mitochondria are the powerhouse of the cell\n'
'2. The Magna Carta was signed in 1215\n'
'3. Water boils at 100 degrees Celsius at sea level'
)
print(f'Coach: {result1.output}')

entries = store.list_all()
print(f'\nFacts stored: {len(entries)}')
for e in entries:
print(f' [{e.key}] {e.content} (tags={e.tags}, ttl={e.expires_at})')

assert len(entries) >= 3, f'Expected at least 3 facts saved, got {len(entries)}'

# Check that TTL was set on at least some entries
entries_with_ttl = [e for e in entries if e.expires_at is not None]
assert len(entries_with_ttl) >= 1, 'Expected at least 1 entry with TTL set'
print(f'Entries with TTL: {len(entries_with_ttl)}')

# Check tags
entries_with_study_tag = [e for e in entries if 'study' in e.tags]
assert len(entries_with_study_tag) >= 1, 'Expected at least 1 entry with "study" tag'

# --- Search for facts ---
with logfire.span('search-facts'):
result2 = agent.run_sync('Search my memories for anything about biology.')
print(f'\nCoach: {result2.output}')

# --- List all facts ---
with logfire.span('list-facts'):
result3 = agent.run_sync('List all my study memories.')
print(f'\nCoach: {result3.output}')

print('\n--- Study Coach example passed! ---')


if __name__ == '__main__':
sys.exit(main() or 0)
11 changes: 10 additions & 1 deletion src/pydantic_harness/__init__.py
@@ -7,4 +7,13 @@
# Each capability module is imported and re-exported here.
# Capabilities are listed alphabetically.

__all__: list[str] = []
from pydantic_harness.memory import FileStore, InMemoryStore, Memory, MemoryEntry, MemoryEntryDict, MemoryStore

__all__: list[str] = [
'FileStore',
'InMemoryStore',
'Memory',
'MemoryEntry',
'MemoryEntryDict',
'MemoryStore',
]