From 19d5af5cc4a41afd51db259d8e41b55b7fb1e5ce Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 20:18:38 +0000 Subject: [PATCH 1/6] Add context usage percentage to working memory endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add context_usage_percentage field to WorkingMemoryResponse model - Add _calculate_context_usage_percentage() helper function - Update GET /v1/working-memory/{session_id} to return percentage - Update PUT /v1/working-memory/{session_id} to return percentage based on final state (after potential summarization) - Percentage calculated as (current_tokens / token_threshold) * 100 where token_threshold = context_window * 0.7 - Returns None when no model info provided, otherwise 0-100% value Resolves #37 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: Andrew Brookins --- agent_memory_server/api.py | 61 ++++++++++++++++++++++++++++++++-- agent_memory_server/models.py | 5 +++ tests/test_full_integration.py | 18 +++++----- 3 files changed, 73 insertions(+), 11 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index b5c8a47..b9c799c 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -63,6 +63,41 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: return total_tokens +def _calculate_context_usage_percentage( + messages: list[MemoryMessage], + model_name: ModelNameLiteral | None, + context_window_max: int | None, +) -> float | None: + """ + Calculate the percentage of context window used before auto-summarization triggers. + + Args: + messages: List of messages to calculate token count for + model_name: The client's LLM model name for context window determination + context_window_max: Direct specification of context window max tokens + + Returns: + Percentage (0-100) of context used, or None if no model info provided + """ + if not messages or (not model_name and not context_window_max): + return None + + # Calculate current token usage + current_tokens = _calculate_messages_token_count(messages) + + # Get effective token limit for the client's model + max_tokens = _get_effective_token_limit(model_name, context_window_max) + + # Use the same threshold as _summarize_working_memory (70% of context window) + token_threshold = int(max_tokens * 0.7) + + # Calculate percentage of threshold used + percentage = (current_tokens / token_threshold) * 100.0 + + # Cap at 100% for display purposes + return min(percentage, 100.0) + + async def _summarize_working_memory( memory: WorkingMemory, model_name: ModelNameLiteral | None = None, @@ -269,7 +304,18 @@ async def get_working_memory( logger.debug(f"Working mem: {working_mem}") - return working_mem + # Calculate context usage percentage + context_usage_percentage = _calculate_context_usage_percentage( + messages=working_mem.messages, + model_name=model_name, + context_window_max=context_window_max, + ) + + # Return WorkingMemoryResponse with percentage + return WorkingMemoryResponse( + **working_mem.model_dump(), + context_usage_percentage=context_usage_percentage, + ) @router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse) @@ -348,7 +394,18 @@ async def put_working_memory( namespace=updated_memory.namespace, ) - return updated_memory + # Calculate context usage percentage based on the final state (after potential summarization) + context_usage_percentage = _calculate_context_usage_percentage( + messages=updated_memory.messages, + model_name=model_name, + context_window_max=context_window_max, + ) + + # Return WorkingMemoryResponse with percentage + return WorkingMemoryResponse( + **updated_memory.model_dump(), + context_usage_percentage=context_usage_percentage, + ) @router.delete("/v1/working-memory/{session_id}", response_model=AckResponse) diff --git a/agent_memory_server/models.py b/agent_memory_server/models.py index 7fda47c..87d9af5 100644 --- a/agent_memory_server/models.py +++ b/agent_memory_server/models.py @@ -222,6 +222,11 @@ class WorkingMemory(BaseModel): class WorkingMemoryResponse(WorkingMemory): """Response containing working memory""" + context_usage_percentage: float | None = Field( + default=None, + description="Percentage of context window used before auto-summarization triggers (0-100)", + ) + class WorkingMemoryRequest(BaseModel): """Request parameters for working memory operations""" diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py index 1c0761a..1c68228 100644 --- a/tests/test_full_integration.py +++ b/tests/test_full_integration.py @@ -773,9 +773,9 @@ async def test_memory_prompt_with_long_term_search( ) for msg in messages ) - assert ( - relevant_context_found - ), f"No relevant memory context found in messages: {messages}" + assert relevant_context_found, ( + f"No relevant memory context found in messages: {messages}" + ) # Cleanup await client.delete_long_term_memories([m.id for m in test_memories]) @@ -1079,9 +1079,9 @@ async def test_full_workflow_integration( ) print(f"No topic filter search results: {no_topic_search}") - assert ( - len(search_results["memories"]) > 0 - ), f"No memories found in search results: {search_results}" + assert len(search_results["memories"]) > 0, ( + f"No memories found in search results: {search_results}" + ) # 6. Test tool integration with a realistic scenario tool_call = { @@ -1126,9 +1126,9 @@ async def test_full_workflow_integration( m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix) ] - assert ( - len(our_memories) == 0 - ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" + assert len(our_memories) == 0, ( + f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" + ) @pytest.mark.integration From a1a778af3acad2a1c7b999ad2d36ed2a85e4a7ba Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Thu, 24 Jul 2025 16:33:25 -0700 Subject: [PATCH 2/6] Fix code formatting in test_full_integration.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- tests/test_full_integration.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_full_integration.py b/tests/test_full_integration.py index 1c68228..1c0761a 100644 --- a/tests/test_full_integration.py +++ b/tests/test_full_integration.py @@ -773,9 +773,9 @@ async def test_memory_prompt_with_long_term_search( ) for msg in messages ) - assert relevant_context_found, ( - f"No relevant memory context found in messages: {messages}" - ) + assert ( + relevant_context_found + ), f"No relevant memory context found in messages: {messages}" # Cleanup await client.delete_long_term_memories([m.id for m in test_memories]) @@ -1079,9 +1079,9 @@ async def test_full_workflow_integration( ) print(f"No topic filter search results: {no_topic_search}") - assert len(search_results["memories"]) > 0, ( - f"No memories found in search results: {search_results}" - ) + assert ( + len(search_results["memories"]) > 0 + ), f"No memories found in search results: {search_results}" # 6. Test tool integration with a realistic scenario tool_call = { @@ -1126,9 +1126,9 @@ async def test_full_workflow_integration( m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix) ] - assert len(our_memories) == 0, ( - f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" - ) + assert ( + len(our_memories) == 0 + ), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}" @pytest.mark.integration From b5c55d195898dfbd8a25a1eb1a9b6aa48ffecf1b Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 25 Jul 2025 10:37:17 -0700 Subject: [PATCH 3/6] Fix duplicate context_usage_percentage parameter in API responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve TypeError by properly handling the context_usage_percentage field in WorkingMemoryResponse creation to avoid duplicate keyword arguments. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- agent_memory_server/api.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index b9c799c..2718b4b 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -312,10 +312,9 @@ async def get_working_memory( ) # Return WorkingMemoryResponse with percentage - return WorkingMemoryResponse( - **working_mem.model_dump(), - context_usage_percentage=context_usage_percentage, - ) + working_mem_data = working_mem.model_dump() + working_mem_data["context_usage_percentage"] = context_usage_percentage + return WorkingMemoryResponse(**working_mem_data) @router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse) @@ -402,10 +401,9 @@ async def put_working_memory( ) # Return WorkingMemoryResponse with percentage - return WorkingMemoryResponse( - **updated_memory.model_dump(), - context_usage_percentage=context_usage_percentage, - ) + updated_memory_data = updated_memory.model_dump() + updated_memory_data["context_usage_percentage"] = context_usage_percentage + return WorkingMemoryResponse(**updated_memory_data) @router.delete("/v1/working-memory/{session_id}", response_model=AckResponse) From 8043c185e3d666e06db62782bbfaa97e6597fbe9 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 25 Jul 2025 10:37:50 -0700 Subject: [PATCH 4/6] Add context_usage_percentage field and tests to SDK client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add context_usage_percentage field to WorkingMemoryResponse model - Add comprehensive test suite for the new field covering: - Field creation and default values - Serialization behavior - Validation of different percentage values - Dictionary-to-model conversion 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../agent_memory_client/models.py | 5 +- agent-memory-client/tests/test_client.py | 110 ++++++++++++++++++ docs/memory-types.md | 5 +- dump.rdb | Bin 0 -> 88 bytes 4 files changed, 115 insertions(+), 5 deletions(-) create mode 100644 dump.rdb diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py index bc731c9..12206ac 100644 --- a/agent-memory-client/agent_memory_client/models.py +++ b/agent-memory-client/agent_memory_client/models.py @@ -215,7 +215,10 @@ class SessionListResponse(BaseModel): class WorkingMemoryResponse(WorkingMemory): """Response from working memory operations""" - pass + context_usage_percentage: float | None = Field( + default=None, + description="Percentage of context window used before auto-summarization triggers (0-100)", + ) class MemoryRecordResult(MemoryRecord): diff --git a/agent-memory-client/tests/test_client.py b/agent-memory-client/tests/test_client.py index e49f615..2b13817 100644 --- a/agent-memory-client/tests/test_client.py +++ b/agent-memory-client/tests/test_client.py @@ -653,3 +653,113 @@ def test_validation_with_none_values(self, enhanced_test_client): # Should not raise enhanced_test_client.validate_memory_record(memory) + + +class TestContextUsagePercentage: + """Tests for context usage percentage functionality.""" + + @pytest.mark.asyncio + async def test_working_memory_response_with_context_percentage( + self, enhanced_test_client + ): + """Test that WorkingMemoryResponse properly handles context_usage_percentage field.""" + session_id = "test-session" + + # Test with context percentage set + working_memory_response = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={}, + context=None, + user_id=None, + context_usage_percentage=45.5, + ) + + assert working_memory_response.context_usage_percentage == 45.5 + assert working_memory_response.session_id == session_id + + # Test with None context percentage (default) + working_memory_response_none = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={}, + context=None, + user_id=None, + ) + + assert working_memory_response_none.context_usage_percentage is None + + @pytest.mark.asyncio + async def test_context_percentage_serialization(self, enhanced_test_client): + """Test that context_usage_percentage is properly serialized.""" + session_id = "test-session" + + # Create response with context percentage + working_memory_response = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={}, + context=None, + user_id=None, + context_usage_percentage=75.0, + ) + + # Test model_dump includes the field + dumped = working_memory_response.model_dump() + assert "context_usage_percentage" in dumped + assert dumped["context_usage_percentage"] == 75.0 + + # Test JSON serialization + json_data = working_memory_response.model_dump_json() + assert "context_usage_percentage" in json_data + assert "75.0" in json_data + + @pytest.mark.asyncio + async def test_context_percentage_validation(self, enhanced_test_client): + """Test that context_usage_percentage accepts valid values.""" + session_id = "test-session" + + # Test valid percentages + valid_percentages = [0.0, 25.5, 50.0, 99.9, 100.0, None] + + for percentage in valid_percentages: + working_memory_response = WorkingMemoryResponse( + session_id=session_id, + messages=[], + memories=[], + data={}, + context=None, + user_id=None, + context_usage_percentage=percentage, + ) + assert working_memory_response.context_usage_percentage == percentage + + def test_working_memory_response_from_dict_with_context_percentage(self): + """Test that WorkingMemoryResponse can be created from dict with context_usage_percentage.""" + session_id = "test-session" + + # Test creating WorkingMemoryResponse from dict (simulating API response parsing) + response_dict = { + "session_id": session_id, + "messages": [], + "memories": [], + "data": {}, + "context": None, + "user_id": None, + "context_usage_percentage": 33.3, + "tokens": 0, + "namespace": None, + "ttl_seconds": None, + "last_accessed": "2024-01-01T00:00:00Z", + } + + # This simulates what happens when the API client parses the JSON response + result = WorkingMemoryResponse(**response_dict) + + # Verify the context_usage_percentage is included + assert isinstance(result, WorkingMemoryResponse) + assert result.context_usage_percentage == 33.3 + assert result.session_id == session_id diff --git a/docs/memory-types.md b/docs/memory-types.md index c1cf549..02bf30d 100644 --- a/docs/memory-types.md +++ b/docs/memory-types.md @@ -202,11 +202,8 @@ Long-term memory supports three types of memories: # Create long-term memories POST /v1/long-term-memory/ -# Search long-term memories only +# Search long-term memories POST /v1/long-term-memory/search - -# Search across all memory types -POST /v1/memory/search ``` ### Search Capabilities diff --git a/dump.rdb b/dump.rdb new file mode 100644 index 0000000000000000000000000000000000000000..f6dbd0e92f437877d750f2544c3cbbafd42ad305 GIT binary patch literal 88 zcmWG?b@2=~FfcUw#aWb^l3A=cB}M=M literal 0 HcmV?d00001 From ab196dff291c6695b1d39ba9264e1bc4c4995c72 Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 25 Jul 2025 11:58:37 -0700 Subject: [PATCH 5/6] Make summarization threshold configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review comments by making the 0.7 threshold configurable instead of hardcoded. Added summarization_threshold setting that can be configured via environment variable or config file. - Added summarization_threshold to Settings (default: 0.7) - Updated both _calculate_context_usage_percentage and _summarize_working_memory to use settings.summarization_threshold - Improved maintainability and consistency between functions - Allows users to customize when summarization is triggered 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- agent_memory_server/api.py | 8 ++++---- agent_memory_server/config.py | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 2718b4b..271b34c 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -88,8 +88,8 @@ def _calculate_context_usage_percentage( # Get effective token limit for the client's model max_tokens = _get_effective_token_limit(model_name, context_window_max) - # Use the same threshold as _summarize_working_memory (70% of context window) - token_threshold = int(max_tokens * 0.7) + # Use the same threshold as _summarize_working_memory (reserves space for new content) + token_threshold = int(max_tokens * settings.summarization_threshold) # Calculate percentage of threshold used percentage = (current_tokens / token_threshold) * 100.0 @@ -123,8 +123,8 @@ async def _summarize_working_memory( max_tokens = _get_effective_token_limit(model_name, context_window_max) # Reserve space for new messages, function calls, and response generation - # Use 70% of context window to leave room for new content - token_threshold = int(max_tokens * 0.7) + # Use configurable threshold to leave room for new content + token_threshold = int(max_tokens * settings.summarization_threshold) if current_tokens <= token_threshold: return memory diff --git a/agent_memory_server/config.py b/agent_memory_server/config.py index 8fdde4d..73acbb2 100644 --- a/agent_memory_server/config.py +++ b/agent_memory_server/config.py @@ -119,6 +119,9 @@ class Settings(BaseSettings): # Working memory settings window_size: int = 20 # Default number of recent messages to return + summarization_threshold: float = ( + 0.7 # Fraction of context window that triggers summarization + ) # Other Application settings log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO" From 2acd27bc92161595f094e61077f17c89d6eab48d Mon Sep 17 00:00:00 2001 From: Andrew Brookins Date: Fri, 25 Jul 2025 14:30:17 -0700 Subject: [PATCH 6/6] Implement dual context percentage fields for working memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add context_percentage_total_used field showing actual context window usage (0-100%) - Add context_percentage_until_summarization field showing percentage until auto-summarization triggers (0-100%) - Update API calculation function to return both values as tuple - Update server and SDK models with new fields - Update comprehensive test coverage for both fields - Remove old single context_usage_percentage field - Maintain configurable summarization threshold (default 70%) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../agent_memory_client/models.py | 8 ++- agent-memory-client/tests/test_client.py | 65 ++++++++++++------- agent_memory_server/api.py | 64 +++++++++++------- agent_memory_server/models.py | 8 ++- 4 files changed, 92 insertions(+), 53 deletions(-) diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py index 12206ac..a77b1ea 100644 --- a/agent-memory-client/agent_memory_client/models.py +++ b/agent-memory-client/agent_memory_client/models.py @@ -215,9 +215,13 @@ class SessionListResponse(BaseModel): class WorkingMemoryResponse(WorkingMemory): """Response from working memory operations""" - context_usage_percentage: float | None = Field( + context_percentage_total_used: float | None = Field( default=None, - description="Percentage of context window used before auto-summarization triggers (0-100)", + description="Percentage of total context window currently used (0-100)", + ) + context_percentage_until_summarization: float | None = Field( + default=None, + description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)", ) diff --git a/agent-memory-client/tests/test_client.py b/agent-memory-client/tests/test_client.py index 2b13817..a77619f 100644 --- a/agent-memory-client/tests/test_client.py +++ b/agent-memory-client/tests/test_client.py @@ -659,13 +659,13 @@ class TestContextUsagePercentage: """Tests for context usage percentage functionality.""" @pytest.mark.asyncio - async def test_working_memory_response_with_context_percentage( + async def test_working_memory_response_with_context_percentages( self, enhanced_test_client ): - """Test that WorkingMemoryResponse properly handles context_usage_percentage field.""" + """Test that WorkingMemoryResponse properly handles both context percentage fields.""" session_id = "test-session" - # Test with context percentage set + # Test with both context percentages set working_memory_response = WorkingMemoryResponse( session_id=session_id, messages=[], @@ -673,13 +673,15 @@ async def test_working_memory_response_with_context_percentage( data={}, context=None, user_id=None, - context_usage_percentage=45.5, + context_percentage_total_used=45.5, + context_percentage_until_summarization=65.0, ) - assert working_memory_response.context_usage_percentage == 45.5 + assert working_memory_response.context_percentage_total_used == 45.5 + assert working_memory_response.context_percentage_until_summarization == 65.0 assert working_memory_response.session_id == session_id - # Test with None context percentage (default) + # Test with None context percentages (default) working_memory_response_none = WorkingMemoryResponse( session_id=session_id, messages=[], @@ -689,14 +691,17 @@ async def test_working_memory_response_with_context_percentage( user_id=None, ) - assert working_memory_response_none.context_usage_percentage is None + assert working_memory_response_none.context_percentage_total_used is None + assert ( + working_memory_response_none.context_percentage_until_summarization is None + ) @pytest.mark.asyncio - async def test_context_percentage_serialization(self, enhanced_test_client): - """Test that context_usage_percentage is properly serialized.""" + async def test_context_percentages_serialization(self, enhanced_test_client): + """Test that both context percentage fields are properly serialized.""" session_id = "test-session" - # Create response with context percentage + # Create response with both context percentages working_memory_response = WorkingMemoryResponse( session_id=session_id, messages=[], @@ -704,22 +709,27 @@ async def test_context_percentage_serialization(self, enhanced_test_client): data={}, context=None, user_id=None, - context_usage_percentage=75.0, + context_percentage_total_used=75.0, + context_percentage_until_summarization=85.5, ) - # Test model_dump includes the field + # Test model_dump includes both fields dumped = working_memory_response.model_dump() - assert "context_usage_percentage" in dumped - assert dumped["context_usage_percentage"] == 75.0 + assert "context_percentage_total_used" in dumped + assert "context_percentage_until_summarization" in dumped + assert dumped["context_percentage_total_used"] == 75.0 + assert dumped["context_percentage_until_summarization"] == 85.5 # Test JSON serialization json_data = working_memory_response.model_dump_json() - assert "context_usage_percentage" in json_data + assert "context_percentage_total_used" in json_data + assert "context_percentage_until_summarization" in json_data assert "75.0" in json_data + assert "85.5" in json_data @pytest.mark.asyncio - async def test_context_percentage_validation(self, enhanced_test_client): - """Test that context_usage_percentage accepts valid values.""" + async def test_context_percentages_validation(self, enhanced_test_client): + """Test that both context percentage fields accept valid values.""" session_id = "test-session" # Test valid percentages @@ -733,12 +743,17 @@ async def test_context_percentage_validation(self, enhanced_test_client): data={}, context=None, user_id=None, - context_usage_percentage=percentage, + context_percentage_total_used=percentage, + context_percentage_until_summarization=percentage, + ) + assert working_memory_response.context_percentage_total_used == percentage + assert ( + working_memory_response.context_percentage_until_summarization + == percentage ) - assert working_memory_response.context_usage_percentage == percentage - def test_working_memory_response_from_dict_with_context_percentage(self): - """Test that WorkingMemoryResponse can be created from dict with context_usage_percentage.""" + def test_working_memory_response_from_dict_with_context_percentages(self): + """Test that WorkingMemoryResponse can be created from dict with both context percentage fields.""" session_id = "test-session" # Test creating WorkingMemoryResponse from dict (simulating API response parsing) @@ -749,7 +764,8 @@ def test_working_memory_response_from_dict_with_context_percentage(self): "data": {}, "context": None, "user_id": None, - "context_usage_percentage": 33.3, + "context_percentage_total_used": 33.3, + "context_percentage_until_summarization": 47.5, "tokens": 0, "namespace": None, "ttl_seconds": None, @@ -759,7 +775,8 @@ def test_working_memory_response_from_dict_with_context_percentage(self): # This simulates what happens when the API client parses the JSON response result = WorkingMemoryResponse(**response_dict) - # Verify the context_usage_percentage is included + # Verify both context percentage fields are included assert isinstance(result, WorkingMemoryResponse) - assert result.context_usage_percentage == 33.3 + assert result.context_percentage_total_used == 33.3 + assert result.context_percentage_until_summarization == 47.5 assert result.session_id == session_id diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 271b34c..578795d 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -63,13 +63,13 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: return total_tokens -def _calculate_context_usage_percentage( +def _calculate_context_usage_percentages( messages: list[MemoryMessage], model_name: ModelNameLiteral | None, context_window_max: int | None, -) -> float | None: +) -> tuple[float | None, float | None]: """ - Calculate the percentage of context window used before auto-summarization triggers. + Calculate context usage percentages for total usage and until summarization triggers. Args: messages: List of messages to calculate token count for @@ -77,10 +77,13 @@ def _calculate_context_usage_percentage( context_window_max: Direct specification of context window max tokens Returns: - Percentage (0-100) of context used, or None if no model info provided + Tuple of (total_percentage, until_summarization_percentage) + - total_percentage: Percentage (0-100) of total context window used + - until_summarization_percentage: Percentage (0-100) until summarization triggers + Both values are None if no model info provided """ if not messages or (not model_name and not context_window_max): - return None + return None, None # Calculate current token usage current_tokens = _calculate_messages_token_count(messages) @@ -88,14 +91,15 @@ def _calculate_context_usage_percentage( # Get effective token limit for the client's model max_tokens = _get_effective_token_limit(model_name, context_window_max) - # Use the same threshold as _summarize_working_memory (reserves space for new content) - token_threshold = int(max_tokens * settings.summarization_threshold) + # Calculate percentage of total context window used + total_percentage = (current_tokens / max_tokens) * 100.0 - # Calculate percentage of threshold used - percentage = (current_tokens / token_threshold) * 100.0 + # Calculate percentage until summarization threshold + token_threshold = int(max_tokens * settings.summarization_threshold) + until_summarization_percentage = (current_tokens / token_threshold) * 100.0 - # Cap at 100% for display purposes - return min(percentage, 100.0) + # Cap both at 100% for display purposes + return min(total_percentage, 100.0), min(until_summarization_percentage, 100.0) async def _summarize_working_memory( @@ -304,16 +308,21 @@ async def get_working_memory( logger.debug(f"Working mem: {working_mem}") - # Calculate context usage percentage - context_usage_percentage = _calculate_context_usage_percentage( - messages=working_mem.messages, - model_name=model_name, - context_window_max=context_window_max, + # Calculate context usage percentages + total_percentage, until_summarization_percentage = ( + _calculate_context_usage_percentages( + messages=working_mem.messages, + model_name=model_name, + context_window_max=context_window_max, + ) ) - # Return WorkingMemoryResponse with percentage + # Return WorkingMemoryResponse with both percentage values working_mem_data = working_mem.model_dump() - working_mem_data["context_usage_percentage"] = context_usage_percentage + working_mem_data["context_percentage_total_used"] = total_percentage + working_mem_data["context_percentage_until_summarization"] = ( + until_summarization_percentage + ) return WorkingMemoryResponse(**working_mem_data) @@ -393,16 +402,21 @@ async def put_working_memory( namespace=updated_memory.namespace, ) - # Calculate context usage percentage based on the final state (after potential summarization) - context_usage_percentage = _calculate_context_usage_percentage( - messages=updated_memory.messages, - model_name=model_name, - context_window_max=context_window_max, + # Calculate context usage percentages based on the final state (after potential summarization) + total_percentage, until_summarization_percentage = ( + _calculate_context_usage_percentages( + messages=updated_memory.messages, + model_name=model_name, + context_window_max=context_window_max, + ) ) - # Return WorkingMemoryResponse with percentage + # Return WorkingMemoryResponse with both percentage values updated_memory_data = updated_memory.model_dump() - updated_memory_data["context_usage_percentage"] = context_usage_percentage + updated_memory_data["context_percentage_total_used"] = total_percentage + updated_memory_data["context_percentage_until_summarization"] = ( + until_summarization_percentage + ) return WorkingMemoryResponse(**updated_memory_data) diff --git a/agent_memory_server/models.py b/agent_memory_server/models.py index 87d9af5..204dfdf 100644 --- a/agent_memory_server/models.py +++ b/agent_memory_server/models.py @@ -222,9 +222,13 @@ class WorkingMemory(BaseModel): class WorkingMemoryResponse(WorkingMemory): """Response containing working memory""" - context_usage_percentage: float | None = Field( + context_percentage_total_used: float | None = Field( default=None, - description="Percentage of context window used before auto-summarization triggers (0-100)", + description="Percentage of total context window currently used (0-100)", + ) + context_percentage_until_summarization: float | None = Field( + default=None, + description="Percentage until auto-summarization triggers (0-100, reaches 100% at summarization threshold)", )