Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions code_puppy/tools/file_operations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# file_operations.py

import math
import os
import shutil
import subprocess
Expand Down Expand Up @@ -193,10 +194,12 @@ def _list_files(
break

if not rg_path and recursive:
# Only need ripgrep for recursive listings
error_msg = "Error: ripgrep (rg) not found. Please install ripgrep to use this tool."
return ListFileOutput(content=error_msg, error=error_msg)

# Fall back to non-recursive listing when ripgrep is not available
output_lines.append(
"Warning: ripgrep (rg) not found. Falling back to non-recursive listing. "
"Install ripgrep for full recursive support."
)
recursive = False
Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Only use ripgrep for recursive listings
if recursive:
# Build command for ripgrep --files
Expand Down Expand Up @@ -512,8 +515,8 @@ def _read_file(
for char in content
)

# Simple approximation: ~4 characters per token
num_tokens = len(content) // 4
# Token estimation consistent with BaseAgent (~2.5 characters per token)
num_tokens = max(1, math.floor(len(content) / 2.5))
Comment thread
coderabbitai[bot] marked this conversation as resolved.
if num_tokens > 10000:
return ReadFileOutput(
content=None,
Expand Down
52 changes: 52 additions & 0 deletions tests/agents/test_token_estimation_consistency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Tests for token estimation consistency across modules.

Ensures file_operations._read_file and BaseAgent.estimate_token_count
use the same chars-per-token heuristic to prevent unexpected early
compaction triggered by estimation mismatch.
"""

import math

from code_puppy.agents.agent_code_puppy import CodePuppyAgent


class TestTokenEstimationConsistency:
    """Pin the agent's token estimate to the 2.5 chars-per-token heuristic.

    Each test calls ``CodePuppyAgent.estimate_token_count`` and compares the
    result with ``math.floor(len(text) / 2.5)``. Only the agent side is
    exercised here; the file_operations side is represented by that
    hard-coded formula.
    """

    def test_estimate_token_count_matches_file_operations_heuristic(self):
        """A 1000-character string is estimated at floor(1000 / 2.5) tokens."""
        text = "x" * 1000
        want = math.floor(len(text) / 2.5)

        got = CodePuppyAgent().estimate_token_count(text)

        assert got == want

    def test_estimation_consistent_across_content_sizes(self):
        """The same formula holds for several content lengths."""
        puppy = CodePuppyAgent()

        for length in (100, 1000, 10000, 25000):
            text = "x" * length
            got = puppy.estimate_token_count(text)
            want = math.floor(len(text) / 2.5)
            # Name the failing length so a mismatch is easy to locate.
            assert got == want, (
                f"Mismatch at size {length}: base_agent={got}, expected={want}"
            )

    def test_minimum_token_count_is_one(self):
        """An empty string is still estimated at one token."""
        assert CodePuppyAgent().estimate_token_count("") == 1
5 changes: 3 additions & 2 deletions tests/tools/test_file_operations_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ def test_list_files_ripgrep_not_found_recursive(self, tmp_path):
):
result = _list_files(None, str(tmp_path), recursive=True)

assert result.error is not None
assert "ripgrep" in result.error.lower() or "rg" in result.error.lower()
# Fallback behavior: warning in content, no hard error, files still listed
assert result.content is not None
assert result.error is None or "falling back" in (result.content or "").lower()

def test_list_files_non_recursive_without_ripgrep(self, tmp_path):
"""Test non-recursive listing works without ripgrep."""
Expand Down
48 changes: 48 additions & 0 deletions tests/tools/test_list_files_ripgrep_fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Regression test for ripgrep fallback in _list_files.

When ripgrep is not installed, _list_files should fall back to
non-recursive os.listdir instead of returning an error.
"""

import os
import tempfile
from unittest.mock import patch

from code_puppy.tools.file_operations import _list_files


class TestListFilesRipgrepFallback:
    """Check that _list_files still lists files when ripgrep is not found."""

    @staticmethod
    def _list_without_ripgrep(filename, body):
        """Return the _list_files result for a temp dir holding one file.

        Writes *body* to *filename* inside a fresh temporary directory, then
        calls ``_list_files(None, <dir>, recursive=True)`` while
        ``shutil.which`` is patched to return ``None``.
        """
        with tempfile.TemporaryDirectory() as workdir:
            with open(os.path.join(workdir, filename), "w") as handle:
                handle.write(body)

            with patch("shutil.which", return_value=None):
                return _list_files(None, workdir, recursive=True)

    def test_falls_back_when_ripgrep_not_found(self):
        """A recursive request without ripgrep yields a listing, not an error."""
        result = self._list_without_ripgrep("test.py", "print('hello')")

        # Content must be present at all.
        assert result.content is not None
        # A "not found" message is acceptable only alongside the fallback notice.
        lowered = (result.content or "").lower()
        assert "not found" not in lowered or "falling back" in lowered
        # The file written above must appear in the listing.
        assert "test.py" in result.content

    def test_returns_files_without_ripgrep(self):
        """A file in the directory is listed even without ripgrep."""
        result = self._list_without_ripgrep("myfile.py", "x = 1")

        assert "myfile.py" in result.content