Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,23 @@ Configuration is managed through environment variables in `.env` file:
- `TARGET_REPO_PATH`: Default repository path (default: `.`)
- `LOCAL_MODEL_ENDPOINT`: Fallback endpoint for Ollama (default: `http://localhost:11434/v1`)

### Custom Ignore Patterns

You can specify additional directories to exclude by creating a `.cgrignore` file in your repository root:

```
# Comments start with #
vendor
.custom_cache
my_build_output
```

- One directory name per line
- Lines starting with `#` are comments (leading whitespace is ignored, so indented `#` lines are comments too)
- Blank lines are ignored
- Patterns are exact directory name matches (not globs)
- Patterns from `.cgrignore` are merged with `--exclude` flags and auto-detected directories

### Key Dependencies

<!-- SECTION:dependencies -->
Expand Down
28 changes: 28 additions & 0 deletions codebase_rag/config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from __future__ import annotations

from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Unpack

from dotenv import load_dotenv
from loguru import logger
from pydantic import AnyHttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict

from . import constants as cs
from . import exceptions as ex
from . import logs
from .types_defs import ModelConfigKwargs

load_dotenv()
Expand Down Expand Up @@ -227,3 +230,28 @@ def resolve_batch_size(self, batch_size: int | None) -> int:


settings = AppConfig()

CGRIGNORE_FILENAME = ".cgrignore"


def load_cgrignore_patterns(repo_path: Path) -> frozenset[str]:
    """Load extra directory-exclusion patterns from ``repo_path/.cgrignore``.

    Every line is stripped of surrounding whitespace. Lines that are then
    empty or start with ``#`` are skipped; each remaining line is one
    pattern. Duplicate lines collapse into a single entry.

    The loader is best-effort: a problem with the ignore file is logged as a
    warning and never propagates to the caller.

    Args:
        repo_path: Directory expected to contain the ignore file.

    Returns:
        The patterns found, or an empty frozenset when the file is missing,
        is not a regular file, cannot be read, or is not valid UTF-8.
    """
    ignore_file = repo_path / CGRIGNORE_FILENAME
    if not ignore_file.is_file():
        return frozenset()

    try:
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
        # mark, which would otherwise be glued to the first line and hide a
        # leading "#" or corrupt the first pattern.
        with ignore_file.open(encoding="utf-8-sig") as f:
            stripped_lines = (raw.strip() for raw in f)
            # Consumed inside the ``with`` so the file is still open.
            patterns = frozenset(
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            )
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed explicitly for a badly encoded file to be tolerated.
        logger.warning(logs.CGRIGNORE_READ_FAILED.format(path=ignore_file, error=e))
        return frozenset()

    if patterns:
        logger.info(
            logs.CGRIGNORE_LOADED.format(count=len(patterns), path=ignore_file)
        )
    return patterns
1 change: 1 addition & 0 deletions codebase_rag/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ class DiffMarker:
INTERACTIVE_STYLE_DIM = "dim"
INTERACTIVE_STATUS_DETECTED = "auto-detected"
INTERACTIVE_STATUS_CLI = "--exclude"
INTERACTIVE_STATUS_CGRIGNORE = ".cgrignore"
INTERACTIVE_NESTED_SINGULAR = "{count} dir"
INTERACTIVE_NESTED_PLURAL = "{count} dirs"
INTERACTIVE_INSTRUCTIONS_GROUPED = (
Expand Down
4 changes: 4 additions & 0 deletions codebase_rag/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@
GRAMMAR_LOAD_FAILED = "Failed to load {lang} grammar: {error}"
INITIALIZED_PARSERS = "Initialized parsers for: {languages}"

# (H) Ignore pattern logs
# str.format templates. CGRIGNORE_LOADED takes `count` and `path`;
# CGRIGNORE_READ_FAILED takes `path` and `error`.
CGRIGNORE_LOADED = "Loaded {count} patterns from {path}"
CGRIGNORE_READ_FAILED = "Failed to read {path}: {error}"

# (H) File watcher logs
WATCHER_ACTIVE = "File watcher is now active."
WATCHER_SKIP_NO_QUERY = "Ingestor does not support querying, skipping real-time update."
Expand Down
6 changes: 4 additions & 2 deletions codebase_rag/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from . import constants as cs
from . import exceptions as ex
from . import logs as ls
from .config import settings
from .config import load_cgrignore_patterns, settings
from .models import AppContext
from .prompts import OPTIMIZATION_PROMPT, OPTIMIZATION_PROMPT_WITH_REFERENCE
from .services import QueryProtocol
Expand Down Expand Up @@ -788,7 +788,9 @@ def prompt_for_included_directories(
cli_excludes: list[str] | None = None,
) -> frozenset[str]:
detected = detect_excludable_directories(repo_path)
pre_excluded = frozenset(cli_excludes) if cli_excludes else frozenset()
cgrignore_patterns = load_cgrignore_patterns(repo_path)
cli_patterns = frozenset(cli_excludes) if cli_excludes else frozenset()
pre_excluded = cli_patterns | cgrignore_patterns

if not detected and not pre_excluded:
return frozenset()
Expand Down
152 changes: 152 additions & 0 deletions codebase_rag/tests/test_cgrignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from codebase_rag.config import CGRIGNORE_FILENAME, load_cgrignore_patterns
from codebase_rag.main import prompt_for_included_directories


def test_returns_empty_when_no_file(temp_repo: Path) -> None:
    """Without a .cgrignore in the repository the loader yields no patterns."""
    assert load_cgrignore_patterns(temp_repo) == frozenset()


def test_loads_patterns_from_file(temp_repo: Path) -> None:
    """Each plain line of the ignore file is returned as one pattern."""
    (temp_repo / CGRIGNORE_FILENAME).write_text("vendor\nmy_build\n")

    loaded = load_cgrignore_patterns(temp_repo)

    assert all(name in loaded for name in ("vendor", "my_build"))
    assert len(loaded) == 2


def test_ignores_comments_and_blank_lines(temp_repo: Path) -> None:
    """Comment lines (indented or not) and blank lines contribute nothing."""
    ignore_path = temp_repo / CGRIGNORE_FILENAME
    ignore_path.write_text("# Comment\n\nvendor\n # Indented comment\n")

    expected = frozenset({"vendor"})

    assert load_cgrignore_patterns(temp_repo) == expected


def test_strips_whitespace(temp_repo: Path) -> None:
    """Spaces and tabs around a pattern are removed before it is stored."""
    (temp_repo / CGRIGNORE_FILENAME).write_text(" vendor \n\ttemp\t\n")

    loaded = load_cgrignore_patterns(temp_repo)

    for expected_name in ("vendor", "temp"):
        assert expected_name in loaded


def test_returns_empty_on_read_error(
    temp_repo: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A PermissionError raised while opening the ignore file yields no patterns."""
    (temp_repo / CGRIGNORE_FILENAME).write_text("vendor")

    real_open = Path.open

    def failing_open(self: Path, *args, **kwargs):  # noqa: ANN002, ANN003
        # Only the ignore file is made unreadable; any other path opens normally.
        if self.name != CGRIGNORE_FILENAME:
            return real_open(self, *args, **kwargs)
        raise PermissionError("Cannot read")

    monkeypatch.setattr(Path, "open", failing_open)

    assert load_cgrignore_patterns(temp_repo) == frozenset()


def test_handles_duplicates(temp_repo: Path) -> None:
    """A pattern listed twice is only counted once."""
    (temp_repo / CGRIGNORE_FILENAME).write_text("vendor\nvendor\ntemp\n")

    loaded = load_cgrignore_patterns(temp_repo)

    assert len(loaded) == 2


def test_returns_empty_if_cgrignore_is_a_directory(temp_repo: Path) -> None:
    """A directory named like the ignore file is treated as 'no ignore file'."""
    (temp_repo / CGRIGNORE_FILENAME).mkdir()

    assert load_cgrignore_patterns(temp_repo) == frozenset()


class TestCgrignoreIntegration:
    """Exercise prompt_for_included_directories with .cgrignore files on disk.

    Every test patches Prompt.ask and app_context in codebase_rag.main, so no
    real prompt is shown; the mocked answer stands in for the user's reply.
    """

    @patch("codebase_rag.main.Prompt.ask")
    @patch("codebase_rag.main.app_context")
    def test_cgrignore_patterns_included_in_candidates(
        self, mock_context: MagicMock, mock_ask: MagicMock, tmp_path: Path
    ) -> None:
        """Answering "all" returns .git together with both file patterns."""
        mock_ask.return_value = "all"
        (tmp_path / ".git").mkdir()
        (tmp_path / CGRIGNORE_FILENAME).write_text("vendor\ncustom_cache\n")

        selected = prompt_for_included_directories(tmp_path)

        for expected_name in (".git", "vendor", "custom_cache"):
            assert expected_name in selected

    @patch("codebase_rag.main.Prompt.ask")
    @patch("codebase_rag.main.app_context")
    def test_cgrignore_merged_with_cli_excludes(
        self, mock_context: MagicMock, mock_ask: MagicMock, tmp_path: Path
    ) -> None:
        """File patterns and CLI excludes both appear in the result."""
        mock_ask.return_value = "all"
        (tmp_path / CGRIGNORE_FILENAME).write_text("from_cgrignore\n")

        selected = prompt_for_included_directories(
            tmp_path, cli_excludes=["from_cli"]
        )

        for expected_name in ("from_cgrignore", "from_cli"):
            assert expected_name in selected

    @patch("codebase_rag.main.Prompt.ask")
    @patch("codebase_rag.main.app_context")
    def test_cgrignore_only_returns_without_prompt_when_empty(
        self, mock_context: MagicMock, mock_ask: MagicMock, tmp_path: Path
    ) -> None:
        """An empty directory produces an empty result and no prompt."""
        selected = prompt_for_included_directories(tmp_path)

        mock_ask.assert_not_called()
        assert selected == frozenset()

    @patch("codebase_rag.main.Prompt.ask")
    @patch("codebase_rag.main.app_context")
    def test_cgrignore_alone_triggers_prompt(
        self, mock_context: MagicMock, mock_ask: MagicMock, tmp_path: Path
    ) -> None:
        """A .cgrignore pattern on its own is enough for the prompt to be shown."""
        mock_ask.return_value = "none"
        (tmp_path / CGRIGNORE_FILENAME).write_text("my_custom_dir\n")

        prompt_for_included_directories(tmp_path)

        mock_ask.assert_called_once()

    @patch("codebase_rag.main.Prompt.ask")
    @patch("codebase_rag.main.app_context")
    def test_cgrignore_deduplicates_with_detected(
        self, mock_context: MagicMock, mock_ask: MagicMock, tmp_path: Path
    ) -> None:
        """.git listed in the file and present on disk shows up exactly once."""
        mock_ask.return_value = "all"
        (tmp_path / ".git").mkdir()
        (tmp_path / CGRIGNORE_FILENAME).write_text(".git\nvendor\n")

        selected = prompt_for_included_directories(tmp_path)

        for expected_name in (".git", "vendor"):
            assert expected_name in selected
        assert list(selected).count(".git") == 1