Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,23 @@ Configuration is managed through environment variables in `.env` file:
- `TARGET_REPO_PATH`: Default repository path (default: `.`)
- `LOCAL_MODEL_ENDPOINT`: Fallback endpoint for Ollama (default: `http://localhost:11434/v1`)

### Custom Ignore Patterns

You can specify additional directories to exclude by creating a `.cgrignore` file in your repository root:

```
# Comments start with #
vendor
.custom_cache
my_build_output
```

- One directory name per line
- Lines starting with `#` are comments (leading whitespace is ignored; inline comments after a directory name are not supported)
- Blank lines are ignored
- Patterns are exact directory name matches (not globs)
- Patterns from `.cgrignore` are merged with `--exclude` flags and auto-detected directories

### Key Dependencies

<!-- SECTION:dependencies -->
Expand Down
28 changes: 28 additions & 0 deletions codebase_rag/config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from __future__ import annotations

from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Unpack

from dotenv import load_dotenv
from loguru import logger
from pydantic import AnyHttpUrl
from pydantic_settings import BaseSettings, SettingsConfigDict

from . import constants as cs
from . import exceptions as ex
from . import logs
from .types_defs import ModelConfigKwargs

load_dotenv()
Expand Down Expand Up @@ -227,3 +230,28 @@ def resolve_batch_size(self, batch_size: int | None) -> int:


settings = AppConfig()

CGRIGNORE_FILENAME = ".cgrignore"


def load_cgrignore_patterns(repo_path: Path) -> frozenset[str]:
    """Load extra directory names to exclude from ``<repo_path>/.cgrignore``.

    Every line is stripped of surrounding whitespace. Blank lines and lines
    whose first non-blank character is ``#`` are skipped; each remaining line
    is kept verbatim as one exact-match directory name.

    Loading is best-effort: a problem with the ignore file never aborts the
    caller, it only produces a warning and an empty result.

    Args:
        repo_path: Directory expected to contain the ignore file.

    Returns:
        The de-duplicated patterns, or an empty frozenset when the file is
        missing, cannot be read, or is not valid UTF-8.
    """
    ignore_file = repo_path / CGRIGNORE_FILENAME
    if not ignore_file.exists():
        return frozenset()

    try:
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM.
        # A BOM is not whitespace, so strip() would leave it attached to the
        # first line and turn a leading comment into a bogus pattern.
        with ignore_file.open(encoding="utf-8-sig") as f:
            stripped_lines = (raw.strip() for raw in f)
            patterns = frozenset(
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            )
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError derives from ValueError, not OSError, so it has
        # to be listed explicitly for a mis-encoded file to be handled here.
        logger.warning(logs.CGRIGNORE_READ_FAILED.format(path=ignore_file, error=e))
        return frozenset()

    if patterns:
        logger.info(
            logs.CGRIGNORE_LOADED.format(count=len(patterns), path=ignore_file)
        )
    return patterns
1 change: 1 addition & 0 deletions codebase_rag/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,7 @@ class DiffMarker:
EXCLUDE_COL_STATUS = "Source"
EXCLUDE_STATUS_DETECTED = "auto-detected"
EXCLUDE_STATUS_CLI = "--exclude"
EXCLUDE_STATUS_CGRIGNORE = ".cgrignore"
EXCLUDE_PROMPT_INSTRUCTIONS = (
"Options: 'all' (exclude all), 'none' (exclude nothing), "
"or numbers like '1,3,5' (exclude specific)"
Expand Down
4 changes: 4 additions & 0 deletions codebase_rag/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@
GRAMMAR_LOAD_FAILED = "Failed to load {lang} grammar: {error}"
INITIALIZED_PARSERS = "Initialized parsers for: {languages}"

# (H) Ignore pattern logs
# Templates rendered with str.format by the .cgrignore loader:
# {count} = number of patterns read, {path} = the ignore file,
# {error} = the exception caught while reading it.
CGRIGNORE_LOADED = "Loaded {count} patterns from {path}"
CGRIGNORE_READ_FAILED = "Failed to read {path}: {error}"

# (H) File watcher logs
WATCHER_ACTIVE = "File watcher is now active."
WATCHER_SKIP_NO_QUERY = "Ingestor does not support querying, skipping real-time update."
Expand Down
17 changes: 11 additions & 6 deletions codebase_rag/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,8 +676,12 @@ def prompt_exclude_directories(
cli_excludes: list[str] | None = None,
skip_prompt: bool = False,
) -> frozenset[str]:
from .config import load_cgrignore_patterns

detected = detect_root_excludable_directories(repo_path)
pre_excluded = frozenset(cli_excludes) if cli_excludes else frozenset()
cgrignore_patterns = load_cgrignore_patterns(repo_path)
cli_patterns = frozenset(cli_excludes) if cli_excludes else frozenset()
pre_excluded = cli_patterns | cgrignore_patterns

if not detected and not pre_excluded:
return frozenset()
Expand All @@ -693,11 +697,12 @@ def prompt_exclude_directories(
table.add_column(cs.EXCLUDE_COL_STATUS, style=cs.Color.GREEN)

for i, name in enumerate(all_candidates, 1):
status = (
cs.EXCLUDE_STATUS_CLI
if name in pre_excluded
else cs.EXCLUDE_STATUS_DETECTED
)
if name in cli_patterns:
status = cs.EXCLUDE_STATUS_CLI
elif name in cgrignore_patterns:
status = cs.EXCLUDE_STATUS_CGRIGNORE
else:
status = cs.EXCLUDE_STATUS_DETECTED
table.add_row(str(i), name, status)

app_context.console.print(table)
Expand Down
70 changes: 70 additions & 0 deletions codebase_rag/tests/test_cgrignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import annotations

from pathlib import Path

import pytest

from codebase_rag.config import CGRIGNORE_FILENAME, load_cgrignore_patterns


def test_returns_empty_when_no_file(temp_repo: Path) -> None:
    """A repository without a .cgrignore file yields no patterns."""
    assert load_cgrignore_patterns(temp_repo) == frozenset()


def test_loads_patterns_from_file(temp_repo: Path) -> None:
    """Each non-comment line of .cgrignore becomes one pattern."""
    ignore_path = temp_repo / CGRIGNORE_FILENAME
    ignore_path.write_text("vendor\nmy_build\n")

    loaded = load_cgrignore_patterns(temp_repo)

    assert len(loaded) == 2
    for expected in ("vendor", "my_build"):
        assert expected in loaded


def test_ignores_comments_and_blank_lines(temp_repo: Path) -> None:
    """Comment lines (indented or not) and empty lines produce no patterns."""
    (temp_repo / CGRIGNORE_FILENAME).write_text(
        "# Comment\n\nvendor\n # Indented comment\n"
    )

    loaded = load_cgrignore_patterns(temp_repo)

    assert loaded == frozenset({"vendor"})


def test_strips_whitespace(temp_repo: Path) -> None:
    """Surrounding spaces and tabs are removed from every pattern."""
    cgrignore = temp_repo / CGRIGNORE_FILENAME
    cgrignore.write_text(" vendor \n\ttemp\t\n")

    result = load_cgrignore_patterns(temp_repo)

    # Exact equality (rather than membership) also rejects unstripped
    # variants or stray extra entries ending up in the result.
    assert result == frozenset({"vendor", "temp"})


def test_returns_empty_on_read_error(
    temp_repo: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A PermissionError raised when opening .cgrignore yields no patterns."""
    (temp_repo / CGRIGNORE_FILENAME).write_text("vendor")

    real_open = Path.open

    def failing_open(self: Path, *args, **kwargs):  # noqa: ANN002, ANN003
        # Only the ignore file is made unreadable; other paths open normally.
        if self.name != CGRIGNORE_FILENAME:
            return real_open(self, *args, **kwargs)
        raise PermissionError("Cannot read")

    monkeypatch.setattr(Path, "open", failing_open)

    assert load_cgrignore_patterns(temp_repo) == frozenset()


def test_handles_duplicates(temp_repo: Path) -> None:
    """Repeated lines collapse into a single pattern."""
    cgrignore = temp_repo / CGRIGNORE_FILENAME
    cgrignore.write_text("vendor\nvendor\ntemp\n")

    result = load_cgrignore_patterns(temp_repo)

    # Compare the full contents, not just the size, so a result holding the
    # wrong two names cannot pass.
    assert result == frozenset({"vendor", "temp"})