From d17d3329f49dd39488aa08bacc5408d10168d026 Mon Sep 17 00:00:00 2001 From: mickael Date: Tue, 12 Aug 2025 18:41:36 +0200 Subject: [PATCH 1/4] feat: add memory optimizations to prevent OOM issues --- .pre-commit-config.yaml | 2 + pyproject.toml | 1 + requirements.txt | 1 + src/gitingest/config.py | 4 ++ src/gitingest/ingestion.py | 18 ++++++- src/gitingest/output_formatter.py | 29 +++++++++- src/gitingest/schemas/filesystem.py | 32 ++++++++++- src/gitingest/utils/memory_utils.py | 83 +++++++++++++++++++++++++++++ src/server/query_processor.py | 18 +++++-- 9 files changed, 180 insertions(+), 8 deletions(-) create mode 100644 src/gitingest/utils/memory_utils.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3fcfb61b..a7f749dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -122,6 +122,7 @@ repos: loguru>=0.7.0, pathspec>=0.12.1, prometheus-client, + psutil>=5.9.0, pydantic, pytest-asyncio, pytest-mock, @@ -150,6 +151,7 @@ repos: loguru>=0.7.0, pathspec>=0.12.1, prometheus-client, + psutil>=5.9.0, pydantic, pytest-asyncio, pytest-mock, diff --git a/pyproject.toml b/pyproject.toml index 36219fe6..abd6c511 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ server = [ "boto3>=1.28.0", # AWS SDK for S3 support "fastapi[standard]>=0.109.1", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2024-38) "prometheus-client", + "psutil>=5.9.0", # Memory monitoring for optimization "sentry-sdk[fastapi]", "slowapi", "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) diff --git a/requirements.txt b/requirements.txt index b803cf7b..2d28af30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ httpx loguru>=0.7.0 pathspec>=0.12.1 prometheus-client +psutil>=5.9.0 # Memory monitoring for optimization pydantic python-dotenv sentry-sdk[fastapi] diff --git a/src/gitingest/config.py b/src/gitingest/config.py index 3d154684..eabd60e0 100644 --- a/src/gitingest/config.py +++ b/src/gitingest/config.py @@ -9,6 +9,10 @@ MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # Maximum size of output file (500 MB) DEFAULT_TIMEOUT = 60 # seconds +# Memory optimization settings +BATCH_SIZE = 100 # Process files in batches to reduce memory usage +MEMORY_CHECK_INTERVAL = 50 # Check memory usage every N files + OUTPUT_FILE_NAME = "digest.txt" TMP_BASE_PATH = Path(tempfile.gettempdir()) / "gitingest" diff --git a/src/gitingest/ingestion.py b/src/gitingest/ingestion.py index 01a2c8f3..be3c5e67 100644 --- a/src/gitingest/ingestion.py +++ b/src/gitingest/ingestion.py @@ -5,11 +5,12 @@ from pathlib import Path from typing import TYPE_CHECKING -from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES +from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES, MEMORY_CHECK_INTERVAL from gitingest.output_formatter import format_node from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats from gitingest.utils.ingestion_utils import _should_exclude, _should_include from gitingest.utils.logging_config import get_logger +from gitingest.utils.memory_utils import check_memory_pressure, force_garbage_collection, log_memory_stats if TYPE_CHECKING: from gitingest.schemas import IngestionQuery @@ -51,6 +52,9 @@ def ingest_query(query: IngestionQuery) -> tuple[str, str, str]: }, ) + # Log initial memory usage + log_memory_stats("at ingestion start") + subpath = Path(query.subpath.strip("/")).as_posix() path = query.local_path / subpath @@ -117,6 +121,9 @@ def 
ingest_query(query: IngestionQuery) -> tuple[str, str, str]: }, ) + # Log final memory usage + log_memory_stats("at ingestion completion") + return format_node(root_node, query=query) @@ -258,6 +265,15 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat stats.total_files += 1 stats.total_size += file_size + # Check memory usage periodically and force GC if needed + if stats.total_files % MEMORY_CHECK_INTERVAL == 0 and check_memory_pressure(): + logger.warning( + "Memory pressure detected, forcing garbage collection", + extra={"files_processed": stats.total_files}, + ) + force_garbage_collection() + log_memory_stats(f"after processing {stats.total_files} files") + child = FileSystemNode( name=path.name, type=FileSystemNodeType.FILE, diff --git a/src/gitingest/output_formatter.py b/src/gitingest/output_formatter.py index 5c2b59ae..64b8449c 100644 --- a/src/gitingest/output_formatter.py +++ b/src/gitingest/output_formatter.py @@ -3,6 +3,7 @@ from __future__ import annotations import ssl +from io import StringIO from typing import TYPE_CHECKING import requests.exceptions @@ -122,8 +123,32 @@ def _gather_file_contents(node: FileSystemNode) -> str: if node.type != FileSystemNodeType.DIRECTORY: return node.content_string - # Recursively gather contents of all files under the current directory - return "\n".join(_gather_file_contents(child) for child in node.children) + # Use StringIO for memory-efficient string concatenation + content_buffer = StringIO() + try: + _gather_file_contents_recursive(node, content_buffer) + return content_buffer.getvalue() + finally: + content_buffer.close() + + +def _gather_file_contents_recursive(node: FileSystemNode, buffer: StringIO) -> None: + """Recursively gather file contents into a StringIO buffer to reduce memory usage. + + Parameters + ---------- + node : FileSystemNode + The current directory or file node being processed. + buffer : StringIO + Buffer to write content to. + + """ + if node.type != FileSystemNodeType.DIRECTORY: + buffer.write(node.content_string) + return + + for child in node.children: + _gather_file_contents_recursive(child, buffer) def _create_tree_structure( diff --git a/src/gitingest/schemas/filesystem.py b/src/gitingest/schemas/filesystem.py index cc66e7b1..4df8212a 100644 --- a/src/gitingest/schemas/filesystem.py +++ b/src/gitingest/schemas/filesystem.py @@ -5,6 +5,7 @@ import os from dataclasses import dataclass, field from enum import Enum, auto +from io import StringIO from typing import TYPE_CHECKING from gitingest.utils.compat_func import readlink @@ -155,7 +156,34 @@ def content(self) -> str: # pylint: disable=too-many-return-statements return "Error: Unable to decode file with available encodings" try: - with self.path.open(encoding=good_enc) as fp: - return fp.read() + return self._read_file_content_streaming(good_enc) except (OSError, UnicodeDecodeError) as exc: return f"Error reading file with {good_enc!r}: {exc}" + + def _read_file_content_streaming(self, encoding: str, chunk_size: int = 8192) -> str: + """Read file content using streaming to reduce memory usage. + + Parameters + ---------- + encoding : str + The encoding to use for reading the file. + chunk_size : int + Size of chunks to read at a time (default: 8192 bytes). + + Returns + ------- + str + The file content. 
+ + """ + content_buffer = StringIO() + try: + with self.path.open(encoding=encoding) as fp: + while True: + chunk = fp.read(chunk_size) + if not chunk: + break + content_buffer.write(chunk) + return content_buffer.getvalue() + finally: + content_buffer.close() diff --git a/src/gitingest/utils/memory_utils.py b/src/gitingest/utils/memory_utils.py new file mode 100644 index 00000000..d32cb61a --- /dev/null +++ b/src/gitingest/utils/memory_utils.py @@ -0,0 +1,83 @@ +"""Memory utility functions for monitoring and optimization.""" + +from __future__ import annotations + +import gc +from typing import Any + +import psutil + +from gitingest.utils.logging_config import get_logger + +logger = get_logger(__name__) + + +def get_memory_usage() -> dict[str, Any]: + """Get current memory usage statistics. + + Returns + ------- + dict[str, Any] + Dictionary containing memory usage statistics in MB. + + """ + try: + process = psutil.Process() + memory_info = process.memory_info() + + return { + "rss_mb": memory_info.rss / (1024 * 1024), # Resident Set Size + "vms_mb": memory_info.vms / (1024 * 1024), # Virtual Memory Size + "percent": process.memory_percent(), + } + except Exception as exc: + logger.warning("Failed to get memory usage", extra={"error": str(exc)}) + return {"rss_mb": 0, "vms_mb": 0, "percent": 0} + + +def force_garbage_collection() -> None: + """Force garbage collection to free up memory.""" + try: + collected = gc.collect() + logger.debug("Forced garbage collection", extra={"objects_collected": collected}) + except Exception as exc: + logger.warning("Failed to force garbage collection", extra={"error": str(exc)}) + + +def check_memory_pressure(threshold_mb: float = 3000) -> bool: + """Check if memory usage is above threshold. + + Parameters + ---------- + threshold_mb : float + Memory threshold in MB (default: 3000 MB = 3 GB). + + Returns + ------- + bool + True if memory usage is above threshold. + + """ + memory_stats = get_memory_usage() + return memory_stats["rss_mb"] > threshold_mb + + +def log_memory_stats(context: str = "") -> None: + """Log current memory statistics. + + Parameters + ---------- + context : str + Context information for the log message. 
+ + """ + memory_stats = get_memory_usage() + logger.info( + "Memory usage %s", + context, + extra={ + "memory_rss_mb": round(memory_stats["rss_mb"], 2), + "memory_vms_mb": round(memory_stats["vms_mb"], 2), + "memory_percent": round(memory_stats["percent"], 2), + }, + ) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index f2f2ae90..ad68b519 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -3,6 +3,7 @@ from __future__ import annotations import shutil +from io import StringIO from pathlib import Path from typing import TYPE_CHECKING, cast @@ -302,7 +303,19 @@ async def process_query( try: summary, tree, content = ingest_query(query) - digest_content = tree + "\n" + content + + # Clean up repository immediately after ingestion to free memory + _cleanup_repository(clone_config) + + # Use StringIO for memory-efficient string concatenation + digest_buffer = StringIO() + try: + digest_buffer.write(tree) + digest_buffer.write("\n") + digest_buffer.write(content) + digest_content = digest_buffer.getvalue() + finally: + digest_buffer.close() _store_digest_content(query, clone_config, digest_content, summary, tree, content) except Exception as exc: _print_error(query.url, exc, max_file_size, pattern_type, pattern) @@ -326,8 +339,7 @@ async def process_query( digest_url = _generate_digest_url(query) - # Clean up the repository after successful processing - _cleanup_repository(clone_config) + # Repository was already cleaned up after ingestion to free memory earlier return IngestSuccessResponse( repo_url=input_text, From 8e4e37c311482665905deb3d5665adaf54883593 Mon Sep 17 00:00:00 2001 From: mickael Date: Tue, 12 Aug 2025 19:01:12 +0200 Subject: [PATCH 2/4] feat: add prometheus memory metrics for ingestion monitoring --- src/gitingest/utils/memory_utils.py | 6 +- src/server/memory_metrics.py | 144 ++++++++++++++++++++++++++++ src/server/metrics_server.py | 3 + src/server/query_processor.py | 57 ++++++----- 4 files changed, 183 insertions(+), 27 deletions(-) create mode 100644 src/server/memory_metrics.py diff --git a/src/gitingest/utils/memory_utils.py b/src/gitingest/utils/memory_utils.py index d32cb61a..c52c990b 100644 --- a/src/gitingest/utils/memory_utils.py +++ b/src/gitingest/utils/memory_utils.py @@ -72,10 +72,10 @@ def log_memory_stats(context: str = "") -> None: """ memory_stats = get_memory_usage() - logger.info( - "Memory usage %s", - context, + logger.debug( + "Memory usage statistics", extra={ + "context": context, "memory_rss_mb": round(memory_stats["rss_mb"], 2), "memory_vms_mb": round(memory_stats["vms_mb"], 2), "memory_percent": round(memory_stats["percent"], 2), diff --git a/src/server/memory_metrics.py b/src/server/memory_metrics.py new file mode 100644 index 00000000..746f3ac7 --- /dev/null +++ b/src/server/memory_metrics.py @@ -0,0 +1,144 @@ +"""Memory usage metrics for Prometheus monitoring.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from prometheus_client import Gauge, Histogram + +from gitingest.utils.memory_utils import get_memory_usage + +if TYPE_CHECKING: + import types + from typing import Self + +# Memory usage gauges +memory_usage_rss_mb = Gauge( + "gitingest_memory_usage_rss_mb", + "Resident Set Size memory usage in MB", + ["repo_url"], +) + +memory_usage_vms_mb = Gauge( + "gitingest_memory_usage_vms_mb", + "Virtual Memory Size usage in MB", + ["repo_url"], +) + +memory_usage_percent = Gauge( + "gitingest_memory_usage_percent", + "Memory usage percentage", + ["repo_url"], +) 
+ +# Memory usage histogram to track distribution of memory consumption per repository +memory_consumption_histogram = Histogram( + "gitingest_memory_consumption_mb", + "Memory consumption distribution per repository in MB", + ["repo_url"], + buckets=(50, 100, 250, 500, 1000, 2000, 3000, 5000, 10000, float("inf")), +) + +# Peak memory usage gauge +peak_memory_usage_mb = Gauge( + "gitingest_peak_memory_usage_mb", + "Peak memory usage during ingestion in MB", + ["repo_url"], +) + + +def record_memory_usage(repo_url: str) -> dict[str, float]: + """Record current memory usage metrics for a repository. + + Parameters + ---------- + repo_url : str + The repository URL to label the metrics with + + Returns + ------- + dict[str, float] + Current memory usage statistics + + """ + # Truncate URL for label to avoid excessive cardinality + repo_label = repo_url[:255] + + # Get current memory stats + memory_stats = get_memory_usage() + + # Record current memory usage + memory_usage_rss_mb.labels(repo_url=repo_label).set(memory_stats["rss_mb"]) + memory_usage_vms_mb.labels(repo_url=repo_label).set(memory_stats["vms_mb"]) + memory_usage_percent.labels(repo_url=repo_label).set(memory_stats["percent"]) + + # Record in histogram for distribution analysis + memory_consumption_histogram.labels(repo_url=repo_label).observe(memory_stats["rss_mb"]) + + return memory_stats + + +def record_peak_memory_usage(repo_url: str, peak_mb: float) -> None: + """Record peak memory usage for a repository ingestion. + + Parameters + ---------- + repo_url : str + The repository URL to label the metrics with + peak_mb : float + Peak memory usage in MB + + """ + repo_label = repo_url[:255] + peak_memory_usage_mb.labels(repo_url=repo_label).set(peak_mb) + + +class MemoryTracker: + """Context manager to track memory usage during repository ingestion. 
+ + Parameters + ---------- + repo_url : str + Repository URL for labeling metrics + + """ + + def __init__(self, repo_url: str) -> None: + self.repo_url = repo_url + self.initial_memory = 0.0 + self.peak_memory = 0.0 + + def __enter__(self) -> Self: + """Start memory tracking.""" + initial_stats = get_memory_usage() + self.initial_memory = initial_stats["rss_mb"] + self.peak_memory = self.initial_memory + + # Record initial memory usage + record_memory_usage(self.repo_url) + + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: types.TracebackType | None, + ) -> None: + """End memory tracking and record peak usage.""" + # Record final memory usage + final_stats = record_memory_usage(self.repo_url) + + # Update peak if current is higher + self.peak_memory = max(self.peak_memory, final_stats["rss_mb"]) + + # Record peak memory usage + record_peak_memory_usage(self.repo_url, self.peak_memory) + + def update_peak(self) -> None: + """Update peak memory if current usage is higher.""" + current_stats = get_memory_usage() + self.peak_memory = max(self.peak_memory, current_stats["rss_mb"]) + + # Also record current usage + record_memory_usage(self.repo_url) diff --git a/src/server/metrics_server.py b/src/server/metrics_server.py index b24424c6..ec51b7c8 100644 --- a/src/server/metrics_server.py +++ b/src/server/metrics_server.py @@ -7,6 +7,9 @@ from gitingest.utils.logging_config import get_logger +# Import to ensure memory metrics are registered +from server import memory_metrics # noqa: F401 # pylint: disable=unused-import + # Create a logger for this module logger = get_logger(__name__) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index ad68b519..c56ac107 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -13,6 +13,7 @@ from gitingest.utils.git_utils import resolve_commit, validate_github_token from gitingest.utils.logging_config import get_logger from gitingest.utils.pattern_utils import process_patterns +from server.memory_metrics import MemoryTracker from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType, S3Metadata from server.s3_utils import ( _build_s3_url, @@ -292,36 +293,44 @@ async def process_query( return s3_response clone_config = query.extract_clone_config() - await clone_repo(clone_config, token=token) - short_repo_url = f"{query.user_name}/{query.repo_name}" - # The commit hash should always be available at this point - if not query.commit: - msg = "Unexpected error: no commit hash found" - raise RuntimeError(msg) + # Track memory usage during the entire ingestion process + with MemoryTracker(input_text) as memory_tracker: + await clone_repo(clone_config, token=token) - try: - summary, tree, content = ingest_query(query) + # Update peak memory after cloning + memory_tracker.update_peak() - # Clean up repository immediately after ingestion to free memory - _cleanup_repository(clone_config) + # The commit hash should always be available at this point + if not query.commit: + msg = "Unexpected error: no commit hash found" + raise RuntimeError(msg) - # Use StringIO for memory-efficient string concatenation - digest_buffer = StringIO() try: - digest_buffer.write(tree) - digest_buffer.write("\n") - digest_buffer.write(content) - digest_content = digest_buffer.getvalue() - finally: - digest_buffer.close() - _store_digest_content(query, clone_config, digest_content, summary, tree, content) - except Exception as exc: - 
_print_error(query.url, exc, max_file_size, pattern_type, pattern) - # Clean up repository even if processing failed - _cleanup_repository(clone_config) - return IngestErrorResponse(error=f"{exc!s}") + summary, tree, content = ingest_query(query) + + # Update peak memory after ingestion (this is likely the highest usage) + memory_tracker.update_peak() + + # Clean up repository immediately after ingestion to free memory + _cleanup_repository(clone_config) + + # Use StringIO for memory-efficient string concatenation + digest_buffer = StringIO() + try: + digest_buffer.write(tree) + digest_buffer.write("\n") + digest_buffer.write(content) + digest_content = digest_buffer.getvalue() + finally: + digest_buffer.close() + _store_digest_content(query, clone_config, digest_content, summary, tree, content) + except Exception as exc: + _print_error(query.url, exc, max_file_size, pattern_type, pattern) + # Clean up repository even if processing failed + _cleanup_repository(clone_config) + return IngestErrorResponse(error=f"{exc!s}") if len(content) > MAX_DISPLAY_SIZE: content = ( From d1d7abb3fc3c1b23d255ab38d6adf6077df29520 Mon Sep 17 00:00:00 2001 From: mickael Date: Tue, 12 Aug 2025 19:13:28 +0200 Subject: [PATCH 3/4] fix: ensure memory metrics are recorded for all ingestion paths --- src/server/query_processor.py | 83 ++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index c56ac107..5b0d0a95 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -280,23 +280,24 @@ async def process_query( include_patterns=pattern if pattern_type == PatternType.INCLUDE else None, ) - # Check if digest already exists on S3 before cloning - s3_response = await _check_s3_cache( - query=query, - input_text=input_text, - max_file_size=max_file_size, - pattern_type=pattern_type.value, - pattern=pattern, - token=token, - ) - if s3_response: - return s3_response - - clone_config = query.extract_clone_config() - short_repo_url = f"{query.user_name}/{query.repo_name}" - - # Track memory usage during the entire ingestion process + # Track memory usage for the entire request (including S3 cache checks) with MemoryTracker(input_text) as memory_tracker: + # Check if digest already exists on S3 before cloning + s3_response = await _check_s3_cache( + query=query, + input_text=input_text, + max_file_size=max_file_size, + pattern_type=pattern_type.value, + pattern=pattern, + token=token, + ) + if s3_response: + # Even for S3 cache hits, record the memory usage + memory_tracker.update_peak() + return s3_response + + clone_config = query.extract_clone_config() + short_repo_url = f"{query.user_name}/{query.repo_name}" await clone_repo(clone_config, token=token) # Update peak memory after cloning @@ -332,35 +333,35 @@ async def process_query( _cleanup_repository(clone_config) return IngestErrorResponse(error=f"{exc!s}") - if len(content) > MAX_DISPLAY_SIZE: - content = ( - f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " - "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] - ) + if len(content) > MAX_DISPLAY_SIZE: + content = ( + f"(Files content cropped to {int(MAX_DISPLAY_SIZE / 1_000)}k characters, " + "download full ingest to see more)\n" + content[:MAX_DISPLAY_SIZE] + ) - _print_success( - url=query.url, - max_file_size=max_file_size, - pattern_type=pattern_type, - pattern=pattern, - summary=summary, - ) + _print_success( + url=query.url, + 
max_file_size=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + summary=summary, + ) - digest_url = _generate_digest_url(query) + digest_url = _generate_digest_url(query) - # Repository was already cleaned up after ingestion to free memory earlier + # Repository was already cleaned up after ingestion to free memory earlier - return IngestSuccessResponse( - repo_url=input_text, - short_repo_url=short_repo_url, - summary=summary, - digest_url=digest_url, - tree=tree, - content=content, - default_max_file_size=max_file_size, - pattern_type=pattern_type, - pattern=pattern, - ) + return IngestSuccessResponse( + repo_url=input_text, + short_repo_url=short_repo_url, + summary=summary, + digest_url=digest_url, + tree=tree, + content=content, + default_max_file_size=max_file_size, + pattern_type=pattern_type, + pattern=pattern, + ) def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None: From 6cecec0fa75c5a2993115b3988a7ac3fe4ea6a19 Mon Sep 17 00:00:00 2001 From: mickael Date: Thu, 14 Aug 2025 10:40:05 +0200 Subject: [PATCH 4/4] perf: implement enhanced memory management utilities - Added aggressive garbage collection and memory checks. - Introduced token counting optimizations with caching and chunking. - Integrated content caching and progressive cleanup for large repos. - Adjusted memory thresholds for better handling of OOM risks. --- src/gitingest/config.py | 4 +- src/gitingest/ingestion.py | 20 ++- src/gitingest/output_formatter.py | 87 +++++++------ src/gitingest/schemas/filesystem.py | 54 +++++++- src/gitingest/utils/memory_utils.py | 2 +- src/gitingest/utils/token_utils.py | 183 ++++++++++++++++++++++++++++ src/server/query_processor.py | 12 +- 7 files changed, 305 insertions(+), 57 deletions(-) create mode 100644 src/gitingest/utils/token_utils.py diff --git a/src/gitingest/config.py b/src/gitingest/config.py index eabd60e0..2049c939 100644 --- a/src/gitingest/config.py +++ b/src/gitingest/config.py @@ -11,7 +11,9 @@ # Memory optimization settings BATCH_SIZE = 100 # Process files in batches to reduce memory usage -MEMORY_CHECK_INTERVAL = 50 # Check memory usage every N files +MEMORY_CHECK_INTERVAL = 25 # Check memory usage every N files (more frequent) +AGGRESSIVE_GC_INTERVAL = 10 # Force garbage collection every N files for large repos +MEMORY_PRESSURE_THRESHOLD_MB = 2000 # Trigger aggressive cleanup at 2GB usage OUTPUT_FILE_NAME = "digest.txt" diff --git a/src/gitingest/ingestion.py b/src/gitingest/ingestion.py index be3c5e67..32c98be5 100644 --- a/src/gitingest/ingestion.py +++ b/src/gitingest/ingestion.py @@ -5,7 +5,13 @@ from pathlib import Path from typing import TYPE_CHECKING -from gitingest.config import MAX_DIRECTORY_DEPTH, MAX_FILES, MAX_TOTAL_SIZE_BYTES, MEMORY_CHECK_INTERVAL +from gitingest.config import ( + AGGRESSIVE_GC_INTERVAL, + MAX_DIRECTORY_DEPTH, + MAX_FILES, + MAX_TOTAL_SIZE_BYTES, + MEMORY_CHECK_INTERVAL, +) from gitingest.output_formatter import format_node from gitingest.schemas import FileSystemNode, FileSystemNodeType, FileSystemStats from gitingest.utils.ingestion_utils import _should_exclude, _should_include @@ -265,14 +271,20 @@ def _process_file(path: Path, parent_node: FileSystemNode, stats: FileSystemStat stats.total_files += 1 stats.total_size += file_size - # Check memory usage periodically and force GC if needed + # More aggressive memory management for large repositories + if stats.total_files % AGGRESSIVE_GC_INTERVAL == 0: + force_garbage_collection() + + # Check memory usage periodically and force more 
aggressive GC if needed if stats.total_files % MEMORY_CHECK_INTERVAL == 0 and check_memory_pressure(): logger.warning( - "Memory pressure detected, forcing garbage collection", + "Memory pressure detected, forcing aggressive garbage collection", extra={"files_processed": stats.total_files}, ) + # Multiple GC cycles for better cleanup + force_garbage_collection() force_garbage_collection() - log_memory_stats(f"after processing {stats.total_files} files") + log_memory_stats(f"after aggressive cleanup at {stats.total_files} files") child = FileSystemNode( name=path.name, diff --git a/src/gitingest/output_formatter.py b/src/gitingest/output_formatter.py index 64b8449c..ea956659 100644 --- a/src/gitingest/output_formatter.py +++ b/src/gitingest/output_formatter.py @@ -2,16 +2,14 @@ from __future__ import annotations -import ssl from io import StringIO from typing import TYPE_CHECKING -import requests.exceptions -import tiktoken - from gitingest.schemas import FileSystemNode, FileSystemNodeType from gitingest.utils.compat_func import readlink from gitingest.utils.logging_config import get_logger +from gitingest.utils.memory_utils import force_garbage_collection, log_memory_stats +from gitingest.utils.token_utils import clear_encoding_cache, count_tokens_optimized, format_token_count if TYPE_CHECKING: from gitingest.schemas import IngestionQuery @@ -19,11 +17,6 @@ # Initialize logger for this module logger = get_logger(__name__) -_TOKEN_THRESHOLDS: list[tuple[int, str]] = [ - (1_000_000, "M"), - (1_000, "k"), -] - def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, str]: """Generate a summary, directory structure, and file contents for a given file system node. @@ -52,13 +45,33 @@ def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str, summary += f"File: {node.name}\n" summary += f"Lines: {len(node.content.splitlines()):,}\n" + # Log memory before tree generation + log_memory_stats("before tree structure generation") + tree = "Directory structure:\n" + _create_tree_structure(query, node=node) + # Log memory before content gathering (this is the memory-intensive part) + log_memory_stats("before content gathering") + content = _gather_file_contents(node) - token_estimate = _format_token_count(tree + content) - if token_estimate: - summary += f"\nEstimated tokens: {token_estimate}" + # Force garbage collection after content gathering + force_garbage_collection() + log_memory_stats("after content gathering and cleanup") + + # Count tokens with optimization + token_count = count_tokens_optimized(tree + content) + if token_count > 0: + summary += f"\nEstimated tokens: {format_token_count(token_count)}" + + # Final cleanup + if hasattr(node, "clear_content_cache_recursive"): + node.clear_content_cache_recursive() + + # Clear the tiktoken encoding cache to free memory + clear_encoding_cache() + force_garbage_collection() + log_memory_stats("after final cache and encoding cleanup") return summary, tree, content @@ -133,7 +146,12 @@ def _gather_file_contents(node: FileSystemNode) -> str: def _gather_file_contents_recursive(node: FileSystemNode, buffer: StringIO) -> None: - """Recursively gather file contents into a StringIO buffer to reduce memory usage. + """Recursively gather file contents with memory optimization. 
+ + This version includes memory optimizations: + - Progressive content cache clearing + - Periodic garbage collection + - Memory-aware processing Parameters ---------- @@ -144,12 +162,21 @@ def _gather_file_contents_recursive(node: FileSystemNode, buffer: StringIO) -> N """ if node.type != FileSystemNodeType.DIRECTORY: + # Write content and immediately clear cache to free memory buffer.write(node.content_string) + node.clear_content_cache() return - for child in node.children: + for files_processed, child in enumerate(node.children, 1): _gather_file_contents_recursive(child, buffer) + # Progressive cleanup every 10 files to prevent memory accumulation + if files_processed % 10 == 0: + force_garbage_collection() + + # Clear content cache for this directory after processing all children + node.clear_content_cache() + def _create_tree_structure( query: IngestionQuery, @@ -201,35 +228,3 @@ def _create_tree_structure( for i, child in enumerate(node.children): tree_str += _create_tree_structure(query, node=child, prefix=prefix, is_last=i == len(node.children) - 1) return tree_str - - -def _format_token_count(text: str) -> str | None: - """Return a human-readable token-count string (e.g. 1.2k, 1.2 M). - - Parameters - ---------- - text : str - The text string for which the token count is to be estimated. - - Returns - ------- - str | None - The formatted number of tokens as a string (e.g., ``"1.2k"``, ``"1.2M"``), or ``None`` if an error occurs. - - """ - try: - encoding = tiktoken.get_encoding("o200k_base") # gpt-4o, gpt-4o-mini - total_tokens = len(encoding.encode(text, disallowed_special=())) - except (ValueError, UnicodeEncodeError) as exc: - logger.warning("Failed to estimate token size", extra={"error": str(exc)}) - return None - except (requests.exceptions.RequestException, ssl.SSLError) as exc: - # If network errors, skip token count estimation instead of erroring out - logger.warning("Failed to download tiktoken model", extra={"error": str(exc)}) - return None - - for threshold, suffix in _TOKEN_THRESHOLDS: - if total_tokens >= threshold: - return f"{total_tokens / threshold:.1f}{suffix}" - - return str(total_tokens) diff --git a/src/gitingest/schemas/filesystem.py b/src/gitingest/schemas/filesystem.py index 4df8212a..944e374f 100644 --- a/src/gitingest/schemas/filesystem.py +++ b/src/gitingest/schemas/filesystem.py @@ -50,6 +50,7 @@ class FileSystemNode: # pylint: disable=too-many-instance-attributes dir_count: int = 0 depth: int = 0 children: list[FileSystemNode] = field(default_factory=list) + _content_cache: str | None = field(default=None, init=False) def sort_children(self) -> None: """Sort the children nodes of a directory according to a specific order. @@ -106,10 +107,9 @@ def content_string(self) -> str: @property def content(self) -> str: # pylint: disable=too-many-return-statements - """Return file content (if text / notebook) or an explanatory placeholder. + """Return file content with caching for memory optimization. - Heuristically decides whether the file is text or binary by decoding a small chunk of the file - with multiple encodings and checking for common binary markers. + Uses lazy loading and caching to reduce memory usage for large repositories. Returns ------- @@ -129,14 +129,50 @@ def content(self) -> str: # pylint: disable=too-many-return-statements if self.type == FileSystemNodeType.SYMLINK: return "" # TODO: are we including the empty content of symlinks? 
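The _content_cache field introduced above, filled lazily by the content property, pairs with the progressive cleanup in _gather_file_contents_recursive: a file's text is loaded on first access, written to the shared buffer once, and its cache is dropped immediately, so at most one file's content is held outside the output buffer at a time. A simplified, self-contained sketch of that load/write/release cycle, assuming a toy Node class rather than the real FileSystemNode:

from __future__ import annotations

import gc
from dataclasses import dataclass, field
from io import StringIO
from pathlib import Path


@dataclass
class Node:
    """Toy stand-in for FileSystemNode; only the caching behaviour is mirrored."""

    path: Path
    children: list[Node] = field(default_factory=list)
    _cache: str | None = field(default=None, init=False)

    @property
    def text(self) -> str:
        # Lazy load on first access, then serve from the cache.
        if self._cache is None:
            self._cache = self.path.read_text(errors="replace")
        return self._cache

    def clear(self) -> None:
        # Release the per-node copy once it has been written out.
        self._cache = None


def gather(node: Node, buffer: StringIO) -> None:
    if not node.children:
        buffer.write(node.text)  # leaf: write once ...
        node.clear()             # ... and drop the cached text right away
        return
    for i, child in enumerate(node.children, 1):
        gather(child, buffer)
        if i % 10 == 0:          # periodic collection, mirroring the patch's interval
            gc.collect()
    node.clear()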
- if self.path.suffix == ".ipynb": # Notebook + # Return cached content if available + if self._content_cache is not None: + return self._content_cache + + # Load and cache content + self._content_cache = self._load_content() + return self._content_cache + + def _load_content(self) -> str: + """Load file content from disk. + + Returns + ------- + str + The file content + + """ + # Handle notebooks separately + if self.path.suffix == ".ipynb": try: return process_notebook(self.path) except Exception as exc: return f"Error processing notebook: {exc}" + # Read file chunk for analysis chunk = _read_chunk(self.path) + # Determine the appropriate content based on chunk analysis + return self._analyze_chunk_and_read(chunk) + + def _analyze_chunk_and_read(self, chunk: bytes | None) -> str: + """Analyze file chunk and return appropriate content. + + Parameters + ---------- + chunk : bytes | None + The file chunk to analyze + + Returns + ------- + str + The file content or error message + + """ if chunk is None: return "Error reading file" @@ -187,3 +223,13 @@ def _read_file_content_streaming(self, encoding: str, chunk_size: int = 8192) -> return content_buffer.getvalue() finally: content_buffer.close() + + def clear_content_cache(self) -> None: + """Clear cached content to free memory.""" + self._content_cache = None + + def clear_content_cache_recursive(self) -> None: + """Recursively clear content cache for this node and all children.""" + self.clear_content_cache() + for child in self.children: + child.clear_content_cache_recursive() diff --git a/src/gitingest/utils/memory_utils.py b/src/gitingest/utils/memory_utils.py index c52c990b..45f768bf 100644 --- a/src/gitingest/utils/memory_utils.py +++ b/src/gitingest/utils/memory_utils.py @@ -44,7 +44,7 @@ def force_garbage_collection() -> None: logger.warning("Failed to force garbage collection", extra={"error": str(exc)}) -def check_memory_pressure(threshold_mb: float = 3000) -> bool: +def check_memory_pressure(threshold_mb: float = 2000) -> bool: """Check if memory usage is above threshold. Parameters diff --git a/src/gitingest/utils/token_utils.py b/src/gitingest/utils/token_utils.py new file mode 100644 index 00000000..b3307010 --- /dev/null +++ b/src/gitingest/utils/token_utils.py @@ -0,0 +1,183 @@ +"""Optimized token counting utilities with memory management.""" + +from __future__ import annotations + +import gc +import os +from typing import Protocol, Self + +from gitingest.utils.logging_config import get_logger + +# Try to import tiktoken, but don't fail if it's not available +try: + import tiktoken + + TIKTOKEN_AVAILABLE = True +except ImportError: + tiktoken = None # type: ignore[assignment] + TIKTOKEN_AVAILABLE = False + +logger = get_logger(__name__) + +# Environment variable to disable token counting for memory-sensitive environments +DISABLE_TOKEN_COUNTING = os.getenv("GITINGEST_DISABLE_TOKEN_COUNTING", "false").lower() == "true" + +# Constants for token formatting +TOKEN_MILLION = 1_000_000 +TOKEN_THOUSAND = 1_000 + + +class Encoding(Protocol): + """Protocol for tiktoken encoding objects.""" + + def encode(self, text: str, *, disallowed_special: tuple[str, ...] 
= ()) -> list[int]: + """Encode text to tokens.""" + + +class TokenEncoder: + """Singleton class to manage token encoding with lazy loading.""" + + _instance: TokenEncoder | None = None + _encoding: Encoding | None = None + + def __new__(cls) -> Self: + """Create or return the singleton instance.""" + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def get_encoding(self) -> Encoding | None: + """Get or create a cached encoding instance. + + Returns + ------- + Encoding | None + The cached encoding instance, or None if token counting is disabled + + """ + if DISABLE_TOKEN_COUNTING: + return None + + if self._encoding is None: + self._encoding = self._load_encoding() + return self._encoding + + def _load_encoding(self) -> Encoding | None: + """Load the tiktoken encoding. + + Returns + ------- + Encoding | None + The encoding instance or None if tiktoken is not available + + """ + if not TIKTOKEN_AVAILABLE: + logger.warning("tiktoken not available, token counting disabled") + return None + + try: + return tiktoken.get_encoding("o200k_base") # type: ignore[union-attr] + except Exception as exc: + logger.warning("Failed to load tiktoken encoding", extra={"error": str(exc)}) + return None + + def clear_cache(self) -> None: + """Clear the encoding cache to free memory.""" + if self._encoding is not None: + self._encoding = None + gc.collect() + + +# Create the singleton instance +_token_encoder = TokenEncoder() + + +def get_cached_encoding() -> Encoding | None: + """Get or create a cached encoding instance. + + Returns + ------- + Encoding | None + The cached encoding instance, or None if token counting is disabled + + """ + return _token_encoder.get_encoding() + + +def clear_encoding_cache() -> None: + """Clear the global encoding cache to free memory.""" + _token_encoder.clear_cache() + + +def count_tokens_optimized(text: str, chunk_size: int = 100000) -> int: + """Count tokens with memory optimization. + + Parameters + ---------- + text : str + Text to count tokens for + chunk_size : int + Size of chunks to process at a time (default: 100k chars) + + Returns + ------- + int + Total token count, or estimated count if token counting is disabled + + """ + # If token counting is disabled, return a rough estimate + if DISABLE_TOKEN_COUNTING: + # Rough estimate: ~1.3 tokens per character for code + return int(len(text) * 1.3) + + try: + encoding = get_cached_encoding() + if encoding is None: + # Fallback to estimation + return int(len(text) * 1.3) + + # Process in chunks to avoid memory spike for large texts + total_tokens = 0 + + if len(text) > chunk_size: + # Process large texts in chunks + for i in range(0, len(text), chunk_size): + chunk = text[i : i + chunk_size] + tokens = encoding.encode(chunk, disallowed_special=()) + total_tokens += len(tokens) + # Clear the tokens list immediately to free memory + del tokens + else: + tokens = encoding.encode(text, disallowed_special=()) + total_tokens = len(tokens) + del tokens + + except (ValueError, UnicodeEncodeError) as exc: + logger.warning("Failed to count tokens", extra={"error": str(exc)}) + return int(len(text) * 1.3) # Fallback to estimation + except Exception as exc: + logger.warning("Unexpected error counting tokens", extra={"error": str(exc)}) + return int(len(text) * 1.3) # Fallback to estimation + + return total_tokens + + +def format_token_count(count: int) -> str: + """Format token count as human-readable string. 
+ + Parameters + ---------- + count : int + Token count + + Returns + ------- + str + Formatted string (e.g., "1.2k", "1.2M") + + """ + if count >= TOKEN_MILLION: + return f"{count / TOKEN_MILLION:.1f}M" + if count >= TOKEN_THOUSAND: + return f"{count / TOKEN_THOUSAND:.1f}k" + return str(count) diff --git a/src/server/query_processor.py b/src/server/query_processor.py index 5b0d0a95..76b42995 100644 --- a/src/server/query_processor.py +++ b/src/server/query_processor.py @@ -12,6 +12,7 @@ from gitingest.query_parser import parse_remote_repo from gitingest.utils.git_utils import resolve_commit, validate_github_token from gitingest.utils.logging_config import get_logger +from gitingest.utils.memory_utils import force_garbage_collection from gitingest.utils.pattern_utils import process_patterns from server.memory_metrics import MemoryTracker from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType, S3Metadata @@ -351,7 +352,8 @@ async def process_query( # Repository was already cleaned up after ingestion to free memory earlier - return IngestSuccessResponse( + # Create response + response = IngestSuccessResponse( repo_url=input_text, short_repo_url=short_repo_url, summary=summary, @@ -363,6 +365,14 @@ async def process_query( pattern=pattern, ) + # Aggressive cleanup of large strings to free memory + del tree + del content + del summary + force_garbage_collection() + + return response + def _print_query(url: str, max_file_size: int, pattern_type: str, pattern: str) -> None: """Print a formatted summary of the query details for debugging.
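Taken together with the earlier patches, the request path now runs inside the tracker added in patch 2; a hypothetical, trimmed-down usage sketch of that wrapper (only the MemoryTracker API is from the patch, while run_with_tracking, do_clone, and do_ingest are stand-ins for the real clone and ingest calls):

from typing import Callable

from server.memory_metrics import MemoryTracker


def run_with_tracking(
    repo_url: str,
    do_clone: Callable[[], None],
    do_ingest: Callable[[], str],
) -> str:
    with MemoryTracker(repo_url) as tracker:  # __enter__ samples the starting RSS and sets the gauges
        do_clone()
        tracker.update_peak()                 # sample again after the clone
        digest = do_ingest()
        tracker.update_peak()                 # ingestion is usually the high-water mark
        return digest
    # __exit__ still runs on return: it records a final sample and publishes the peak gauge for repo_url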