diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4fe177f..95f42c2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: hooks: - id: mypy name: mypy - entry: uv run mypy src/ + entry: uv run mypy src/ tests/ language: system types: [python] pass_filenames: false diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..1d8877b --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,45 @@ +This is built on top of [CocoIndex v1](https://cocoindex.io/docs-v1/llms.txt). + + +## Build and Test Commands + +This project uses [uv](https://docs.astral.sh/uv/) for project management. + +```bash +uv run mypy . # Type check Python code +uv run pytest tests/ # Run Python tests +``` + +## Code Conventions + +### Internal vs External Modules + +We distinguish between **internal modules** (under packages with `_` prefix, e.g. `_internal.*` or `connectors.*._source`) and **external modules** (which users can directly import). + +**External modules** (user-facing, e.g. `cocoindex/ops/sentence_transformers.py`): + +* Be strict about not leaking implementation details +* Use `__all__` to explicitly list public exports +* Prefix ALL non-public symbols with `_`, including: + * Standard library imports: `import threading as _threading`, `import typing as _typing` + * Third-party imports: `import numpy as _np`, `from numpy.typing import NDArray as _NDArray` + * Internal package imports: `from cocoindex.resources import schema as _schema` +* Exception: `TYPE_CHECKING` imports for type hints don't need prefixing + +**Internal modules** (e.g. `cocoindex/_internal/component_ctx.py`): + +* Less strict since users shouldn't import these directly +* Standard library and internal imports don't need underscore prefix +* Only prefix symbols that are truly private to the module itself (e.g. `_context_var` for a module-private ContextVar) + +### Type Annotations + +Avoid `Any` whenever feasible. 
Use specific types — including concrete types from third-party libraries. Only use `Any` when the type is truly generic and no downstream code needs to downcast it. + +### Multi-Value Returns + +For functions returning multiple values, use `NamedTuple` instead of plain tuples. At call sites, access fields by name (`result.can_reuse`) rather than positional unpacking — this prevents misreading fields in the wrong order. + +### Testing Guidelines + +We prefer end-to-end tests on user-facing APIs over unit tests on smaller internal functions. With this said, there are cases where unit tests are necessary, e.g. for internal logic with various situations and edge cases, in which case it's usually easier to cover various scenarios with unit tests. diff --git a/README.md b/README.md index 70a2f81..775f875 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,6 @@ Use the cocoindex-code MCP server for semantic code search when: |----------|-------------|---------| | `COCOINDEX_CODE_ROOT_PATH` | Root path of the codebase | Auto-discovered (see below) | | `COCOINDEX_CODE_EMBEDDING_MODEL` | Embedding model (see below) | `sbert/sentence-transformers/all-MiniLM-L6-v2` | -| `COCOINDEX_CODE_BATCH_SIZE` | Max batch size for local embedding model | `16` | | `COCOINDEX_CODE_EXTRA_EXTENSIONS` | Additional file extensions to index (comma-separated, e.g. `"inc:php,yaml,toml"` — use `ext:lang` to override language detection) | _(none)_ | | `COCOINDEX_CODE_EXCLUDED_PATTERNS` | Additional glob patterns to exclude from indexing as a JSON array (e.g. 
`'["**/migration.sql", "{**/*.md,**/*.txt}"]'`) | _(none)_ | @@ -281,7 +280,6 @@ claude mcp add cocoindex-code \ ```bash claude mcp add cocoindex-code \ -e COCOINDEX_CODE_EMBEDDING_MODEL=sbert/nomic-ai/CodeRankEmbed \ - -e COCOINDEX_CODE_BATCH_SIZE=16 \ -- cocoindex-code ``` diff --git a/pyproject.toml b/pyproject.toml index 150bcbb..e13ecf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,15 @@ classifiers = [ dependencies = [ "mcp>=1.0.0", - "cocoindex[litellm]==1.0.0a29", + "cocoindex[litellm]==1.0.0a31", "sentence-transformers>=2.2.0", "sqlite-vec>=0.1.0", "pydantic>=2.0.0", "numpy>=1.24.0", "einops>=0.8.2", + "typer>=0.9.0", + "msgspec>=0.19.0", + "pyyaml>=6.0", ] [project.optional-dependencies] @@ -43,6 +46,7 @@ dev = [ [project.scripts] cocoindex-code = "cocoindex_code:main" +ccc = "cocoindex_code.cli:app" [project.urls] Homepage = "https://github.com/cocoindex-io/cocoindex-code" @@ -66,6 +70,7 @@ dev = [ "ruff>=0.1.0", "mypy>=1.0.0", "prek>=0.1.0", + "types-pyyaml>=6.0.12.20250915", ] [tool.uv] diff --git a/src/cocoindex_code/__init__.py b/src/cocoindex_code/__init__.py index 51297f8..18c0f14 100644 --- a/src/cocoindex_code/__init__.py +++ b/src/cocoindex_code/__init__.py @@ -4,8 +4,7 @@ logging.basicConfig(level=logging.WARNING) -from .config import Config # noqa: E402 -from .server import main, mcp # noqa: E402 +from ._version import __version__ # noqa: E402 +from .server import main # noqa: E402 -__version__ = "0.1.0" -__all__ = ["Config", "main", "mcp"] +__all__ = ["main", "__version__"] diff --git a/src/cocoindex_code/_version.py b/src/cocoindex_code/_version.py new file mode 100644 index 0000000..7fa6f60 --- /dev/null +++ b/src/cocoindex_code/_version.py @@ -0,0 +1,3 @@ +# This file will be rewritten by the release workflow. +# DO NOT ADD ANYTHING ELSE TO THIS FILE. 
+__version__ = "999.0.0" diff --git a/src/cocoindex_code/cli.py b/src/cocoindex_code/cli.py new file mode 100644 index 0000000..3cb973e --- /dev/null +++ b/src/cocoindex_code/cli.py @@ -0,0 +1,312 @@ +"""CLI entry point for cocoindex-code (ccc command).""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import typer as _typer + +if TYPE_CHECKING: + from .client import DaemonClient + +from .protocol import ProjectStatusResponse, SearchResponse +from .settings import ( + default_project_settings, + default_user_settings, + find_parent_with_marker, + find_project_root, + save_project_settings, + save_user_settings, + user_settings_path, +) + +app = _typer.Typer( + name="ccc", + help="CocoIndex Code — index and search codebases.", + no_args_is_help=True, +) + +daemon_app = _typer.Typer(name="daemon", help="Manage the daemon process.") +app.add_typer(daemon_app, name="daemon") + + +# --------------------------------------------------------------------------- +# Shared CLI helpers (G1) +# --------------------------------------------------------------------------- + + +def require_project_root() -> Path: + """Find the project root by walking up from CWD. + + Exits with code 1 if not found. + """ + root = find_project_root(Path.cwd()) + if root is None: + _typer.echo( + "Error: Not in an initialized project directory.\n" + "Run `ccc init` in your project root to get started.", + err=True, + ) + raise _typer.Exit(code=1) + return root + + +def require_daemon_for_project() -> tuple[DaemonClient, str]: + """Resolve project root, then connect to daemon (auto-starting if needed). + + Returns ``(client, project_root_str)``. Exits on failure. 
+ """ + from .client import ensure_daemon + + project_root = require_project_root() + try: + client = ensure_daemon() + except Exception as e: + _typer.echo(f"Error: Failed to connect to daemon: {e}", err=True) + raise _typer.Exit(code=1) + return client, str(project_root) + + +def resolve_default_path(project_root: Path) -> str | None: + """Compute default ``--path`` filter from CWD relative to project root.""" + cwd = Path.cwd() + try: + rel = cwd.relative_to(project_root) + except ValueError: + return None + if rel == Path("."): + return None + return f"{rel.as_posix()}/*" + + +def print_index_stats(status: ProjectStatusResponse) -> None: + """Print formatted index statistics.""" + _typer.echo("\nIndex stats:") + _typer.echo(f" Chunks: {status.total_chunks}") + _typer.echo(f" Files: {status.total_files}") + if status.languages: + _typer.echo(" Languages:") + for lang, count in sorted(status.languages.items(), key=lambda x: -x[1]): + _typer.echo(f" {lang}: {count} chunks") + + +def print_search_results(response: SearchResponse) -> None: + """Print formatted search results.""" + if not response.success: + _typer.echo(f"Search failed: {response.message}", err=True) + return + + if not response.results: + _typer.echo("No results found.") + return + + for i, r in enumerate(response.results, 1): + _typer.echo(f"\n--- Result {i} (score: {r.score:.3f}) ---") + _typer.echo(f"File: {r.file_path}:{r.start_line}-{r.end_line} [{r.language}]") + _typer.echo(r.content) + + +# --------------------------------------------------------------------------- +# Commands (G2-G5) +# --------------------------------------------------------------------------- + + +@app.command() +def init( + force: bool = _typer.Option(False, "-f", "--force", help="Skip parent directory warning"), +) -> None: + """Initialize a project for cocoindex-code.""" + from .settings import project_settings_path + + cwd = Path.cwd() + settings_file = project_settings_path(cwd) + + # Check if already initialized + 
if settings_file.is_file(): + _typer.echo("Project already initialized.") + return + + # Check parent directories for markers + if not force: + parent = find_parent_with_marker(cwd) + if parent is not None and parent != cwd: + _typer.echo( + f"Warning: A parent directory has a project marker: {parent}\n" + "You might want to run `ccc init` there instead.\n" + "Use `ccc init -f` to initialize here anyway." + ) + raise _typer.Exit(code=1) + + # Create user settings if missing + user_path = user_settings_path() + if not user_path.is_file(): + save_user_settings(default_user_settings()) + _typer.echo(f"Created user settings: {user_path}") + + # Create project settings + save_project_settings(cwd, default_project_settings()) + _typer.echo(f"Created project settings: {settings_file}") + _typer.echo("Project initialized. Run `ccc index` to build the index.") + + +@app.command() +def index() -> None: + """Create/update index for the codebase.""" + client, project_root = require_daemon_for_project() + _typer.echo("Indexing...") + try: + resp = client.index(project_root) + except RuntimeError as e: + _typer.echo(f"Indexing failed: {e}", err=True) + raise _typer.Exit(code=1) + if not resp.success: + _typer.echo(f"Indexing failed: {resp.message}", err=True) + raise _typer.Exit(code=1) + + status = client.project_status(project_root) + print_index_stats(status) + + +@app.command() +def search( + query: list[str] = _typer.Argument(..., help="Search query"), + lang: list[str] = _typer.Option([], "--lang", help="Filter by language"), + path: str | None = _typer.Option(None, "--path", help="Filter by file path glob"), + offset: int = _typer.Option(0, "--offset", help="Number of results to skip"), + limit: int = _typer.Option(10, "--limit", help="Maximum results to return"), + refresh: bool = _typer.Option(False, "--refresh", help="Refresh index before searching"), +) -> None: + """Semantic search across the codebase.""" + client, project_root = require_daemon_for_project() + 
query_str = " ".join(query) + + # Default path filter from CWD + paths: list[str] | None = None + if path is not None: + paths = [path] + else: + default = resolve_default_path(Path(project_root)) + if default is not None: + paths = [default] + + resp = client.search( + project_root=project_root, + query=query_str, + languages=lang or None, + paths=paths, + limit=limit, + offset=offset, + refresh=refresh, + ) + print_search_results(resp) + + +@app.command() +def status() -> None: + """Show project status.""" + client, project_root = require_daemon_for_project() + resp = client.project_status(project_root) + print_index_stats(resp) + + +@app.command() +def mcp() -> None: + """Run as MCP server (stdio mode).""" + import asyncio + + client, project_root = require_daemon_for_project() + + async def _run_mcp() -> None: + from .server import create_mcp_server + + mcp_server = create_mcp_server(client, project_root) + # Trigger initial indexing in background + asyncio.create_task(_bg_index(client, project_root)) + await mcp_server.run_stdio_async() + + asyncio.run(_run_mcp()) + + +async def _bg_index(client, project_root: str) -> None: # type: ignore[no-untyped-def] + """Index in background, swallowing errors.""" + import asyncio + + loop = asyncio.get_event_loop() + try: + await loop.run_in_executor(None, client.index, project_root) + except Exception: + pass + + +# --- Daemon subcommands (G5) --- + + +@daemon_app.command("status") +def daemon_status() -> None: + """Show daemon status.""" + from .client import ensure_daemon + + try: + client = ensure_daemon() + except Exception as e: + _typer.echo(f"Error: {e}", err=True) + raise _typer.Exit(code=1) + + resp = client.daemon_status() + _typer.echo(f"Daemon version: {resp.version}") + _typer.echo(f"Uptime: {resp.uptime_seconds:.1f}s") + if resp.projects: + _typer.echo("Projects:") + for p in resp.projects: + state = "indexing" if p.indexing else "idle" + _typer.echo(f" {p.project_root} [{state}]") + else: + _typer.echo("No 
projects loaded.") + client.close() + + +@daemon_app.command("restart") +def daemon_restart() -> None: + """Restart the daemon.""" + from .client import _wait_for_daemon, start_daemon, stop_daemon + + _typer.echo("Stopping daemon...") + stop_daemon() + + _typer.echo("Starting daemon...") + start_daemon() + try: + _wait_for_daemon() + _typer.echo("Daemon restarted.") + except TimeoutError: + _typer.echo("Error: Daemon did not start in time.", err=True) + raise _typer.Exit(code=1) + + +@daemon_app.command("stop") +def daemon_stop() -> None: + """Stop the daemon.""" + from .client import DaemonClient + + try: + client = DaemonClient.connect() + client.handshake() + client.stop() + client.close() + _typer.echo("Daemon stopped.") + except (ConnectionRefusedError, OSError): + _typer.echo("Daemon is not running.") + + +@app.command("run-daemon", hidden=True) +def run_daemon_cmd() -> None: + """Internal: run the daemon process.""" + from .daemon import run_daemon + + run_daemon() + + +# Allow running as module: python -m cocoindex_code.cli +if __name__ == "__main__": + app() diff --git a/src/cocoindex_code/client.py b/src/cocoindex_code/client.py new file mode 100644 index 0000000..f62b05c --- /dev/null +++ b/src/cocoindex_code/client.py @@ -0,0 +1,243 @@ +"""Client for communicating with the daemon.""" + +from __future__ import annotations + +import logging +import os +import signal +import subprocess +import sys +import time +from multiprocessing.connection import Client, Connection +from pathlib import Path + +from ._version import __version__ +from .daemon import _connection_family, daemon_pid_path, daemon_socket_path +from .protocol import ( + DaemonStatusResponse, + ErrorResponse, + HandshakeRequest, + HandshakeResponse, + IndexRequest, + IndexResponse, + ProjectStatusRequest, + ProjectStatusResponse, + Request, + Response, + SearchRequest, + SearchResponse, + StopRequest, + StopResponse, + decode_response, + encode_request, +) + +logger = logging.getLogger(__name__) 
+ + +class DaemonClient: + """Client for communicating with the daemon.""" + + _conn: Connection + + def __init__(self, conn: Connection) -> None: + self._conn = conn + + @classmethod + def connect(cls) -> DaemonClient: + """Connect to daemon. Raises ConnectionRefusedError if not running.""" + sock = daemon_socket_path() + if not os.path.exists(sock): + raise ConnectionRefusedError(f"Daemon socket not found: {sock}") + try: + conn = Client(sock, family=_connection_family()) + except (ConnectionRefusedError, FileNotFoundError, OSError) as e: + raise ConnectionRefusedError(f"Cannot connect to daemon: {e}") from e + return cls(conn) + + def handshake(self) -> HandshakeResponse: + """Send version handshake.""" + return self._send(HandshakeRequest(version=__version__)) # type: ignore[return-value] + + def index(self, project_root: str) -> IndexResponse: + """Request indexing. Blocks until complete.""" + return self._send(IndexRequest(project_root=project_root)) # type: ignore[return-value] + + def search( + self, + project_root: str, + query: str, + languages: list[str] | None = None, + paths: list[str] | None = None, + limit: int = 5, + offset: int = 0, + refresh: bool = False, + ) -> SearchResponse: + """Search the codebase.""" + return self._send( # type: ignore[return-value] + SearchRequest( + project_root=project_root, + query=query, + languages=languages, + paths=paths, + limit=limit, + offset=offset, + refresh=refresh, + ) + ) + + def project_status(self, project_root: str) -> ProjectStatusResponse: + return self._send( # type: ignore[return-value] + ProjectStatusRequest(project_root=project_root) + ) + + def daemon_status(self) -> DaemonStatusResponse: + from .protocol import DaemonStatusRequest + + return self._send(DaemonStatusRequest()) # type: ignore[return-value] + + def stop(self) -> StopResponse: + return self._send(StopRequest()) # type: ignore[return-value] + + def close(self) -> None: + try: + self._conn.close() + except Exception: + pass + + def 
_send(self, req: Request) -> Response: + self._conn.send_bytes(encode_request(req)) + data = self._conn.recv_bytes() + resp = decode_response(data) + if isinstance(resp, ErrorResponse): + raise RuntimeError(f"Daemon error: {resp.message}") + return resp + + +# --------------------------------------------------------------------------- +# Daemon lifecycle helpers +# --------------------------------------------------------------------------- + + +def is_daemon_running() -> bool: + """Check if the daemon is running.""" + return os.path.exists(daemon_socket_path()) + + +def start_daemon() -> None: + """Start the daemon as a background process.""" + from .daemon import daemon_dir + + daemon_dir().mkdir(parents=True, exist_ok=True) + log_path = daemon_dir() / "daemon.log" + + # Use the ccc entry point if available, otherwise fall back to python -m + ccc_path = _find_ccc_executable() + if ccc_path: + cmd = [ccc_path, "run-daemon"] + else: + cmd = [sys.executable, "-m", "cocoindex_code.cli", "run-daemon"] + + log_fd = open(log_path, "a") + subprocess.Popen( + cmd, + start_new_session=True, + stdout=log_fd, + stderr=log_fd, + stdin=subprocess.DEVNULL, + ) + log_fd.close() + + +def _find_ccc_executable() -> str | None: + """Find the ccc executable in PATH or the same directory as python.""" + python_dir = Path(sys.executable).parent + # On Windows the script is ccc.exe; on Unix it's just ccc + names = ["ccc.exe", "ccc"] if sys.platform == "win32" else ["ccc"] + for name in names: + ccc = python_dir / name + if ccc.exists(): + return str(ccc) + return None + + +def stop_daemon() -> None: + """Stop the daemon gracefully.""" + try: + client = DaemonClient.connect() + client.handshake() + client.stop() + client.close() + except (ConnectionRefusedError, OSError): + pass + + # If daemon doesn't respond, try SIGTERM via PID + pid_path = daemon_pid_path() + if pid_path.exists(): + try: + pid = int(pid_path.read_text().strip()) + if pid != os.getpid(): # Never kill ourselves (happens 
when daemon runs in a thread) + os.kill(pid, signal.SIGTERM) + except (ValueError, ProcessLookupError, PermissionError): + pass + + # Clean up stale files (named pipes on Windows clean up automatically) + if sys.platform != "win32": + sock = daemon_socket_path() + try: + Path(sock).unlink(missing_ok=True) + except Exception: + pass + try: + pid_path.unlink(missing_ok=True) + except Exception: + pass + + +def _wait_for_daemon(timeout: float = 5.0) -> None: + """Wait for the daemon socket/pipe to become available.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + if os.path.exists(daemon_socket_path()): + return + time.sleep(0.1) + raise TimeoutError("Daemon did not start in time") + + +def ensure_daemon() -> DaemonClient: + """Connect to daemon, starting or restarting as needed. + + 1. Try to connect to existing daemon. + 2. If connection refused: start daemon, retry connect with backoff. + 3. If connected but version mismatch: stop old daemon, start new one. 
+ """ + # Try connecting to existing daemon + try: + client = DaemonClient.connect() + resp = client.handshake() + if resp.ok: + return client + # Version mismatch — restart + client.close() + stop_daemon() + except (ConnectionRefusedError, OSError): + pass + + # Start daemon + start_daemon() + _wait_for_daemon() + + # Connect with retries + for attempt in range(10): + try: + client = DaemonClient.connect() + resp = client.handshake() + if resp.ok: + return client + raise RuntimeError( + f"Daemon version mismatch: expected {__version__}, got {resp.daemon_version}" + ) + except (ConnectionRefusedError, OSError): + time.sleep(0.5) + + raise RuntimeError("Failed to connect to daemon after starting it") diff --git a/src/cocoindex_code/daemon.py b/src/cocoindex_code/daemon.py new file mode 100644 index 0000000..6660452 --- /dev/null +++ b/src/cocoindex_code/daemon.py @@ -0,0 +1,424 @@ +"""Daemon process: listener loop, project registry, request dispatch.""" + +from __future__ import annotations + +import asyncio +import logging +import os +import signal +import sys +import threading +import time +from multiprocessing.connection import Connection, Listener +from pathlib import Path +from typing import Any + +from ._version import __version__ +from .project import Project +from .protocol import ( + DaemonProjectInfo, + DaemonStatusRequest, + DaemonStatusResponse, + ErrorResponse, + HandshakeRequest, + HandshakeResponse, + IndexRequest, + IndexResponse, + ProjectStatusRequest, + ProjectStatusResponse, + Request, + Response, + SearchRequest, + SearchResponse, + SearchResult, + StopRequest, + StopResponse, + decode_request, + encode_response, +) +from .query import query_codebase +from .settings import ( + load_project_settings, + load_user_settings, + user_settings_dir, +) +from .shared import SQLITE_DB, Embedder, create_embedder + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Daemon paths +# 
--------------------------------------------------------------------------- + + +def daemon_dir() -> Path: + """Return the daemon directory (``~/.cocoindex_code/``).""" + return user_settings_dir() + + +def _connection_family() -> str: + """Return the multiprocessing connection family for this platform.""" + return "AF_PIPE" if sys.platform == "win32" else "AF_UNIX" + + +def daemon_socket_path() -> str: + """Return the daemon socket/pipe address.""" + if sys.platform == "win32": + import hashlib + + # Hash the daemon dir so COCOINDEX_CODE_DIR overrides create unique pipe names, + # preventing conflicts between different daemon instances (tests, users, etc.) + dir_hash = hashlib.md5(str(daemon_dir()).encode()).hexdigest()[:12] + return rf"\\.\pipe\cocoindex_code_{dir_hash}" + return str(daemon_dir() / "daemon.sock") + + +def daemon_pid_path() -> Path: + """Return the path for the daemon's PID file.""" + return daemon_dir() / "daemon.pid" + + +def daemon_log_path() -> Path: + """Return the path for the daemon's log file.""" + return daemon_dir() / "daemon.log" + + +# --------------------------------------------------------------------------- +# Project Registry +# --------------------------------------------------------------------------- + + +class ProjectRegistry: + """Manages loaded projects and their indexes.""" + + _projects: dict[str, Project] + _index_locks: dict[str, asyncio.Lock] + _indexing: dict[str, bool] + _embedder: Embedder + + def __init__(self, embedder: Embedder) -> None: + self._projects = {} + self._index_locks = {} + self._indexing = {} + self._embedder = embedder + + async def get_project(self, project_root: str) -> Project: + """Get or create a Project for the given root. 
Lazy initialization.""" + if project_root not in self._projects: + root = Path(project_root) + project_settings = load_project_settings(root) + project = await Project.create(root, project_settings, self._embedder) + self._projects[project_root] = project + self._index_locks[project_root] = asyncio.Lock() + self._indexing[project_root] = False + return self._projects[project_root] + + async def update_index(self, project_root: str) -> None: + """Update index for project, serialized by per-project lock.""" + project = await self.get_project(project_root) + lock = self._index_locks[project_root] + async with lock: + self._indexing[project_root] = True + try: + await project.update_index() + finally: + self._indexing[project_root] = False + + async def search( + self, + project_root: str, + query: str, + languages: list[str] | None = None, + paths: list[str] | None = None, + limit: int = 5, + offset: int = 0, + ) -> list[SearchResult]: + """Search within a project.""" + project = await self.get_project(project_root) + root = Path(project_root) + target_db = root / ".cocoindex_code" / "target_sqlite.db" + results = await query_codebase( + query=query, + target_sqlite_db_path=target_db, + env=project.env, + limit=limit, + offset=offset, + languages=languages, + paths=paths, + ) + return [ + SearchResult( + file_path=r.file_path, + language=r.language, + content=r.content, + start_line=r.start_line, + end_line=r.end_line, + score=r.score, + ) + for r in results + ] + + def get_status(self, project_root: str) -> ProjectStatusResponse: + """Get index stats for a project.""" + project = self._projects.get(project_root) + if project is None: + return ProjectStatusResponse( + indexing=False, total_chunks=0, total_files=0, languages={} + ) + + db = project.env.get_context(SQLITE_DB) + with db.readonly() as conn: + total_chunks = conn.execute("SELECT COUNT(*) FROM code_chunks_vec").fetchone()[0] + total_files = conn.execute( + "SELECT COUNT(DISTINCT file_path) FROM 
code_chunks_vec" + ).fetchone()[0] + lang_rows = conn.execute( + "SELECT language, COUNT(*) as cnt FROM code_chunks_vec" + " GROUP BY language ORDER BY cnt DESC" + ).fetchall() + + return ProjectStatusResponse( + indexing=self._indexing.get(project_root, False), + total_chunks=total_chunks, + total_files=total_files, + languages={lang: cnt for lang, cnt in lang_rows}, + ) + + def list_projects(self) -> list[DaemonProjectInfo]: + """List all loaded projects with their indexing state.""" + return [ + DaemonProjectInfo( + project_root=root, + indexing=self._indexing.get(root, False), + ) + for root in self._projects + ] + + +# --------------------------------------------------------------------------- +# Connection handler +# --------------------------------------------------------------------------- + + +async def handle_connection( + conn: Connection, + registry: ProjectRegistry, + start_time: float, + shutdown_event: asyncio.Event, +) -> None: + """Handle a single client connection.""" + loop = asyncio.get_event_loop() + handshake_done = False + + try: + while not shutdown_event.is_set(): + try: + data: bytes = await loop.run_in_executor(None, conn.recv_bytes) + except (EOFError, OSError): + break + + try: + req = decode_request(data) + except Exception as e: + resp: Response = ErrorResponse(message=f"Invalid request: {e}") + conn.send_bytes(encode_response(resp)) + continue + + if not handshake_done: + if not isinstance(req, HandshakeRequest): + resp = ErrorResponse(message="First message must be a handshake") + conn.send_bytes(encode_response(resp)) + break + + ok = req.version == __version__ + resp = HandshakeResponse(ok=ok, daemon_version=__version__) + conn.send_bytes(encode_response(resp)) + if not ok: + break + handshake_done = True + continue + + resp = await _dispatch(req, registry, start_time, shutdown_event) + conn.send_bytes(encode_response(resp)) + + if isinstance(req, StopRequest): + break + except Exception: + logger.exception("Error handling 
connection") + finally: + try: + conn.close() + except Exception: + pass + + +async def _dispatch( + req: Request, + registry: ProjectRegistry, + start_time: float, + shutdown_event: asyncio.Event, +) -> Response: + """Dispatch a request to the appropriate handler.""" + try: + if isinstance(req, IndexRequest): + await registry.update_index(req.project_root) + return IndexResponse(success=True) + + if isinstance(req, SearchRequest): + if req.refresh: + await registry.update_index(req.project_root) + results = await registry.search( + project_root=req.project_root, + query=req.query, + languages=req.languages, + paths=req.paths, + limit=req.limit, + offset=req.offset, + ) + return SearchResponse( + success=True, + results=results, + total_returned=len(results), + offset=req.offset, + ) + + if isinstance(req, ProjectStatusRequest): + return registry.get_status(req.project_root) + + if isinstance(req, DaemonStatusRequest): + return DaemonStatusResponse( + version=__version__, + uptime_seconds=time.monotonic() - start_time, + projects=registry.list_projects(), + ) + + if isinstance(req, StopRequest): + shutdown_event.set() + return StopResponse(ok=True) + + return ErrorResponse(message=f"Unknown request type: {type(req).__name__}") + except Exception as e: + logger.exception("Error dispatching request") + return ErrorResponse(message=str(e)) + + +# --------------------------------------------------------------------------- +# Daemon main +# --------------------------------------------------------------------------- + + +def run_daemon() -> None: + """Main entry point for the daemon process (blocking).""" + daemon_dir().mkdir(parents=True, exist_ok=True) + + # Load user settings + user_settings = load_user_settings() + + # Set environment variables from settings + for key, value in user_settings.envs.items(): + os.environ[key] = value + + # Create embedder + embedder = create_embedder(user_settings.embedding) + + # Write PID file + pid_path = daemon_pid_path() + 
pid_path.write_text(str(os.getpid())) + + # Set up logging to file + log_path = daemon_log_path() + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + handlers=[logging.FileHandler(str(log_path)), logging.StreamHandler()], + force=True, + ) + + logger.info("Daemon starting (PID %d, version %s)", os.getpid(), __version__) + + try: + asyncio.run(_async_daemon_main(embedder)) + finally: + # Clean up PID file and socket (named pipes on Windows clean up automatically) + try: + pid_path.unlink(missing_ok=True) + except Exception: + pass + if sys.platform != "win32": + sock = daemon_socket_path() + try: + Path(sock).unlink(missing_ok=True) + except Exception: + pass + logger.info("Daemon stopped") + + +async def _async_daemon_main(embedder: Embedder) -> None: + """Async main loop for the daemon.""" + start_time = time.monotonic() + registry = ProjectRegistry(embedder) + shutdown_event = asyncio.Event() + + sock_path = daemon_socket_path() + # Remove stale socket (not applicable for Windows named pipes) + if sys.platform != "win32": + try: + Path(sock_path).unlink(missing_ok=True) + except Exception: + pass + + listener = Listener(sock_path, family=_connection_family()) + logger.info("Listening on %s", sock_path) + + loop = asyncio.get_event_loop() + + # Handle signals for graceful shutdown (not supported on all platforms/contexts) + try: + for sig in (signal.SIGTERM, signal.SIGINT): + loop.add_signal_handler(sig, shutdown_event.set) + except (RuntimeError, NotImplementedError): + pass # Not in main thread, or not supported on this platform (e.g. 
Windows) + + tasks: set[asyncio.Task[Any]] = set() + + async def _spawn_handler( + conn: Connection, + reg: ProjectRegistry, + st: float, + evt: asyncio.Event, + task_set: set[asyncio.Task[Any]], + ) -> None: + task = asyncio.create_task(handle_connection(conn, reg, st, evt)) + task_set.add(task) + task.add_done_callback(task_set.discard) + + # Run accept loop in a thread so we can shut down cleanly + def _accept_loop() -> None: + while not shutdown_event.is_set(): + try: + try: + listener._listener._socket.settimeout(0.5) # type: ignore[attr-defined] + except AttributeError: + pass # AF_PIPE (Windows) doesn't expose ._socket + conn = listener.accept() + # Schedule the handler on the event loop + asyncio.run_coroutine_threadsafe( + _spawn_handler(conn, registry, start_time, shutdown_event, tasks), + loop, + ) + except OSError: + if shutdown_event.is_set(): + break + # Socket timeout — just retry + continue + + accept_thread = threading.Thread(target=_accept_loop, daemon=True) + accept_thread.start() + + try: + await shutdown_event.wait() + finally: + listener.close() + accept_thread.join(timeout=2) + if tasks: + await asyncio.gather(*tasks, return_exceptions=True) diff --git a/src/cocoindex_code/indexer.py b/src/cocoindex_code/indexer.py index a7f35de..18f0dd7 100644 --- a/src/cocoindex_code/indexer.py +++ b/src/cocoindex_code/indexer.py @@ -1,5 +1,7 @@ """CocoIndex app for indexing codebases.""" +from __future__ import annotations + import cocoindex as coco from cocoindex.connectors import localfs, sqlite from cocoindex.connectors.sqlite import Vec0TableDef @@ -8,63 +10,8 @@ from cocoindex.resources.file import PatternFilePathMatcher from cocoindex.resources.id import IdGenerator -from .config import config -from .shared import CODEBASE_DIR, SQLITE_DB, CodeChunk, embedder - -# File patterns for supported languages -DEFAULT_INCLUDED_PATTERNS = [ - "**/*.py", # Python - "**/*.pyi", # Python stubs - "**/*.js", # JavaScript - "**/*.jsx", # JavaScript React - 
"**/*.ts", # TypeScript - "**/*.tsx", # TypeScript React - "**/*.mjs", # JavaScript ES modules - "**/*.cjs", # JavaScript CommonJS - "**/*.rs", # Rust - "**/*.go", # Go - "**/*.java", # Java - "**/*.c", # C - "**/*.h", # C/C++ headers - "**/*.cpp", # C++ - "**/*.hpp", # C++ headers - "**/*.cc", # C++ - "**/*.cxx", # C++ - "**/*.hxx", # C++ headers - "**/*.hh", # C++ headers - "**/*.cs", # C# - "**/*.sql", # SQL - "**/*.sh", # Shell - "**/*.bash", # Bash - "**/*.zsh", # Zsh - "**/*.md", # Markdown - "**/*.mdx", # MDX - "**/*.txt", # Plain text - "**/*.rst", # reStructuredText - "**/*.php", # PHP - "**/*.lua", # Lua -] - -INCLUDED_PATTERNS = DEFAULT_INCLUDED_PATTERNS + [f"**/*{ext}" for ext in config.extra_extensions] - -# Language overrides from extra_extensions (e.g. ".inc" -> "php") -LANGUAGE_OVERRIDES: dict[str, str] = { - ext: lang for ext, lang in config.extra_extensions.items() if lang is not None -} - -DEFAULT_EXCLUDED_PATTERNS = [ - "**/.*", # Hidden directories - "**/__pycache__", # Python cache - "**/node_modules", # Node.js dependencies - "**/target", # Rust/Maven build output - "**/build/assets", # Build asserts directories - "**/dist", # Distribution directories - "**/vendor/*.*/*", # Go vendor directory (domain-based paths) - "**/vendor/*", # PHP vendor directory - "**/.cocoindex_code", # Our own index directory -] - -EXCLUDED_PATTERNS = DEFAULT_EXCLUDED_PATTERNS + config.excluded_patterns +from .settings import PROJECT_SETTINGS +from .shared import CODEBASE_DIR, EMBEDDER, SQLITE_DB, CodeChunk # Chunking configuration CHUNK_SIZE = 2000 @@ -81,25 +28,26 @@ async def process_file( table: sqlite.TableTarget[CodeChunk], ) -> None: """Process a single file: chunk, embed, and store.""" - # Read file content + ps = coco.use_context(PROJECT_SETTINGS) + embedder = coco.use_context(EMBEDDER) + try: content = await file.read_text() except UnicodeDecodeError: - # Skip binary files return if not content.strip(): return - # Get relative path and detect language 
suffix = file.file_path.path.suffix + # Check language overrides from project settings + override_map = {f".{lo.ext}": lo.lang for lo in ps.language_overrides} language = ( - LANGUAGE_OVERRIDES.get(suffix) + override_map.get(suffix) or detect_code_language(filename=file.file_path.path.name) or "text" ) - # Split into chunks chunks = splitter.split( content, chunk_size=CHUNK_SIZE, @@ -110,9 +58,7 @@ async def process_file( id_gen = IdGenerator() - async def process( - chunk: Chunk, - ) -> None: + async def process(chunk: Chunk) -> None: table.declare_row( row=CodeChunk( id=await id_gen.next_id(chunk.text), @@ -131,10 +77,10 @@ async def process( @coco.fn async def indexer_main() -> None: """Main indexing function - walks files and processes each.""" - db = coco.use_context(SQLITE_DB) + ps = coco.use_context(PROJECT_SETTINGS) - # Declare the table target for storing embeddings - table = await db.mount_table_target( + table = await sqlite.mount_table_target( + db=SQLITE_DB, table_name="code_chunks_vec", table_schema=await sqlite.TableSchema.from_class( CodeChunk, @@ -146,16 +92,14 @@ async def indexer_main() -> None: ), ) - # Walk source directory files = localfs.walk_dir( - coco.use_context(CODEBASE_DIR), + CODEBASE_DIR, recursive=True, path_matcher=PatternFilePathMatcher( - included_patterns=INCLUDED_PATTERNS, - excluded_patterns=EXCLUDED_PATTERNS, + included_patterns=ps.include_patterns, + excluded_patterns=ps.exclude_patterns, ), ) - # Process each file with coco.component_subpath(coco.Symbol("process_file")): await coco.mount_each(process_file, files.items(), table) diff --git a/src/cocoindex_code/project.py b/src/cocoindex_code/project.py index eeb5f7a..faf81c8 100644 --- a/src/cocoindex_code/project.py +++ b/src/cocoindex_code/project.py @@ -1,14 +1,16 @@ +"""Project management: wraps a CocoIndex Environment + App.""" + from __future__ import annotations import asyncio +from pathlib import Path import cocoindex as coco from cocoindex.connectors import sqlite 
-from cocoindex.connectors.localfs import register_base_dir -from .config import config from .indexer import indexer_main -from .shared import CODEBASE_DIR, SQLITE_DB +from .settings import PROJECT_SETTINGS, ProjectSettings +from .shared import CODEBASE_DIR, EMBEDDER, SQLITE_DB, Embedder class Project: @@ -34,20 +36,25 @@ def is_initial_index_done(self) -> bool: return self._initial_index_done @staticmethod - async def create() -> Project: - # Ensure index directory exists - config.index_dir.mkdir(parents=True, exist_ok=True) + async def create( + project_root: Path, + project_settings: ProjectSettings, + embedder: Embedder, + ) -> Project: + """Create a project with explicit settings and embedder.""" + index_dir = project_root / ".cocoindex_code" + index_dir.mkdir(parents=True, exist_ok=True) - # Set CocoIndex state database path - settings = coco.Settings.from_env(config.cocoindex_db_path) + cocoindex_db_path = index_dir / "cocoindex.db" + target_sqlite_db_path = index_dir / "target_sqlite.db" - context = coco.ContextProvider() + settings = coco.Settings.from_env(cocoindex_db_path) - # Provide codebase root directory to environment - context.provide(CODEBASE_DIR, register_base_dir("codebase", config.codebase_root_path)) - # Connect to SQLite with vector extension - conn = sqlite.connect(str(config.target_sqlite_db_path), load_vec="auto") - context.provide(SQLITE_DB, sqlite.register_db("index_db", conn)) + context = coco.ContextProvider() + context.provide(CODEBASE_DIR, project_root) + context.provide(SQLITE_DB, sqlite.connect(str(target_sqlite_db_path), load_vec=True)) + context.provide(EMBEDDER, embedder) + context.provide(PROJECT_SETTINGS, project_settings) env = coco.Environment(settings, context_provider=context) app = coco.App( @@ -63,14 +70,3 @@ async def create() -> Project: result._app = app result._index_lock = asyncio.Lock() return result - - -_project: Project | None = None - - -async def default_project() -> Project: - """Factory function to create 
the CocoIndexCode project.""" - global _project - if _project is None: - _project = await Project.create() - return _project diff --git a/src/cocoindex_code/protocol.py b/src/cocoindex_code/protocol.py new file mode 100644 index 0000000..7672f95 --- /dev/null +++ b/src/cocoindex_code/protocol.py @@ -0,0 +1,145 @@ +"""IPC message types and serialization helpers for daemon communication.""" + +from __future__ import annotations + +import msgspec as _msgspec + +# --------------------------------------------------------------------------- +# Requests (tagged union via struct tag) +# --------------------------------------------------------------------------- + + +class HandshakeRequest(_msgspec.Struct, tag="handshake"): + version: str + + +class IndexRequest(_msgspec.Struct, tag="index"): + project_root: str + + +class SearchRequest(_msgspec.Struct, tag="search"): + project_root: str + query: str + languages: list[str] | None = None + paths: list[str] | None = None + limit: int = 5 + offset: int = 0 + refresh: bool = False + + +class ProjectStatusRequest(_msgspec.Struct, tag="project_status"): + project_root: str + + +class DaemonStatusRequest(_msgspec.Struct, tag="daemon_status"): + pass + + +class StopRequest(_msgspec.Struct, tag="stop"): + pass + + +Request = ( + HandshakeRequest + | IndexRequest + | SearchRequest + | ProjectStatusRequest + | DaemonStatusRequest + | StopRequest +) + +# --------------------------------------------------------------------------- +# Responses +# --------------------------------------------------------------------------- + + +class HandshakeResponse(_msgspec.Struct, tag="handshake"): + ok: bool + daemon_version: str + + +class IndexResponse(_msgspec.Struct, tag="index"): + success: bool + message: str | None = None + + +class SearchResult(_msgspec.Struct): + file_path: str + language: str + content: str + start_line: int + end_line: int + score: float + + +class SearchResponse(_msgspec.Struct, tag="search"): + success: bool + results: 
list[SearchResult] = [] + total_returned: int = 0 + offset: int = 0 + message: str | None = None + + +class ProjectStatusResponse(_msgspec.Struct, tag="project_status"): + indexing: bool + total_chunks: int + total_files: int + languages: dict[str, int] + + +class DaemonProjectInfo(_msgspec.Struct): + project_root: str + indexing: bool + + +class DaemonStatusResponse(_msgspec.Struct, tag="daemon_status"): + version: str + uptime_seconds: float + projects: list[DaemonProjectInfo] + + +class StopResponse(_msgspec.Struct, tag="stop"): + ok: bool + + +class ErrorResponse(_msgspec.Struct, tag="error"): + message: str + + +Response = ( + HandshakeResponse + | IndexResponse + | SearchResponse + | ProjectStatusResponse + | DaemonStatusResponse + | StopResponse + | ErrorResponse +) + +# --------------------------------------------------------------------------- +# Encode / decode helpers (msgpack binary) +# --------------------------------------------------------------------------- + +_request_encoder = _msgspec.msgpack.Encoder() +_request_decoder = _msgspec.msgpack.Decoder(Request) + +_response_encoder = _msgspec.msgpack.Encoder() +_response_decoder = _msgspec.msgpack.Decoder(Response) + + +def encode_request(req: Request) -> bytes: + return _request_encoder.encode(req) + + +def decode_request(data: bytes) -> Request: + result: Request = _request_decoder.decode(data) + return result + + +def encode_response(resp: Response) -> bytes: + return _response_encoder.encode(resp) + + +def decode_response(data: bytes) -> Response: + result: Response = _response_decoder.decode(data) + return result diff --git a/src/cocoindex_code/query.py b/src/cocoindex_code/query.py index 22b0380..80ad630 100644 --- a/src/cocoindex_code/query.py +++ b/src/cocoindex_code/query.py @@ -1,13 +1,14 @@ """Query implementation for codebase search.""" +from __future__ import annotations + import heapq import sqlite3 +from pathlib import Path from typing import Any -from .config import config -from 
.project import default_project from .schema import QueryResult -from .shared import SQLITE_DB, embedder, query_prompt_name +from .shared import EMBEDDER, SQLITE_DB, query_prompt_name def _l2_to_score(distance: float) -> float: @@ -83,6 +84,8 @@ def _full_scan_query( async def query_codebase( query: str, + target_sqlite_db_path: Path, + env: Any, limit: int = 10, offset: int = 0, languages: list[str] | None = None, @@ -95,31 +98,27 @@ async def query_codebase( Language filtering uses vec0 partition keys for exact index-level filtering. Path filtering triggers a full scan with distance computation. """ - if not config.target_sqlite_db_path.exists(): + if not target_sqlite_db_path.exists(): raise RuntimeError( - f"Index database not found at {config.target_sqlite_db_path}. " + f"Index database not found at {target_sqlite_db_path}. " "Please run a query with refresh_index=True first." ) - coco_proj = await default_project() - db = coco_proj.env.get_context(SQLITE_DB) + db = env.get_context(SQLITE_DB) + embedder = env.get_context(EMBEDDER) # Generate query embedding. query_embedding = await embedder.embed(query, query_prompt_name) embedding_bytes = query_embedding.astype("float32").tobytes() - with db.value.readonly() as conn: + with db.readonly() as conn: if paths: - # Path filter → full scan (vec0 can't filter on auxiliary columns). - # LIMIT/OFFSET handled in SQL. rows = _full_scan_query(conn, embedding_bytes, limit, offset, languages, paths) elif not languages or len(languages) == 1: - # Single language or no filter: one KNN query. lang = languages[0] if languages else None rows = _knn_query(conn, embedding_bytes, limit + offset, lang) else: - # Multiple languages: separate KNN per partition, merge by distance. 
fetch_k = limit + offset rows = heapq.nsmallest( fetch_k, @@ -128,7 +127,7 @@ async def query_codebase( for lang in languages for row in _knn_query(conn, embedding_bytes, fetch_k, lang) ), - key=lambda r: r[5], # distance column + key=lambda r: r[5], ) if not paths: diff --git a/src/cocoindex_code/server.py b/src/cocoindex_code/server.py index 605bd8e..5eec11d 100644 --- a/src/cocoindex_code/server.py +++ b/src/cocoindex_code/server.py @@ -1,38 +1,38 @@ -"""MCP server for codebase indexing and querying.""" +"""MCP server for codebase indexing and querying. + +Supports two modes: +1. Daemon-backed: ``create_mcp_server(client, project_root)`` — lightweight MCP + server that delegates to the daemon via a ``DaemonClient``. +2. Legacy entry point: ``main()`` — backward-compatible ``cocoindex-code`` CLI that + auto-creates settings from env vars and delegates to the daemon. +""" + +from __future__ import annotations -import argparse import asyncio +import json +import os +from pathlib import Path +from typing import TYPE_CHECKING from mcp.server.fastmcp import FastMCP from pydantic import BaseModel, Field -from .config import config -from .project import default_project -from .query import query_codebase -from .shared import SQLITE_DB - -# Initialize MCP server -mcp = FastMCP( - "cocoindex-code", - instructions=( - "Code search and codebase understanding tools." - "\n" - "Use when you need to find code, understand how something works," - " locate implementations, or explore an unfamiliar codebase." - "\n" - "Provides semantic search that understands meaning --" - " unlike grep or text matching," - " it finds relevant code even when exact keywords are unknown." - ), +if TYPE_CHECKING: + from .client import DaemonClient + +_MCP_INSTRUCTIONS = ( + "Code search and codebase understanding tools." + "\n" + "Use when you need to find code, understand how something works," + " locate implementations, or explore an unfamiliar codebase." 
+ "\n" + "Provides semantic search that understands meaning --" + " unlike grep or text matching," + " it finds relevant code even when exact keywords are unknown." ) -async def _refresh_index() -> None: - """Refresh the index. Uses lock to prevent concurrent updates.""" - proj = await default_project() - await proj.update_index() - - # === Pydantic Models for Tool Inputs/Outputs === @@ -57,167 +57,145 @@ class SearchResultModel(BaseModel): message: str | None = None -# === MCP Tools === - - -@mcp.tool( - name="search", - description=( - "Semantic code search across the entire codebase" - " -- finds code by meaning, not just text matching." - " Use this instead of grep/glob when you need to find implementations," - " understand how features work," - " or locate related code without knowing exact names or keywords." - " Accepts natural language queries" - " (e.g., 'authentication logic', 'database connection handling')" - " or code snippets." - " Returns matching code chunks with file paths," - " line numbers, and relevance scores." - " Start with a small limit (e.g., 5);" - " if most results look relevant, use offset to paginate for more." - ), -) -async def search( - query: str = Field( - description=( - "Natural language query or code snippet to search for." - " Examples: 'error handling middleware'," - " 'how are users authenticated'," - " 'database connection pool'," - " or paste a code snippet to find similar code." 
- ) - ), - limit: int = Field( - default=5, - ge=1, - le=100, - description="Maximum number of results to return (1-100)", - ), - offset: int = Field( - default=0, - ge=0, - description="Number of results to skip for pagination", - ), - refresh_index: bool = Field( - default=True, +# === Daemon-backed MCP server factory === + + +def create_mcp_server(client: DaemonClient, project_root: str) -> FastMCP: + """Create a lightweight MCP server that delegates to the daemon.""" + mcp = FastMCP("cocoindex-code", instructions=_MCP_INSTRUCTIONS) + + @mcp.tool( + name="search", description=( - "Whether to incrementally update the index before searching." - " Set to False for faster consecutive queries" - " when the codebase hasn't changed." + "Semantic code search across the entire codebase" + " -- finds code by meaning, not just text matching." + " Use this instead of grep/glob when you need to find implementations," + " understand how features work," + " or locate related code without knowing exact names or keywords." + " Accepts natural language queries" + " (e.g., 'authentication logic', 'database connection handling')" + " or code snippets." + " Returns matching code chunks with file paths," + " line numbers, and relevance scores." + " Start with a small limit (e.g., 5);" + " if most results look relevant, use offset to paginate for more." ), - ), - languages: list[str] | None = Field( - default=None, - description=("Filter by programming language(s). Example: ['python', 'typescript']"), - ), - paths: list[str] | None = Field( - default=None, - description=( - "Filter by file path pattern(s) using GLOB wildcards (* and ?)." - " Example: ['src/utils/*', '*.py']" + ) + async def search( + query: str = Field( + description=( + "Natural language query or code snippet to search for." + " Examples: 'error handling middleware'," + " 'how are users authenticated'," + " 'database connection pool'," + " or paste a code snippet to find similar code." 
+ ) + ), + limit: int = Field( + default=5, + ge=1, + le=100, + description="Maximum number of results to return (1-100)", + ), + offset: int = Field( + default=0, + ge=0, + description="Number of results to skip for pagination", ), - ), -) -> SearchResultModel: - """Query the codebase index.""" - proj = await default_project() - if not proj.is_initial_index_done: - return SearchResultModel( - success=False, - message=( - "The index is still being built — this may take a while" - " for a large codebase or after significant changes." - " Please try again shortly." + refresh_index: bool = Field( + default=True, + description=( + "Whether to incrementally update the index before searching." + " Set to False for faster consecutive queries" + " when the codebase hasn't changed." ), - ) - - try: - # Refresh index if requested - if refresh_index: - await _refresh_index() - - results = await query_codebase( - query=query, - limit=limit, - offset=offset, - languages=languages, - paths=paths, - ) - - return SearchResultModel( - success=True, - results=[ - CodeChunkResult( - file_path=r.file_path, - language=r.language, - content=r.content, - start_line=r.start_line, - end_line=r.end_line, - score=r.score, - ) - for r in results - ], - total_returned=len(results), - offset=offset, - ) - except RuntimeError as e: - # Index doesn't exist - return SearchResultModel( - success=False, - message=str(e), - ) - except Exception as e: - return SearchResultModel( - success=False, - message=f"Query failed: {e!s}", - ) - - -async def _async_serve() -> None: - """Async entry point for the MCP server.""" - - # Refresh index in background so startup isn't blocked - asyncio.create_task(_refresh_index()) - await mcp.run_stdio_async() - - -async def _async_index() -> None: - """Async entry point for the index command.""" - proj = await default_project() - await proj.update_index(report_to_stdout=True) - await _print_index_stats() - - -async def _print_index_stats() -> None: - """Print index 
statistics from the database.""" - db_path = config.target_sqlite_db_path - if not db_path.exists(): - print("No index database found.") - return - - proj = await default_project() - db = proj.env.get_context(SQLITE_DB) - - with db.value.readonly() as conn: - total_chunks = conn.execute("SELECT COUNT(*) FROM code_chunks_vec").fetchone()[0] - total_files = conn.execute( - "SELECT COUNT(DISTINCT file_path) FROM code_chunks_vec" - ).fetchone()[0] - langs = conn.execute( - "SELECT language, COUNT(*) as cnt FROM code_chunks_vec" - " GROUP BY language ORDER BY cnt DESC" - ).fetchall() - - print("\nIndex stats:") - print(f" Chunks: {total_chunks}") - print(f" Files: {total_files}") - if langs: - print(" Languages:") - for lang, count in langs: - print(f" {lang}: {count} chunks") + ), + languages: list[str] | None = Field( + default=None, + description="Filter by programming language(s). Example: ['python', 'typescript']", + ), + paths: list[str] | None = Field( + default=None, + description=( + "Filter by file path pattern(s) using GLOB wildcards (* and ?)." 
+ " Example: ['src/utils/*', '*.py']" + ), + ), + ) -> SearchResultModel: + """Query the codebase index via the daemon.""" + loop = asyncio.get_event_loop() + try: + resp = await loop.run_in_executor( + None, + lambda: client.search( + project_root=project_root, + query=query, + languages=languages, + paths=paths, + limit=limit, + offset=offset, + refresh=refresh_index, + ), + ) + return SearchResultModel( + success=resp.success, + results=[ + CodeChunkResult( + file_path=r.file_path, + language=r.language, + content=r.content, + start_line=r.start_line, + end_line=r.end_line, + score=r.score, + ) + for r in resp.results + ], + total_returned=resp.total_returned, + offset=resp.offset, + message=resp.message, + ) + except Exception as e: + return SearchResultModel(success=False, message=f"Query failed: {e!s}") + + return mcp + + +# Keep the old `mcp` global for backward compatibility in __init__.py +mcp: FastMCP | None = None + + +# === Backward-compatible entry point === + + +def _convert_embedding_model(env_model: str) -> tuple[str, str]: + """Convert old COCOINDEX_CODE_EMBEDDING_MODEL to (provider, model).""" + sbert_prefix = "sbert/" + if env_model.startswith(sbert_prefix): + return "sentence-transformers", env_model[len(sbert_prefix) :] + return "litellm", env_model def main() -> None: - """Entry point for the cocoindex-code CLI.""" + """Backward-compatible entry point for ``cocoindex-code`` CLI. + + Auto-detects/creates settings from env vars, then delegates to daemon. 
+ """ + import argparse + + from .client import ensure_daemon + from .settings import ( + EmbeddingSettings, + LanguageOverride, + default_project_settings, + default_user_settings, + find_parent_with_marker, + find_project_root, + project_settings_path, + save_project_settings, + save_user_settings, + user_settings_path, + ) + parser = argparse.ArgumentParser( prog="cocoindex-code", description="MCP server for codebase indexing and querying.", @@ -225,14 +203,107 @@ def main() -> None: subparsers = parser.add_subparsers(dest="command") subparsers.add_parser("serve", help="Run the MCP server (default)") subparsers.add_parser("index", help="Build/refresh the index and report stats") - args = parser.parse_args() + # --- Discover project root --- + cwd = Path.cwd() + project_root = find_project_root(cwd) + + if project_root is None: + # Try env var + env_root = os.environ.get("COCOINDEX_CODE_ROOT_PATH") + if env_root: + project_root = Path(env_root).resolve() + else: + # Use marker-based discovery + marker_root = find_parent_with_marker(cwd) + project_root = marker_root if marker_root is not None else cwd + + # --- Auto-create project settings if needed --- + proj_settings_file = project_settings_path(project_root) + if not proj_settings_file.is_file(): + ps = default_project_settings() + + # Migrate COCOINDEX_CODE_EXCLUDED_PATTERNS + raw_excluded = os.environ.get("COCOINDEX_CODE_EXCLUDED_PATTERNS", "").strip() + if raw_excluded: + try: + extra_excluded = json.loads(raw_excluded) + if isinstance(extra_excluded, list): + ps.exclude_patterns.extend( + p.strip() for p in extra_excluded if isinstance(p, str) and p.strip() + ) + except json.JSONDecodeError: + pass + + # Migrate COCOINDEX_CODE_EXTRA_EXTENSIONS + raw_extra = os.environ.get("COCOINDEX_CODE_EXTRA_EXTENSIONS", "") + for token in raw_extra.split(","): + token = token.strip() + if not token: + continue + if ":" in token: + ext, lang = token.split(":", 1) + ext = ext.strip() + lang = lang.strip() + 
ps.include_patterns.append(f"**/*.{ext}") + if lang: + ps.language_overrides.append(LanguageOverride(ext=ext, lang=lang)) + else: + ps.include_patterns.append(f"**/*.{token}") + + save_project_settings(project_root, ps) + + # --- Auto-create user settings if needed --- + user_file = user_settings_path() + if not user_file.is_file(): + us = default_user_settings() + + # Migrate COCOINDEX_CODE_EMBEDDING_MODEL + env_model = os.environ.get("COCOINDEX_CODE_EMBEDDING_MODEL", "") + if env_model: + provider, model = _convert_embedding_model(env_model) + us.embedding = EmbeddingSettings(provider=provider, model=model) + + # Migrate COCOINDEX_CODE_DEVICE + env_device = os.environ.get("COCOINDEX_CODE_DEVICE") + if env_device: + us.embedding.device = env_device + + save_user_settings(us) + + # --- Delegate to daemon --- if args.command == "index": - asyncio.run(_async_index()) + client = ensure_daemon() + resp = client.index(str(project_root)) + if resp.success: + status = client.project_status(str(project_root)) + print("\nIndex stats:") + print(f" Chunks: {status.total_chunks}") + print(f" Files: {status.total_files}") + if status.languages: + print(" Languages:") + for lang, count in sorted(status.languages.items(), key=lambda x: -x[1]): + print(f" {lang}: {count} chunks") + else: + print(f"Indexing failed: {resp.message}") + client.close() else: - asyncio.run(_async_serve()) + # Default: run MCP server + client = ensure_daemon() + mcp_server = create_mcp_server(client, str(project_root)) + async def _serve() -> None: + asyncio.create_task(_bg_index(client, str(project_root))) + await mcp_server.run_stdio_async() -if __name__ == "__main__": - main() + asyncio.run(_serve()) + + +async def _bg_index(client: DaemonClient, project_root: str) -> None: + """Index in background.""" + loop = asyncio.get_event_loop() + try: + await loop.run_in_executor(None, client.index, project_root) + except Exception: + pass diff --git a/src/cocoindex_code/settings.py 
b/src/cocoindex_code/settings.py new file mode 100644 index 0000000..97f356a --- /dev/null +++ b/src/cocoindex_code/settings.py @@ -0,0 +1,277 @@ +"""YAML settings schema, loading, saving, and path helpers.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +import cocoindex as _coco +import yaml as _yaml + +# --------------------------------------------------------------------------- +# Default file patterns (moved from indexer.py) +# --------------------------------------------------------------------------- + +DEFAULT_INCLUDED_PATTERNS: list[str] = [ + "**/*.py", # Python + "**/*.pyi", # Python stubs + "**/*.js", # JavaScript + "**/*.jsx", # JavaScript React + "**/*.ts", # TypeScript + "**/*.tsx", # TypeScript React + "**/*.mjs", # JavaScript ES modules + "**/*.cjs", # JavaScript CommonJS + "**/*.rs", # Rust + "**/*.go", # Go + "**/*.java", # Java + "**/*.c", # C + "**/*.h", # C/C++ headers + "**/*.cpp", # C++ + "**/*.hpp", # C++ headers + "**/*.cc", # C++ + "**/*.cxx", # C++ + "**/*.hxx", # C++ headers + "**/*.hh", # C++ headers + "**/*.cs", # C# + "**/*.sql", # SQL + "**/*.sh", # Shell + "**/*.bash", # Bash + "**/*.zsh", # Zsh + "**/*.md", # Markdown + "**/*.mdx", # MDX + "**/*.txt", # Plain text + "**/*.rst", # reStructuredText + "**/*.php", # PHP + "**/*.lua", # Lua +] + +DEFAULT_EXCLUDED_PATTERNS: list[str] = [ + "**/.*", # Hidden directories + "**/__pycache__", # Python cache + "**/node_modules", # Node.js dependencies + "**/target", # Rust/Maven build output + "**/build/assets", # Build assets directories + "**/dist", # Distribution directories + "**/vendor/*.*/*", # Go vendor directory (domain-based paths) + "**/vendor/*", # PHP vendor directory + "**/.cocoindex_code", # Our own index directory +] + +# --------------------------------------------------------------------------- +# Dataclasses +# --------------------------------------------------------------------------- 
+ + +@dataclass +class EmbeddingSettings: + provider: str = "sentence-transformers" + model: str = "sentence-transformers/all-MiniLM-L6-v2" + device: str | None = None + + +@dataclass +class UserSettings: + embedding: EmbeddingSettings = field(default_factory=EmbeddingSettings) + envs: dict[str, str] = field(default_factory=dict) + + +@dataclass +class LanguageOverride: + ext: str # without dot, e.g. "inc" + lang: str # e.g. "php" + + +@dataclass +class ProjectSettings: + include_patterns: list[str] = field(default_factory=lambda: list(DEFAULT_INCLUDED_PATTERNS)) + exclude_patterns: list[str] = field(default_factory=lambda: list(DEFAULT_EXCLUDED_PATTERNS)) + language_overrides: list[LanguageOverride] = field(default_factory=list) + + +# CocoIndex context key for project settings +PROJECT_SETTINGS = _coco.ContextKey[ProjectSettings]("project_settings") + +# --------------------------------------------------------------------------- +# Default factories +# --------------------------------------------------------------------------- + + +def default_user_settings() -> UserSettings: + return UserSettings() + + +def default_project_settings() -> ProjectSettings: + return ProjectSettings() + + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +_SETTINGS_DIR_NAME = ".cocoindex_code" +_SETTINGS_FILE_NAME = "settings.yml" + + +def user_settings_dir() -> Path: + """Return ``~/.cocoindex_code/``. + + Respects ``COCOINDEX_CODE_DIR`` env var for overriding the base directory. 
+ """ + import os + + override = os.environ.get("COCOINDEX_CODE_DIR") + if override: + return Path(override) + return Path.home() / _SETTINGS_DIR_NAME + + +def user_settings_path() -> Path: + """Return ``~/.cocoindex_code/settings.yml``.""" + return user_settings_dir() / _SETTINGS_FILE_NAME + + +def project_settings_path(project_root: Path) -> Path: + """Return ``$PROJECT_ROOT/.cocoindex_code/settings.yml``.""" + return project_root / _SETTINGS_DIR_NAME / _SETTINGS_FILE_NAME + + +def find_project_root(start: Path) -> Path | None: + """Walk up from *start* looking for ``.cocoindex_code/settings.yml``. + + Returns the directory containing it, or ``None``. + """ + current = start.resolve() + while True: + if (current / _SETTINGS_DIR_NAME / _SETTINGS_FILE_NAME).is_file(): + return current + parent = current.parent + if parent == current: + return None + current = parent + + +def find_parent_with_marker(start: Path) -> Path | None: + """Walk up from *start* looking for ``.cocoindex_code/`` or ``.git/``. + + Returns the first directory found, or ``None``. + Does not consider the home directory or above, to avoid false positives + on CI runners where ~/.git may exist. 
+ """ + home = Path.home().resolve() + current = start.resolve() + while True: + # Stop before reaching the home directory (home itself is not a project root) + if current == home: + return None + parent = current.parent + if parent == current: + return None + if (current / _SETTINGS_DIR_NAME).is_dir() or (current / ".git").is_dir(): + return current + current = parent + + +# --------------------------------------------------------------------------- +# Serialization helpers +# --------------------------------------------------------------------------- + + +def _user_settings_to_dict(settings: UserSettings) -> dict[str, Any]: + d: dict[str, Any] = {} + emb: dict[str, Any] = {} + if settings.embedding.provider != "sentence-transformers": + emb["provider"] = settings.embedding.provider + if settings.embedding.model != "sentence-transformers/all-MiniLM-L6-v2": + emb["model"] = settings.embedding.model + if settings.embedding.device is not None: + emb["device"] = settings.embedding.device + if emb: + d["embedding"] = emb + if settings.envs: + d["envs"] = dict(settings.envs) + return d + + +def _user_settings_from_dict(d: dict[str, Any]) -> UserSettings: + emb_dict = d.get("embedding", {}) + embedding = EmbeddingSettings( + provider=emb_dict.get("provider", "sentence-transformers"), + model=emb_dict.get("model", "sentence-transformers/all-MiniLM-L6-v2"), + device=emb_dict.get("device"), + ) + envs = d.get("envs", {}) + return UserSettings(embedding=embedding, envs=envs) + + +def _project_settings_to_dict(settings: ProjectSettings) -> dict[str, Any]: + d: dict[str, Any] = { + "include_patterns": settings.include_patterns, + "exclude_patterns": settings.exclude_patterns, + } + if settings.language_overrides: + d["language_overrides"] = [ + {"ext": lo.ext, "lang": lo.lang} for lo in settings.language_overrides + ] + return d + + +def _project_settings_from_dict(d: dict[str, Any]) -> ProjectSettings: + overrides = [ + LanguageOverride(ext=lo["ext"], lang=lo["lang"]) for lo 
in d.get("language_overrides", []) + ] + return ProjectSettings( + include_patterns=d.get("include_patterns", list(DEFAULT_INCLUDED_PATTERNS)), + exclude_patterns=d.get("exclude_patterns", list(DEFAULT_EXCLUDED_PATTERNS)), + language_overrides=overrides, + ) + + +# --------------------------------------------------------------------------- +# I/O +# --------------------------------------------------------------------------- + + +def load_user_settings() -> UserSettings: + """Read ``~/.cocoindex_code/settings.yml``, return defaults if missing.""" + path = user_settings_path() + if not path.is_file(): + return default_user_settings() + with open(path) as f: + data = _yaml.safe_load(f) + if not data: + return default_user_settings() + return _user_settings_from_dict(data) + + +def save_user_settings(settings: UserSettings) -> Path: + """Write user settings YAML. Returns path written.""" + path = user_settings_path() + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + _yaml.safe_dump(_user_settings_to_dict(settings), f, default_flow_style=False) + return path + + +def load_project_settings(project_root: Path) -> ProjectSettings: + """Read ``$PROJECT_ROOT/.cocoindex_code/settings.yml``. + + Raises ``FileNotFoundError`` if the file does not exist. + """ + path = project_settings_path(project_root) + if not path.is_file(): + raise FileNotFoundError(f"Project settings not found: {path}") + with open(path) as f: + data = _yaml.safe_load(f) + if not data: + return default_project_settings() + return _project_settings_from_dict(data) + + +def save_project_settings(project_root: Path, settings: ProjectSettings) -> Path: + """Write project settings YAML. 
Returns path written.""" + path = project_settings_path(project_root) + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + _yaml.safe_dump(_project_settings_to_dict(settings), f, default_flow_style=False) + return path diff --git a/src/cocoindex_code/shared.py b/src/cocoindex_code/shared.py index 48aa90c..714224d 100644 --- a/src/cocoindex_code/shared.py +++ b/src/cocoindex_code/shared.py @@ -1,56 +1,75 @@ -"""Shared singletons: config, embedder, and CocoIndex lifecycle.""" +"""Shared context keys, embedder factory, and CodeChunk schema.""" from __future__ import annotations import logging +import pathlib from dataclasses import dataclass -from typing import TYPE_CHECKING, Annotated +from typing import TYPE_CHECKING, Annotated, Union import cocoindex as coco from cocoindex.connectors import sqlite -from cocoindex.connectors.localfs import FilePath from numpy.typing import NDArray if TYPE_CHECKING: from cocoindex.ops.litellm import LiteLLMEmbedder from cocoindex.ops.sentence_transformers import SentenceTransformerEmbedder -from .config import config +from .settings import EmbeddingSettings logger = logging.getLogger(__name__) SBERT_PREFIX = "sbert/" -# Initialize embedder at module level based on model prefix -embedder: SentenceTransformerEmbedder | LiteLLMEmbedder -if config.embedding_model.startswith(SBERT_PREFIX): - from cocoindex.ops.sentence_transformers import SentenceTransformerEmbedder +# Models that define a "query" prompt for asymmetric retrieval. +_QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"} - _model_name = config.embedding_model[len(SBERT_PREFIX) :] - # Models that define a "query" prompt for asymmetric retrieval. 
- _QUERY_PROMPT_MODELS = {"nomic-ai/nomic-embed-code", "nomic-ai/CodeRankEmbed"} - query_prompt_name: str | None = "query" if _model_name in _QUERY_PROMPT_MODELS else None - embedder = SentenceTransformerEmbedder( - _model_name, - device=config.device, - trust_remote_code=True, - ) - logger.info( - "Embedding model: %s | device: %s", - config.embedding_model, - config.device, - ) -else: - from cocoindex.ops.litellm import LiteLLMEmbedder +# Type alias +Embedder = Union["SentenceTransformerEmbedder", "LiteLLMEmbedder"] + +# Context keys +EMBEDDER = coco.ContextKey[Embedder]("embedder") +SQLITE_DB = coco.ContextKey[sqlite.ManagedConnection]("index_db", tracked=False) +CODEBASE_DIR = coco.ContextKey[pathlib.Path]("codebase", tracked=False) + +# Module-level variable — set by daemon at startup (needed for CodeChunk annotation). +embedder: Embedder | None = None + +# Query prompt name — set alongside embedder by create_embedder(). +query_prompt_name: str | None = None + + +def create_embedder(settings: EmbeddingSettings) -> Embedder: + """Create and return an embedder instance based on settings. + + Also sets the module-level ``embedder`` and ``query_prompt_name`` variables. 
+ """ + global embedder, query_prompt_name + + if settings.provider == "sentence-transformers": + from cocoindex.ops.sentence_transformers import SentenceTransformerEmbedder + + model_name = settings.model + # Strip the legacy sbert/ prefix if present + if model_name.startswith(SBERT_PREFIX): + model_name = model_name[len(SBERT_PREFIX) :] + + query_prompt_name = "query" if model_name in _QUERY_PROMPT_MODELS else None + instance: Embedder = SentenceTransformerEmbedder( + model_name, + device=settings.device, + trust_remote_code=True, + ) + logger.info("Embedding model: %s | device: %s", settings.model, settings.device) + else: + from cocoindex.ops.litellm import LiteLLMEmbedder - embedder = LiteLLMEmbedder(config.embedding_model) - query_prompt_name = None - logger.info("Embedding model (LiteLLM): %s", config.embedding_model) + instance = LiteLLMEmbedder(settings.model) + query_prompt_name = None + logger.info("Embedding model (LiteLLM): %s", settings.model) -# Context key for SQLite database (connection managed in lifespan) -SQLITE_DB = coco.ContextKey[sqlite.SqliteDatabase]("sqlite_db") -# Context key for codebase root directory (provided in lifespan) -CODEBASE_DIR = coco.ContextKey[FilePath]("codebase_dir") + embedder = instance + return instance @dataclass diff --git a/tests/conftest.py b/tests/conftest.py index faabf0a..1cf43bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,14 +2,11 @@ import os import tempfile -from collections.abc import AsyncIterator from pathlib import Path import pytest -import pytest_asyncio # === Environment setup BEFORE any cocoindex_code imports === -# Create test directory and set it BEFORE any module imports _TEST_DIR = Path(tempfile.mkdtemp(prefix="cocoindex_test_")) os.environ["COCOINDEX_CODE_ROOT_PATH"] = str(_TEST_DIR) @@ -18,17 +15,3 @@ def test_codebase_root() -> Path: """Session-scoped test codebase directory.""" return _TEST_DIR - - -@pytest_asyncio.fixture(scope="session", loop_scope="session") -async def 
coco_runtime() -> AsyncIterator[None]: - """ - Set up CocoIndex project for the entire test session. - - Uses session-scoped event loop to ensure CocoIndex environment - persists across all tests. - """ - from cocoindex_code.project import default_project - - await default_project() - yield diff --git a/tests/test_backward_compat.py b/tests/test_backward_compat.py new file mode 100644 index 0000000..96b4d67 --- /dev/null +++ b/tests/test_backward_compat.py @@ -0,0 +1,119 @@ +"""Tests for backward-compatible entry point settings migration.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from cocoindex_code.server import _convert_embedding_model +from cocoindex_code.settings import ( + EmbeddingSettings, + LanguageOverride, + UserSettings, + default_project_settings, + default_user_settings, + load_project_settings, + load_user_settings, + save_project_settings, + save_user_settings, +) + + +def test_legacy_entry_creates_settings_from_env_vars( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Settings migration from env vars should produce correct YAML values.""" + monkeypatch.setattr( + "cocoindex_code.settings.user_settings_dir", + lambda: tmp_path / "user", + ) + monkeypatch.setattr( + "cocoindex_code.settings.user_settings_path", + lambda: tmp_path / "user" / "settings.yml", + ) + + # Simulate env vars + us = default_user_settings() + provider, model = _convert_embedding_model("sbert/sentence-transformers/all-MiniLM-L6-v2") + us.embedding = EmbeddingSettings(provider=provider, model=model, device="cpu") + save_user_settings(us) + + loaded = load_user_settings() + assert loaded.embedding.provider == "sentence-transformers" + assert "all-MiniLM-L6-v2" in loaded.embedding.model + assert loaded.embedding.device == "cpu" + + +def test_legacy_entry_respects_existing_settings( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Pre-existing settings files should not be overwritten.""" + 
monkeypatch.setattr( + "cocoindex_code.settings.user_settings_dir", + lambda: tmp_path / "user", + ) + monkeypatch.setattr( + "cocoindex_code.settings.user_settings_path", + lambda: tmp_path / "user" / "settings.yml", + ) + + custom = UserSettings( + embedding=EmbeddingSettings(provider="litellm", model="custom/model"), + ) + save_user_settings(custom) + + # Loading should return existing settings + loaded = load_user_settings() + assert loaded.embedding.provider == "litellm" + assert loaded.embedding.model == "custom/model" + + +def test_legacy_embedding_model_conversion() -> None: + """Old sbert/ prefix and litellm-style model names should be converted correctly.""" + provider, model = _convert_embedding_model("sbert/sentence-transformers/all-MiniLM-L6-v2") + assert provider == "sentence-transformers" + assert model == "sentence-transformers/all-MiniLM-L6-v2" + + provider, model = _convert_embedding_model("gemini/text-embedding-004") + assert provider == "litellm" + assert model == "gemini/text-embedding-004" + + +def test_legacy_extra_extensions_conversion(tmp_path: Path) -> None: + """COCOINDEX_CODE_EXTRA_EXTENSIONS should produce language_overrides and include_patterns.""" + ps = default_project_settings() + + # Simulate: "inc:php,yaml,toml" + raw = "inc:php,yaml,toml" + for token in raw.split(","): + token = token.strip() + if ":" in token: + ext, lang = token.split(":", 1) + ps.include_patterns.append(f"**/*.{ext.strip()}") + ps.language_overrides.append(LanguageOverride(ext=ext.strip(), lang=lang.strip())) + else: + ps.include_patterns.append(f"**/*.{token}") + + save_project_settings(tmp_path, ps) + loaded = load_project_settings(tmp_path) + + assert any(lo.ext == "inc" and lo.lang == "php" for lo in loaded.language_overrides) + assert "**/*.inc" in loaded.include_patterns + assert "**/*.yaml" in loaded.include_patterns + assert "**/*.toml" in loaded.include_patterns + + +def test_legacy_excluded_patterns_conversion(tmp_path: Path) -> None: + 
"""COCOINDEX_CODE_EXCLUDED_PATTERNS should be appended to default exclude_patterns.""" + + ps = default_project_settings() + extra = ["**/migration.sql"] + ps.exclude_patterns.extend(extra) + + save_project_settings(tmp_path, ps) + loaded = load_project_settings(tmp_path) + + # Should have defaults + extra + assert "**/migration.sql" in loaded.exclude_patterns + assert "**/.*" in loaded.exclude_patterns # default diff --git a/tests/test_cli_helpers.py b/tests/test_cli_helpers.py new file mode 100644 index 0000000..13f33a1 --- /dev/null +++ b/tests/test_cli_helpers.py @@ -0,0 +1,64 @@ +"""Unit tests for shared CLI helpers.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from cocoindex_code.cli import require_project_root, resolve_default_path + + +def test_require_project_root_success(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + project = tmp_path / "project" + (project / ".cocoindex_code").mkdir(parents=True) + (project / ".cocoindex_code" / "settings.yml").write_text("include_patterns: []") + subdir = project / "src" + subdir.mkdir() + monkeypatch.chdir(subdir) + assert require_project_root() == project + + +def test_require_project_root_exits_when_not_initialized( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + standalone = tmp_path / "standalone" + standalone.mkdir() + monkeypatch.chdir(standalone) + from click.exceptions import Exit + + with pytest.raises(Exit): + require_project_root() + + +def test_resolve_default_path_from_subdirectory( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + project_root = tmp_path / "project" + subdir = project_root / "src" / "lib" + subdir.mkdir(parents=True) + monkeypatch.chdir(subdir) + result = resolve_default_path(project_root) + assert result == "src/lib/*" + + +def test_resolve_default_path_from_project_root( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + project_root = tmp_path / "project" + project_root.mkdir() + 
monkeypatch.chdir(project_root) + result = resolve_default_path(project_root) + assert result is None + + +def test_resolve_default_path_outside_project( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + project_root = tmp_path / "project" + project_root.mkdir() + other = tmp_path / "other" + other.mkdir() + monkeypatch.chdir(other) + result = resolve_default_path(project_root) + assert result is None diff --git a/tests/test_client.py b/tests/test_client.py new file mode 100644 index 0000000..0f27b88 --- /dev/null +++ b/tests/test_client.py @@ -0,0 +1,113 @@ +"""Tests for DaemonClient and ensure_daemon().""" + +from __future__ import annotations + +import sys +import tempfile +import threading +import time +import uuid +from collections.abc import Iterator +from multiprocessing.connection import Client +from pathlib import Path + +import pytest + +from cocoindex_code._version import __version__ +from cocoindex_code.client import DaemonClient +from cocoindex_code.daemon import _connection_family +from cocoindex_code.protocol import ( + HandshakeRequest, + StopRequest, + encode_request, +) +from cocoindex_code.settings import ( + default_user_settings, + save_user_settings, +) + + +@pytest.fixture() +def daemon_env(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> tuple[Path, str, Path]: + """Set up daemon environment for client tests.""" + user_dir = tmp_path / "user_home" / ".cocoindex_code" + user_dir.mkdir(parents=True) + + if sys.platform == "win32": + sock_path = rf"\\.\pipe\ccc_client_{uuid.uuid4().hex[:12]}" + else: + sock_dir = Path(tempfile.mkdtemp(prefix="ccc_client_")) + sock_path = str(sock_dir / "d.sock") + pid_path = user_dir / "daemon.pid" + + monkeypatch.setattr("cocoindex_code.settings.user_settings_dir", lambda: user_dir) + monkeypatch.setattr( + "cocoindex_code.settings.user_settings_path", + lambda: user_dir / "settings.yml", + ) + save_user_settings(default_user_settings()) + + # Override socket/pid paths for short AF_UNIX paths 
+ monkeypatch.setattr("cocoindex_code.daemon.daemon_socket_path", lambda: sock_path) + monkeypatch.setattr("cocoindex_code.client.daemon_socket_path", lambda: sock_path) + monkeypatch.setattr("cocoindex_code.client.daemon_pid_path", lambda: pid_path) + + return user_dir, sock_path, pid_path + + +@pytest.fixture() +def daemon_thread(daemon_env: tuple[Path, str, Path]) -> Iterator[str]: + """Start daemon in thread, yield socket path.""" + user_dir, sock_path, pid_path = daemon_env + + from cocoindex_code.daemon import run_daemon + + thread = threading.Thread(target=run_daemon, daemon=True) + thread.start() + + # Wait for socket/pipe + import os + + deadline = time.monotonic() + 30 + while time.monotonic() < deadline: + if os.path.exists(sock_path): + break + time.sleep(0.2) + + yield sock_path + + try: + conn = Client(sock_path, family=_connection_family()) + conn.send_bytes(encode_request(HandshakeRequest(version=__version__))) + conn.recv_bytes() + conn.send_bytes(encode_request(StopRequest())) + conn.recv_bytes() + conn.close() + except Exception: + pass + thread.join(timeout=5) + + +def test_client_connect_to_running_daemon(daemon_thread: str) -> None: + client = DaemonClient.connect() + resp = client.handshake() + assert resp.ok is True + client.close() + + +def test_client_connect_refuses_when_no_daemon( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + sock_dir = Path(tempfile.mkdtemp(prefix="ccc_noconn_")) + sock_path = str(sock_dir / "d.sock") + monkeypatch.setattr("cocoindex_code.client.daemon_socket_path", lambda: sock_path) + + with pytest.raises(ConnectionRefusedError): + DaemonClient.connect() + + +def test_client_close_is_idempotent(daemon_thread: str) -> None: + client = DaemonClient.connect() + client.handshake() + client.close() + client.close() # should not raise diff --git a/tests/test_daemon.py b/tests/test_daemon.py new file mode 100644 index 0000000..294e40c --- /dev/null +++ b/tests/test_daemon.py @@ -0,0 +1,162 @@ 
+"""Integration tests for the daemon process. + +Runs the daemon in a background thread with a shared embedder. +Uses a session-scoped fixture to avoid re-creating the daemon for each test. +""" + +from __future__ import annotations + +import os +import tempfile +import threading +import time +from collections.abc import Iterator +from multiprocessing.connection import Client, Connection +from pathlib import Path + +import pytest + +from cocoindex_code._version import __version__ +from cocoindex_code.daemon import _connection_family +from cocoindex_code.protocol import ( + DaemonStatusRequest, + HandshakeRequest, + IndexRequest, + ProjectStatusRequest, + Response, + SearchRequest, + decode_response, + encode_request, +) +from cocoindex_code.settings import ( + default_project_settings, + default_user_settings, + save_project_settings, + save_user_settings, +) + +SAMPLE_MAIN_PY = '''\ +"""Main module.""" + +def calculate_fibonacci(n: int) -> int: + """Calculate the nth Fibonacci number.""" + if n <= 1: + return n + return calculate_fibonacci(n - 1) + calculate_fibonacci(n - 2) +''' + + +@pytest.fixture(scope="session") +def daemon_sock() -> Iterator[str]: + """Start a daemon once per session and return the socket path.""" + import cocoindex_code.daemon as dm + from cocoindex_code.settings import EmbeddingSettings + from cocoindex_code.shared import create_embedder + from cocoindex_code.shared import embedder as shared_emb + + emb = shared_emb if shared_emb is not None else create_embedder(EmbeddingSettings()) + + # Use a short path to stay within AF_UNIX limit + user_dir = Path(tempfile.mkdtemp(prefix="ccc_d_")) + user_dir.mkdir(parents=True, exist_ok=True) + + # Use COCOINDEX_CODE_DIR env var for isolation instead of direct module patching. + # Direct patching of dm.user_settings_dir leaks across test modules and causes + # stop_daemon() in other fixtures to read the wrong PID file (pytest's own PID). 
+ old_env = os.environ.get("COCOINDEX_CODE_DIR") + os.environ["COCOINDEX_CODE_DIR"] = str(user_dir) + + # Patch create_embedder to reuse the already-loaded embedder (performance) + _orig_create_embedder = dm.create_embedder # type: ignore[attr-defined] + dm.create_embedder = lambda settings: emb # type: ignore[attr-defined] + + save_user_settings(default_user_settings()) + + thread = threading.Thread(target=dm.run_daemon, daemon=True) + thread.start() + + sock_path = dm.daemon_socket_path() + + deadline = time.monotonic() + 20 + while time.monotonic() < deadline: + if os.path.exists(sock_path): + break + time.sleep(0.1) + else: + raise TimeoutError("Daemon did not start") + + yield sock_path + + # Restore patches and env var + dm.create_embedder = _orig_create_embedder # type: ignore[attr-defined] + if old_env is None: + os.environ.pop("COCOINDEX_CODE_DIR", None) + else: + os.environ["COCOINDEX_CODE_DIR"] = old_env + + +@pytest.fixture(scope="session") +def daemon_project(daemon_sock: str) -> str: + """Create and index a project once for the session. 
Returns project_root str.""" + project = Path(tempfile.mkdtemp(prefix="ccc_proj_")) + save_project_settings(project, default_project_settings()) + (project / "main.py").write_text(SAMPLE_MAIN_PY) + + conn = Client(daemon_sock, family=_connection_family()) + conn.send_bytes(encode_request(HandshakeRequest(version=__version__))) + decode_response(conn.recv_bytes()) + conn.send_bytes(encode_request(IndexRequest(project_root=str(project)))) + decode_response(conn.recv_bytes()) + conn.close() + + return str(project) + + +def _connect_and_handshake(sock_path: str) -> tuple[Connection, Response]: + conn = Client(sock_path, family=_connection_family()) + conn.send_bytes(encode_request(HandshakeRequest(version=__version__))) + resp = decode_response(conn.recv_bytes()) + return conn, resp + + +def test_daemon_starts_and_accepts_handshake(daemon_sock: str) -> None: + conn, resp = _connect_and_handshake(daemon_sock) + assert resp.ok is True # type: ignore[union-attr] + assert resp.daemon_version == __version__ # type: ignore[union-attr] + conn.close() + + +def test_daemon_rejects_version_mismatch(daemon_sock: str) -> None: + conn = Client(daemon_sock, family=_connection_family()) + conn.send_bytes(encode_request(HandshakeRequest(version="0.0.0-fake"))) + resp = decode_response(conn.recv_bytes()) + assert resp.ok is False # type: ignore[union-attr] + conn.close() + + +def test_daemon_status(daemon_sock: str) -> None: + conn, _ = _connect_and_handshake(daemon_sock) + conn.send_bytes(encode_request(DaemonStatusRequest())) + resp = decode_response(conn.recv_bytes()) + assert resp.version == __version__ # type: ignore[union-attr] + assert resp.uptime_seconds > 0 # type: ignore[union-attr] + conn.close() + + +def test_daemon_project_status_after_index(daemon_sock: str, daemon_project: str) -> None: + conn, _ = _connect_and_handshake(daemon_sock) + conn.send_bytes(encode_request(ProjectStatusRequest(project_root=daemon_project))) + resp = decode_response(conn.recv_bytes()) + assert 
resp.total_chunks > 0 # type: ignore[union-attr] + assert resp.total_files > 0 # type: ignore[union-attr] + conn.close() + + +def test_daemon_search_after_index(daemon_sock: str, daemon_project: str) -> None: + conn, _ = _connect_and_handshake(daemon_sock) + conn.send_bytes(encode_request(SearchRequest(project_root=daemon_project, query="fibonacci"))) + resp = decode_response(conn.recv_bytes()) + assert resp.success is True # type: ignore[union-attr] + assert len(resp.results) > 0 # type: ignore[union-attr] + assert "main.py" in resp.results[0].file_path # type: ignore[union-attr] + conn.close() diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 8071831..2094ff6 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -1,17 +1,24 @@ -"""Tests for cocoindex-code: config discovery and end-to-end indexing/querying.""" +"""End-to-end tests exercising the full CLI → daemon → index → search flow. -import shutil +Each test uses a real daemon subprocess (via COCOINDEX_CODE_DIR env var) +and the actual CLI commands through typer's CliRunner. 
+""" + +from __future__ import annotations + +import os +import tempfile +from collections.abc import Iterator from pathlib import Path import pytest +from typer.testing import CliRunner -from cocoindex_code.config import _discover_codebase_root -from cocoindex_code.project import default_project -from cocoindex_code.query import query_codebase +from cocoindex_code.cli import app +from cocoindex_code.client import stop_daemon +from cocoindex_code.settings import find_parent_with_marker -pytest_plugins = ("pytest_asyncio",) - -# === Sample codebase files === +runner = CliRunner() SAMPLE_MAIN_PY = '''\ """Main application entry point.""" @@ -69,37 +76,6 @@ def execute_query(self, sql: str) -> list[dict]: return [] ''' -SAMPLE_ML_MODEL_PY = '''\ -"""Machine learning model implementation.""" - -class NeuralNetwork: - """A simple neural network for classification.""" - - def __init__(self, layers: list[int]): - self.layers = layers - self.weights = [] - - def train(self, data: list, labels: list) -> None: - """Train the neural network on provided data.""" - pass - - def predict(self, input_data: list) -> float: - """Make a prediction using the trained model.""" - return 0.0 -''' - -SAMPLE_UTILS_AUTH_PY = '''\ -"""Utility functions for authentication.""" - -def authenticate_user(username: str, password: str) -> bool: - """Authenticate a user with username and password.""" - return username == "admin" and password == "secret" - -def create_login_session(user_id: int) -> str: - """Create a new login session for the authenticated user.""" - return f"session_{user_id}" -''' - SAMPLE_APP_JS = """\ /** Express web application server. 
*/ @@ -111,299 +87,154 @@ def create_login_session(user_id: int) -> str: res.json({ message: `Hello, ${name}!` }); } -function startServer(port) { - app.get('/api/greet', handleRequest); - app.listen(port, () => console.log(`Server running on port ${port}`)); -} - -module.exports = { handleRequest, startServer }; -""" - -SAMPLE_HELPERS_TS = """\ -/** TypeScript helper utilities for data transformation. */ - -interface DataRecord { - id: number; - value: string; - timestamp: Date; -} - -function transformRecords(records: DataRecord[]): Map { - const result = new Map(); - for (const record of records) { - result.set(record.id, record.value.toUpperCase()); - } - return result; -} - -function filterByTimestamp(records: DataRecord[], after: Date): DataRecord[] { - return records.filter(r => r.timestamp > after); -} - -export { transformRecords, filterByTimestamp, DataRecord }; +module.exports = { handleRequest }; """ -# === Helper functions === - - -def clear_codebase(codebase: Path) -> None: - """Remove all files from the codebase (except .cocoindex_code).""" - for item in codebase.iterdir(): - if item.name != ".cocoindex_code": - if item.is_dir(): - shutil.rmtree(item) - else: - item.unlink() - - -def setup_base_codebase(codebase: Path) -> None: - """Set up the base codebase files.""" - clear_codebase(codebase) - (codebase / "main.py").write_text(SAMPLE_MAIN_PY) - (codebase / "utils.py").write_text(SAMPLE_UTILS_PY) - - lib_dir = codebase / "lib" - lib_dir.mkdir(exist_ok=True) +@pytest.fixture(scope="module") +def e2e_env() -> Iterator[Path]: + """Set up a temp project dir with sample files and a daemon subprocess. + + Uses COCOINDEX_CODE_DIR to redirect the daemon to a temp directory, + so the subprocess picks up the right paths. 
+ """ + base_dir = Path(tempfile.mkdtemp(prefix="ccc_e2e_")) + project_dir = base_dir / "project" + project_dir.mkdir() + (project_dir / "main.py").write_text(SAMPLE_MAIN_PY) + (project_dir / "utils.py").write_text(SAMPLE_UTILS_PY) + lib_dir = project_dir / "lib" + lib_dir.mkdir() (lib_dir / "database.py").write_text(SAMPLE_DATABASE_PY) - -def setup_multi_lang_codebase(codebase: Path) -> None: - """Set up a codebase with Python, JavaScript, and TypeScript files.""" - clear_codebase(codebase) - (codebase / "main.py").write_text(SAMPLE_MAIN_PY) - (codebase / "utils.py").write_text(SAMPLE_UTILS_PY) - - lib_dir = codebase / "lib" - lib_dir.mkdir(exist_ok=True) - (lib_dir / "database.py").write_text(SAMPLE_DATABASE_PY) - - (codebase / "app.js").write_text(SAMPLE_APP_JS) - (lib_dir / "helpers.ts").write_text(SAMPLE_HELPERS_TS) - - -# === Tests === - - -class TestEndToEnd: - """End-to-end tests for the complete index-query workflow.""" - - @pytest.mark.asyncio(loop_scope="session") - async def test_index_and_query_codebase( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should index a codebase and return relevant query results.""" - setup_base_codebase(test_codebase_root) - await (await default_project()).update_index() - - # Verify index was created - index_dir = test_codebase_root / ".cocoindex_code" - assert index_dir.exists() - assert (index_dir / "target_sqlite.db").exists() - - # Query for Fibonacci - results = await query_codebase("fibonacci calculation") - assert len(results) > 0 - assert "main.py" in results[0].file_path - assert "fibonacci" in results[0].content.lower() - - # Query for database connection - results = await query_codebase("database connection") - assert len(results) > 0 - assert "database.py" in results[0].file_path - - @pytest.mark.asyncio(loop_scope="session") - async def test_incremental_update_add_file( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should reflect newly added files after 
re-indexing.""" - setup_base_codebase(test_codebase_root) - await (await default_project()).update_index() - - # Query for ML content - should not find it - results = await query_codebase("machine learning neural network") - has_ml = any( - "neural" in r.content.lower() or "machine learning" in r.content.lower() - for r in results + old_env = os.environ.get("COCOINDEX_CODE_DIR") + os.environ["COCOINDEX_CODE_DIR"] = str(base_dir) + + try: + yield project_dir + finally: + stop_daemon() + if old_env is None: + os.environ.pop("COCOINDEX_CODE_DIR", None) + else: + os.environ["COCOINDEX_CODE_DIR"] = old_env + + +class TestCLIEndToEnd: + """Tests that exercise ccc init → index → search → status via the real CLI.""" + + def test_init_creates_settings(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["init"], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert (e2e_env / ".cocoindex_code" / "settings.yml").exists() + assert "Created project settings" in result.output or "already initialized" in result.output + + def test_init_already_initialized(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["init"], catch_exceptions=False) + assert result.exit_code == 0 + assert "already initialized" in result.output + + def test_index(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["index"], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert "Chunks:" in result.output + assert "Files:" in result.output + + def test_status(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["status"], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert "Chunks:" in result.output + + def test_search_fibonacci(self, e2e_env: Path, 
monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["search", "fibonacci", "calculation"], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert "main.py" in result.output + + def test_search_database(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["search", "database", "connection"], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert "database.py" in result.output + + def test_search_with_lang_filter(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke( + app, ["search", "function", "--lang", "python"], catch_exceptions=False ) - assert not has_ml or results[0].score < 0.5 - - # Add a new ML file - (test_codebase_root / "ml_model.py").write_text(SAMPLE_ML_MODEL_PY) - - # Re-index and query again - await (await default_project()).update_index() - results = await query_codebase("neural network machine learning") - - assert len(results) > 0 - assert "ml_model.py" in results[0].file_path - - @pytest.mark.asyncio(loop_scope="session") - async def test_incremental_update_modify_file( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should reflect file modifications after re-indexing.""" - setup_base_codebase(test_codebase_root) - await (await default_project()).update_index() - - # Modify utils.py to add authentication - (test_codebase_root / "utils.py").write_text(SAMPLE_UTILS_AUTH_PY) + assert result.exit_code == 0, result.output + assert "python" in result.output.lower() - # Re-index and query for authentication - await (await default_project()).update_index() - results = await query_codebase("user authentication login") + def test_search_with_path_filter(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke( + app, ["search", "function", 
"--path", "lib/*"], catch_exceptions=False + ) + assert result.exit_code == 0, result.output + assert "lib/" in result.output + + def test_search_no_results(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke( + app, + ["search", "xyzzy_nonexistent_symbol_12345"], + catch_exceptions=False, + ) + assert result.exit_code == 0 - assert len(results) > 0 - assert "utils.py" in results[0].file_path - content_lower = results[0].content.lower() - assert "authenticate" in content_lower or "login" in content_lower + def test_daemon_status(self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.chdir(e2e_env) + result = runner.invoke(app, ["daemon", "status"], catch_exceptions=False) + assert result.exit_code == 0, result.output + assert "Daemon version:" in result.output - @pytest.mark.asyncio(loop_scope="session") - async def test_incremental_update_delete_file( - self, test_codebase_root: Path, coco_runtime: None + def test_incremental_index_new_file( + self, e2e_env: Path, monkeypatch: pytest.MonkeyPatch ) -> None: - """Should no longer return results from deleted files after re-indexing.""" - setup_base_codebase(test_codebase_root) - await (await default_project()).update_index() + """Adding a file and re-indexing should make it searchable.""" + monkeypatch.chdir(e2e_env) + (e2e_env / "app.js").write_text(SAMPLE_APP_JS) - # Query for database - should find it - results = await query_codebase("database connection execute query") - assert any("database.py" in r.file_path for r in results) + result = runner.invoke(app, ["index"], catch_exceptions=False) + assert result.exit_code == 0 - # Delete the database file - (test_codebase_root / "lib" / "database.py").unlink() + result = runner.invoke(app, ["search", "handleRequest"], catch_exceptions=False) + assert result.exit_code == 0 + assert "app.js" in result.output - # Re-index and query again - should no longer find database.py - await 
(await default_project()).update_index() - results = await query_codebase("database connection execute query") - assert not any("database.py" in r.file_path for r in results) + def test_not_initialized_errors(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Running commands outside an initialized project should fail.""" + standalone = tmp_path / "standalone" + standalone.mkdir() + monkeypatch.chdir(standalone) + result = runner.invoke(app, ["index"]) + assert result.exit_code != 0 + assert "ccc init" in result.output class TestCodebaseRootDiscovery: - """Tests for codebase root discovery logic - stateless, no global config needed.""" + """Tests for find_parent_with_marker helper.""" - def test_prefers_cocoindex_code_over_git( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Should prefer .cocoindex_code directory over .git when both exist.""" - # Create both markers in parent + def test_prefers_cocoindex_code_over_git(self, tmp_path: Path) -> None: parent = tmp_path / "project" parent.mkdir() (parent / ".cocoindex_code").mkdir() (parent / ".git").mkdir() - - # Run from a subdirectory subdir = parent / "src" / "lib" subdir.mkdir(parents=True) + assert find_parent_with_marker(subdir) == parent - monkeypatch.chdir(subdir) - result = _discover_codebase_root() - assert result == parent - - def test_finds_git_in_parent_hierarchy( - self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch - ) -> None: - """Should find .git in parent when deeply nested.""" - # Create .git at root level + def test_finds_git_in_parent_hierarchy(self, tmp_path: Path) -> None: (tmp_path / ".git").mkdir() - - # Create deep nesting deep_dir = tmp_path / "a" / "b" / "c" / "d" / "e" deep_dir.mkdir(parents=True) + assert find_parent_with_marker(deep_dir) == tmp_path - monkeypatch.chdir(deep_dir) - result = _discover_codebase_root() - assert result == tmp_path - - def test_falls_back_to_cwd_when_no_markers( - self, tmp_path: Path, monkeypatch: 
pytest.MonkeyPatch - ) -> None: - """Should fall back to cwd when no .git or .cocoindex_code found.""" - # Create empty directory with no markers + def test_falls_back_to_none_when_no_markers(self, tmp_path: Path) -> None: empty_dir = tmp_path / "standalone" empty_dir.mkdir() - - monkeypatch.chdir(empty_dir) - result = _discover_codebase_root() - assert result == empty_dir - - -class TestSearchFilters: - """End-to-end tests for language and file_path search filters.""" - - @pytest.mark.asyncio(loop_scope="session") - async def test_filter_by_language(self, test_codebase_root: Path, coco_runtime: None) -> None: - """Should return only results matching the specified language.""" - setup_multi_lang_codebase(test_codebase_root) - await (await default_project()).update_index() - - results = await query_codebase("function", limit=50, languages=["python"]) - assert len(results) > 0 - assert all(r.language == "python" for r in results) - - @pytest.mark.asyncio(loop_scope="session") - async def test_filter_by_language_multiple( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should return results matching any of the specified languages.""" - setup_multi_lang_codebase(test_codebase_root) - await (await default_project()).update_index() - - results = await query_codebase("function", limit=50, languages=["python", "javascript"]) - assert len(results) > 0 - languages_found = {r.language for r in results} - assert languages_found <= {"python", "javascript"} - # Should find at least one of each since both have relevant content - assert "python" in languages_found - assert "javascript" in languages_found - - @pytest.mark.asyncio(loop_scope="session") - async def test_filter_by_file_path_glob( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should return only results matching the file path glob pattern.""" - setup_multi_lang_codebase(test_codebase_root) - await (await default_project()).update_index() - - results = await 
query_codebase("function", limit=50, paths=["lib/*"]) - assert len(results) > 0 - assert all(r.file_path.startswith("lib/") for r in results) - - @pytest.mark.asyncio(loop_scope="session") - async def test_filter_by_file_path_wildcard_extension( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should filter by file extension using glob wildcard.""" - setup_multi_lang_codebase(test_codebase_root) - await (await default_project()).update_index() - - results = await query_codebase("function", limit=50, paths=["*.js"]) - assert len(results) > 0 - assert all(r.file_path.endswith(".js") for r in results) - - @pytest.mark.asyncio(loop_scope="session") - async def test_filter_by_both_language_and_file_path( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should apply both language and file path filters together.""" - setup_multi_lang_codebase(test_codebase_root) - await (await default_project()).update_index() - - # Filter for Python files under lib/ - results = await query_codebase("function", limit=50, languages=["python"], paths=["lib/*"]) - assert len(results) > 0 - assert all(r.language == "python" for r in results) - assert all(r.file_path.startswith("lib/") for r in results) - - @pytest.mark.asyncio(loop_scope="session") - async def test_no_filter_returns_all_languages( - self, test_codebase_root: Path, coco_runtime: None - ) -> None: - """Should return results from all languages when no filter is applied.""" - setup_multi_lang_codebase(test_codebase_root) - await (await default_project()).update_index() - - results = await query_codebase("function", limit=50) - languages_found = {r.language for r in results} - # Should find at least Python and JavaScript - assert len(languages_found) >= 2 + assert find_parent_with_marker(empty_dir) is None diff --git a/tests/test_e2e_daemon.py b/tests/test_e2e_daemon.py new file mode 100644 index 0000000..042db1e --- /dev/null +++ b/tests/test_e2e_daemon.py @@ -0,0 +1,127 @@ 
+"""End-to-end tests for the CLI → daemon subprocess flow. + +These tests start a real daemon subprocess via ``start_daemon()`` and interact +with it through ``DaemonClient``, mirroring how ``ccc index`` / ``ccc search`` +actually work. +""" + +from __future__ import annotations + +import os +import tempfile +import time +from collections.abc import Iterator +from pathlib import Path + +import pytest + +from cocoindex_code._version import __version__ +from cocoindex_code.client import DaemonClient, start_daemon, stop_daemon +from cocoindex_code.daemon import daemon_socket_path +from cocoindex_code.settings import ( + default_project_settings, + default_user_settings, + save_project_settings, + save_user_settings, +) + +SAMPLE_PY = '''\ +"""Sample module.""" + +def calculate_fibonacci(n: int) -> int: + """Calculate the nth Fibonacci number.""" + if n <= 1: + return n + return calculate_fibonacci(n - 1) + calculate_fibonacci(n - 2) +''' + + +@pytest.fixture(scope="module") +def e2e_daemon() -> Iterator[tuple[str, Path]]: + """Start a real daemon subprocess and return (sock_path, project_dir). + + Uses COCOINDEX_CODE_DIR env var so the subprocess uses the temp directory. 
+ """ + # Use a short temp dir to stay within AF_UNIX path limit + base_dir = Path(tempfile.mkdtemp(prefix="ccc_e2e_")) + project_dir = base_dir / "proj" + project_dir.mkdir() + (project_dir / "main.py").write_text(SAMPLE_PY) + + # Set env var BEFORE calling any daemon/settings functions + old_env = os.environ.get("COCOINDEX_CODE_DIR") + os.environ["COCOINDEX_CODE_DIR"] = str(base_dir) + + try: + save_user_settings(default_user_settings()) + save_project_settings(project_dir, default_project_settings()) + + start_daemon() + + sock_path = daemon_socket_path() + deadline = time.monotonic() + 20 + while time.monotonic() < deadline: + if os.path.exists(sock_path): + break + time.sleep(0.2) + else: + log = base_dir / "daemon.log" + log_content = log.read_text() if log.exists() else "(no log)" + raise TimeoutError(f"Daemon did not start.\nLog:\n{log_content}") + + yield sock_path, project_dir + finally: + stop_daemon() + if old_env is None: + os.environ.pop("COCOINDEX_CODE_DIR", None) + else: + os.environ["COCOINDEX_CODE_DIR"] = old_env + + +def test_daemon_subprocess_starts(e2e_daemon: tuple[str, Path]) -> None: + """The daemon should be reachable via DaemonClient after start_daemon().""" + client = DaemonClient.connect() + resp = client.handshake() + assert resp.ok + assert resp.daemon_version == __version__ + client.close() + + +def test_index_and_search_via_client(e2e_daemon: tuple[str, Path]) -> None: + """Index a project and search via the client, same as ccc index / ccc search.""" + _, project_dir = e2e_daemon + + client = DaemonClient.connect() + client.handshake() + + resp = client.index(str(project_dir)) + assert resp.success + + status = client.project_status(str(project_dir)) + assert status.total_chunks > 0 + assert status.total_files > 0 + + search_resp = client.search(str(project_dir), query="fibonacci") + assert search_resp.success + assert len(search_resp.results) > 0 + assert "main.py" in search_resp.results[0].file_path + + client.close() + + +def 
test_daemon_survives_client_disconnect(e2e_daemon: tuple[str, Path]) -> None: + """Daemon should keep running after a client disconnects.""" + _, project_dir = e2e_daemon + + c1 = DaemonClient.connect() + c1.handshake() + c1.search(str(project_dir), query="fibonacci") + c1.close() + + c2 = DaemonClient.connect() + resp = c2.handshake() + assert resp.ok + search_resp = c2.search(str(project_dir), query="fibonacci") + assert search_resp.success + assert len(search_resp.results) > 0 + c2.close() diff --git a/tests/test_protocol.py b/tests/test_protocol.py new file mode 100644 index 0000000..e2b8e18 --- /dev/null +++ b/tests/test_protocol.py @@ -0,0 +1,156 @@ +"""Unit tests for the protocol module.""" + +from __future__ import annotations + +from cocoindex_code.protocol import ( + DaemonProjectInfo, + DaemonStatusRequest, + DaemonStatusResponse, + ErrorResponse, + HandshakeRequest, + IndexRequest, + IndexResponse, + ProjectStatusRequest, + ProjectStatusResponse, + Request, + Response, + SearchRequest, + SearchResponse, + SearchResult, + StopRequest, + StopResponse, + decode_request, + decode_response, + encode_request, + encode_response, +) + + +def test_encode_decode_handshake_request() -> None: + req = HandshakeRequest(version="1.0.0") + data = encode_request(req) + decoded = decode_request(data) + assert isinstance(decoded, HandshakeRequest) + assert decoded.version == "1.0.0" + + +def test_encode_decode_search_request_with_defaults() -> None: + req = SearchRequest(project_root="/tmp", query="test") + data = encode_request(req) + decoded = decode_request(data) + assert isinstance(decoded, SearchRequest) + assert decoded.languages is None + assert decoded.limit == 5 + assert decoded.offset == 0 + assert decoded.refresh is False + + +def test_encode_decode_search_request_with_all_fields() -> None: + req = SearchRequest( + project_root="/tmp/proj", + query="hello world", + languages=["python", "rust"], + paths=["src/*"], + limit=20, + offset=5, + refresh=True, + ) + 
data = encode_request(req) + decoded = decode_request(data) + assert isinstance(decoded, SearchRequest) + assert decoded.project_root == "/tmp/proj" + assert decoded.query == "hello world" + assert decoded.languages == ["python", "rust"] + assert decoded.paths == ["src/*"] + assert decoded.limit == 20 + assert decoded.offset == 5 + assert decoded.refresh is True + + +def test_encode_decode_search_response_with_results() -> None: + resp = SearchResponse( + success=True, + results=[ + SearchResult( + file_path="main.py", + language="python", + content="def foo(): pass", + start_line=1, + end_line=1, + score=0.95, + ), + ], + total_returned=1, + offset=0, + ) + data = encode_response(resp) + decoded = decode_response(data) + assert isinstance(decoded, SearchResponse) + assert decoded.success is True + assert len(decoded.results) == 1 + assert decoded.results[0].file_path == "main.py" + assert decoded.results[0].score == 0.95 + + +def test_encode_decode_error_response() -> None: + resp = ErrorResponse(message="something failed") + data = encode_response(resp) + decoded = decode_response(data) + assert isinstance(decoded, ErrorResponse) + assert decoded.message == "something failed" + + +def test_encode_decode_daemon_status_response() -> None: + resp = DaemonStatusResponse( + version="1.0.0", + uptime_seconds=42.5, + projects=[ + DaemonProjectInfo(project_root="/tmp/proj", indexing=False), + ], + ) + data = encode_response(resp) + decoded = decode_response(data) + assert isinstance(decoded, DaemonStatusResponse) + assert decoded.version == "1.0.0" + assert decoded.uptime_seconds == 42.5 + assert len(decoded.projects) == 1 + assert decoded.projects[0].project_root == "/tmp/proj" + assert decoded.projects[0].indexing is False + + +def test_tagged_union_dispatch() -> None: + req = IndexRequest(project_root="/tmp") + data = encode_request(req) + decoded = decode_request(data) + assert isinstance(decoded, IndexRequest) + assert not isinstance(decoded, HandshakeRequest) + + 
+def test_all_request_types_round_trip() -> None: + requests: list[Request] = [ + HandshakeRequest(version="1.0.0"), + IndexRequest(project_root="/tmp"), + SearchRequest(project_root="/tmp", query="test"), + ProjectStatusRequest(project_root="/tmp"), + DaemonStatusRequest(), + StopRequest(), + ] + for req in requests: + data = encode_request(req) + decoded = decode_request(data) + assert type(decoded) is type(req) + + +def test_all_response_types_round_trip() -> None: + responses: list[Response] = [ + IndexResponse(success=True), + SearchResponse(success=True), + ProjectStatusResponse(indexing=False, total_chunks=0, total_files=0, languages={}), + DaemonStatusResponse(version="1.0.0", uptime_seconds=0.0, projects=[]), + StopResponse(ok=True), + ErrorResponse(message="err"), + ] + for resp in responses: + data = encode_response(resp) + decoded = decode_response(data) + assert type(decoded) is type(resp) diff --git a/tests/test_settings.py b/tests/test_settings.py new file mode 100644 index 0000000..fa038bd --- /dev/null +++ b/tests/test_settings.py @@ -0,0 +1,165 @@ +"""Unit tests for the settings module.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from cocoindex_code.settings import ( + DEFAULT_EXCLUDED_PATTERNS, + DEFAULT_INCLUDED_PATTERNS, + EmbeddingSettings, + LanguageOverride, + ProjectSettings, + UserSettings, + default_project_settings, + default_user_settings, + find_parent_with_marker, + find_project_root, + load_project_settings, + load_user_settings, + save_project_settings, + save_user_settings, +) + + +@pytest.fixture() +def _patch_user_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Redirect user_settings_dir() to a temp directory.""" + monkeypatch.setattr( + "cocoindex_code.settings.user_settings_dir", + lambda: tmp_path / ".cocoindex_code", + ) + monkeypatch.setattr( + "cocoindex_code.settings.user_settings_path", + lambda: tmp_path / ".cocoindex_code" / "settings.yml", + ) + + +def 
test_default_user_settings() -> None: + s = default_user_settings() + assert s.embedding.provider == "sentence-transformers" + assert "all-MiniLM-L6-v2" in s.embedding.model + assert s.embedding.device is None + assert s.envs == {} + + +def test_default_project_settings() -> None: + s = default_project_settings() + assert s.include_patterns == DEFAULT_INCLUDED_PATTERNS + assert s.exclude_patterns == DEFAULT_EXCLUDED_PATTERNS + assert s.language_overrides == [] + + +@pytest.mark.usefixtures("_patch_user_dir") +def test_save_and_load_user_settings(tmp_path: Path) -> None: + settings = UserSettings( + embedding=EmbeddingSettings( + provider="litellm", + model="gemini/text-embedding-004", + device="cpu", + ), + envs={"GEMINI_API_KEY": "test-key"}, + ) + save_user_settings(settings) + loaded = load_user_settings() + assert loaded.embedding.provider == settings.embedding.provider + assert loaded.embedding.model == settings.embedding.model + assert loaded.embedding.device == settings.embedding.device + assert loaded.envs == settings.envs + + +def test_save_and_load_project_settings(tmp_path: Path) -> None: + settings = ProjectSettings( + include_patterns=["**/*.py", "**/*.rs"], + exclude_patterns=["**/target"], + language_overrides=[LanguageOverride(ext="inc", lang="php")], + ) + save_project_settings(tmp_path, settings) + loaded = load_project_settings(tmp_path) + assert loaded.include_patterns == settings.include_patterns + assert loaded.exclude_patterns == settings.exclude_patterns + assert len(loaded.language_overrides) == 1 + assert loaded.language_overrides[0].ext == "inc" + assert loaded.language_overrides[0].lang == "php" + + +@pytest.mark.usefixtures("_patch_user_dir") +def test_load_user_settings_missing_file_returns_defaults() -> None: + loaded = load_user_settings() + expected = default_user_settings() + assert loaded.embedding.provider == expected.embedding.provider + assert loaded.embedding.model == expected.embedding.model + assert loaded.envs == 
expected.envs + + +def test_load_project_settings_missing_file_raises(tmp_path: Path) -> None: + with pytest.raises(FileNotFoundError): + load_project_settings(tmp_path) + + +def test_find_project_root_from_subdirectory(tmp_path: Path) -> None: + project = tmp_path / "project" + (project / ".cocoindex_code").mkdir(parents=True) + (project / ".cocoindex_code" / "settings.yml").write_text("include_patterns: []") + subdir = project / "src" / "lib" + subdir.mkdir(parents=True) + assert find_project_root(subdir) == project + + +def test_find_project_root_from_project_root(tmp_path: Path) -> None: + project = tmp_path / "project" + (project / ".cocoindex_code").mkdir(parents=True) + (project / ".cocoindex_code" / "settings.yml").write_text("include_patterns: []") + assert find_project_root(project) == project + + +def test_find_project_root_returns_none_when_not_initialized(tmp_path: Path) -> None: + standalone = tmp_path / "standalone" + standalone.mkdir() + assert find_project_root(standalone) is None + + +def test_find_parent_with_marker_finds_git(tmp_path: Path) -> None: + repo = tmp_path / "repo" + (repo / ".git").mkdir(parents=True) + subdir = repo / "src" + subdir.mkdir() + assert find_parent_with_marker(subdir) == repo + + +def test_find_parent_with_marker_prefers_cocoindex_code(tmp_path: Path) -> None: + repo = tmp_path / "repo" + (repo / ".git").mkdir(parents=True) + (repo / ".cocoindex_code").mkdir(parents=True) + subdir = repo / "src" + subdir.mkdir() + assert find_parent_with_marker(subdir) == repo + + +@pytest.mark.usefixtures("_patch_user_dir") +def test_user_settings_litellm_round_trip() -> None: + settings = UserSettings( + embedding=EmbeddingSettings( + provider="litellm", + model="gemini/text-embedding-004", + ), + envs={"GEMINI_API_KEY": "test"}, + ) + save_user_settings(settings) + loaded = load_user_settings() + assert loaded.embedding.provider == "litellm" + assert loaded.embedding.model == "gemini/text-embedding-004" + assert loaded.envs == 
{"GEMINI_API_KEY": "test"} + + +def test_project_settings_with_language_overrides(tmp_path: Path) -> None: + settings = ProjectSettings( + language_overrides=[LanguageOverride(ext="inc", lang="php")], + ) + save_project_settings(tmp_path, settings) + loaded = load_project_settings(tmp_path) + assert len(loaded.language_overrides) == 1 + assert loaded.language_overrides[0].ext == "inc" + assert loaded.language_overrides[0].lang == "php" diff --git a/uv.lock b/uv.lock index ae14f80..455158f 100644 --- a/uv.lock +++ b/uv.lock @@ -133,6 +133,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -330,7 +339,7 @@ wheels = [ [[package]] name = "cocoindex" -version = "1.0.0a29" +version = "1.0.0a31" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -341,17 +350,17 @@ dependencies = [ { name = "typing-extensions" }, { name = "watchfiles" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/b2/c4/ca9b256b9a1b52f752251fbd7e88f4ce2b540c2b92a3fcec23f3d250d30a/cocoindex-1.0.0a29.tar.gz", hash = "sha256:879840dec53f5f07aa26ebfed97005668b3ee594f982508e05df9bed19016b70", size = 288151, upload-time = "2026-03-13T00:44:51.906Z" } +sdist = { url = "https://files.pythonhosted.org/packages/19/20/245ec9ff9d499a9fc699ae1176e523ea671266c99678b93e2ab08802896b/cocoindex-1.0.0a31.tar.gz", hash = "sha256:302515fb8fa6ca7cb4bc7fda7397b321be34e0ab72c2a6cc2c4aa7249a270b44", size = 286158, upload-time = "2026-03-14T21:46:48.199Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/5b/09a2fb74ac5dd68f43355e6f7653d3a02271176b05de76edc75e2366b67f/cocoindex-1.0.0a29-cp311-abi3-macosx_10_12_x86_64.whl", hash = "sha256:54cac1a57aa68a6cb78568785d85cfec2f7dfa0703d38d74557ab63221455118", size = 6247093, upload-time = "2026-03-13T00:44:50.014Z" }, - { url = "https://files.pythonhosted.org/packages/b3/a6/a84a61b91a89848c1f70f92499a8f6578e48e52808b91f1d0bc54adb0499/cocoindex-1.0.0a29-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:954769ecd274dd23c0dac2a1375121d52387a653b84c001d44bcc04096ee36e7", size = 6365382, upload-time = "2026-03-13T00:44:45.826Z" }, - { url = "https://files.pythonhosted.org/packages/1a/eb/87c78ac105c78aaf8cd7455e2caac6df8f6858ec369c00ddee58dd921262/cocoindex-1.0.0a29-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:41b4baa5afe425f61fe5179618eefcfcfe8c16debc4182f2b546d95b8e56c2b5", size = 6154033, upload-time = "2026-03-13T00:44:36.9Z" }, - { url = "https://files.pythonhosted.org/packages/7f/6f/63b88b93fa9565622797d33168b58007a4668518aa222deb50ba98a1767c/cocoindex-1.0.0a29-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:87ab220940bdd3fc956f58bab5ceac78245c41481ed96c06bc22aabbb430032d", size = 6347118, upload-time = "2026-03-13T00:44:41.468Z" }, - { url = 
"https://files.pythonhosted.org/packages/c7/4c/24b04b9e9f331b6ca83a73919b84f085e4c6dab85577d08fea35b50c4295/cocoindex-1.0.0a29-cp311-abi3-win_amd64.whl", hash = "sha256:5970d9554c664c402ed728104e10bf0ac6691f6a12632d81461809e475409587", size = 6527139, upload-time = "2026-03-13T00:44:53.691Z" }, - { url = "https://files.pythonhosted.org/packages/d8/5c/e60d526db051df98970b1d31c8f5f43d24e34a3aa78a222dc8adcedc71f8/cocoindex-1.0.0a29-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6c69b2df02f2111b8f8e2c57563169e8eb2810e427212731c7ddeedf1a4e57ae", size = 6362756, upload-time = "2026-03-13T00:44:47.892Z" }, - { url = "https://files.pythonhosted.org/packages/a7/ee/19f865d0a6c365bf94d057423cff56f9f60730fa827c5d25ae72061db332/cocoindex-1.0.0a29-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:33139c6c32e1e5acb33791d77e7b5e6f08c4012100ee703958ce57f3ed9c4b84", size = 6154611, upload-time = "2026-03-13T00:44:39.267Z" }, - { url = "https://files.pythonhosted.org/packages/48/00/d86379ca337300ddfcbc0473b1c54e55ece734c14861f0868ca1c831f78c/cocoindex-1.0.0a29-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:a09cdd0d321c9e5c4ec3deb1fc4c3ab3cd5403f6af47f62b6017223755acb12c", size = 6342766, upload-time = "2026-03-13T00:44:43.881Z" }, - { url = "https://files.pythonhosted.org/packages/86/73/5bc47eb075ec21a5004d0420897b567478ec8f50dd12a685d818b25437d2/cocoindex-1.0.0a29-cp314-cp314t-win_amd64.whl", hash = "sha256:19c2ea8584fe29bf62de076a57a5befcec17167458fc21b1138b041baa9c0973", size = 6523794, upload-time = "2026-03-13T00:44:55.884Z" }, + { url = "https://files.pythonhosted.org/packages/1e/58/c8db553650d58f49ad6ce729b1d923a57060da09dd0d8a03531e134945aa/cocoindex-1.0.0a31-cp311-abi3-macosx_10_12_x86_64.whl", hash = "sha256:02f4388d21f4ef1374981d4a8e6edd4a3f3d0d85fd63a75eb36cd266a2d3145d", size = 6246835, upload-time = "2026-03-14T21:46:46.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/18/c87a4f3f9332da8019571e7392508ffbd02de520482f7532604e1ef71b83/cocoindex-1.0.0a31-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:b77f6f5b532b14587aef1c69359e3dc7ec443cb02a772f9bd06964d09ccde3f8", size = 6367066, upload-time = "2026-03-14T21:46:43.565Z" }, + { url = "https://files.pythonhosted.org/packages/38/e6/a747115c3d19364e52ac1f6eb4cbbf1ac951dd864e025ec50b12a4fcd04b/cocoindex-1.0.0a31-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c25de6648b9885b06f9c46d2535844e3e16434806ad457f6f67452ef30046b59", size = 6152245, upload-time = "2026-03-14T21:46:37.061Z" }, + { url = "https://files.pythonhosted.org/packages/5b/0f/f847ac9d5d0f08d6685334a6b50e628b165b7857612df2b97ef6948bf086/cocoindex-1.0.0a31-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:01177e30e4d31ea9990a5c2d53b13a4006647b000c333b03e7686bcb039f2c06", size = 6345010, upload-time = "2026-03-14T21:46:40.551Z" }, + { url = "https://files.pythonhosted.org/packages/44/bd/069ecf02de5707aea6a46d9688ad689547df85e21aa6368bb52c4006ba5f/cocoindex-1.0.0a31-cp311-abi3-win_amd64.whl", hash = "sha256:aa2c6db7e41517000d7ef717d5f288a238c1fa0fe23a17e98c32799bb6476943", size = 6532907, upload-time = "2026-03-14T21:46:49.169Z" }, + { url = "https://files.pythonhosted.org/packages/d8/30/97526840f74c9ab676ee6d20bfbe0d9a41590a6451d586fda028f2ec6f60/cocoindex-1.0.0a31-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:81ccf43d3900488029babb30ff292613dd863e5013f5e2737e588382f26f51c6", size = 6363058, upload-time = "2026-03-14T21:46:45.011Z" }, + { url = "https://files.pythonhosted.org/packages/1a/11/4579ff59c7c376a8d2db04cdbdec812ac9f34a5335dc949d8d713d9bb345/cocoindex-1.0.0a31-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:9964135f87cdbc604b64ad96c01c9cde0158859bd144a95160853c7891d78a35", size = 6151838, upload-time = "2026-03-14T21:46:38.919Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/94/f91244554ce51ff1480731e8f3c7d5b53b7087bff7e44956824abd630e02/cocoindex-1.0.0a31-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4142fbbc7c74a3295c907899c16d9a5ad36bb17bae6085f50ab755305c5e03f7", size = 6344002, upload-time = "2026-03-14T21:46:42.132Z" }, + { url = "https://files.pythonhosted.org/packages/82/77/8a45924b732bc83bc7f29171f1f3ef27bd233b98f3d08ff56acd983bcddd/cocoindex-1.0.0a31-cp314-cp314t-win_amd64.whl", hash = "sha256:ff20052618c180becd1d3a863b40d788d4141725dd55930d08cbb24fa941a740", size = 6522233, upload-time = "2026-03-14T21:46:50.806Z" }, ] [package.optional-dependencies] @@ -366,10 +375,13 @@ dependencies = [ { name = "cocoindex", extra = ["litellm"] }, { name = "einops" }, { name = "mcp" }, + { name = "msgspec" }, { name = "numpy" }, { name = "pydantic" }, + { name = "pyyaml" }, { name = "sentence-transformers" }, { name = "sqlite-vec" }, + { name = "typer" }, ] [package.optional-dependencies] @@ -390,13 +402,15 @@ dev = [ { name = "pytest-asyncio" }, { name = "pytest-cov" }, { name = "ruff" }, + { name = "types-pyyaml" }, ] [package.metadata] requires-dist = [ - { name = "cocoindex", extras = ["litellm"], specifier = "==1.0.0a29" }, + { name = "cocoindex", extras = ["litellm"], specifier = "==1.0.0a31" }, { name = "einops", specifier = ">=0.8.2" }, { name = "mcp", specifier = ">=1.0.0" }, + { name = "msgspec", specifier = ">=0.19.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0.0" }, { name = "numpy", specifier = ">=1.24.0" }, { name = "prek", marker = "extra == 'dev'", specifier = ">=0.1.0" }, @@ -404,9 +418,11 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.21.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" }, + { name = "pyyaml", specifier = ">=6.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, { 
name = "sentence-transformers", specifier = ">=2.2.0" }, { name = "sqlite-vec", specifier = ">=0.1.0" }, + { name = "typer", specifier = ">=0.9.0" }, ] provides-extras = ["dev"] @@ -418,6 +434,7 @@ dev = [ { name = "pytest-asyncio", specifier = ">=0.21.0" }, { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "ruff", specifier = ">=0.1.0" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, ] [[package]] @@ -1271,6 +1288,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "msgspec" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/59/fdcb3af72f750a8de2bcf39d62ada70b5eb17b06d7f63860e0a679cb656b/msgspec-0.20.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:09e0efbf1ac641fedb1d5496c59507c2f0dc62a052189ee62c763e0aae217520", size = 193345, upload-time = "2025-11-24T03:55:20.613Z" }, + { url = "https://files.pythonhosted.org/packages/5a/15/3c225610da9f02505d37d69a77f4a2e7daae2a125f99d638df211ba84e59/msgspec-0.20.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23ee3787142e48f5ee746b2909ce1b76e2949fbe0f97f9f6e70879f06c218b54", size = 186867, upload-time = "2025-11-24T03:55:22.4Z" }, + { url = "https://files.pythonhosted.org/packages/81/36/13ab0c547e283bf172f45491edfdea0e2cecb26ae61e3a7b1ae6058b326d/msgspec-0.20.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:81f4ac6f0363407ac0465eff5c7d4d18f26870e00674f8fcb336d898a1e36854", size = 215351, upload-time = "2025-11-24T03:55:23.958Z" }, + { url = "https://files.pythonhosted.org/packages/6b/96/5c095b940de3aa6b43a71ec76275ac3537b21bd45c7499b5a17a429110fa/msgspec-0.20.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb4d873f24ae18cd1334f4e37a178ed46c9d186437733351267e0a269bdf7e53", size = 219896, upload-time = "2025-11-24T03:55:25.356Z" }, + { url = "https://files.pythonhosted.org/packages/98/7a/81a7b5f01af300761087b114dafa20fb97aed7184d33aab64d48874eb187/msgspec-0.20.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b92b8334427b8393b520c24ff53b70f326f79acf5f74adb94fd361bcff8a1d4e", size = 220389, upload-time = "2025-11-24T03:55:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/70/c0/3d0cce27db9a9912421273d49eab79ce01ecd2fed1a2f1b74af9b445f33c/msgspec-0.20.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:562c44b047c05cc0384e006fae7a5e715740215c799429e0d7e3e5adf324285a", size = 223348, upload-time = "2025-11-24T03:55:28.311Z" }, + { url = "https://files.pythonhosted.org/packages/89/5e/406b7d578926b68790e390d83a1165a9bfc2d95612a1a9c1c4d5c72ea815/msgspec-0.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:d1dcc93a3ce3d3195985bfff18a48274d0b5ffbc96fa1c5b89da6f0d9af81b29", size = 188713, upload-time = "2025-11-24T03:55:29.553Z" }, + { url = "https://files.pythonhosted.org/packages/47/87/14fe2316624ceedf76a9e94d714d194cbcb699720b210ff189f89ca4efd7/msgspec-0.20.0-cp311-cp311-win_arm64.whl", hash = "sha256:aa387aa330d2e4bd69995f66ea8fdc87099ddeedf6fdb232993c6a67711e7520", size = 174229, upload-time = "2025-11-24T03:55:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6f/1e25eee957e58e3afb2a44b94fa95e06cebc4c236193ed0de3012fff1e19/msgspec-0.20.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2aba22e2e302e9231e85edc24f27ba1f524d43c223ef5765bd8624c7df9ec0a5", size = 196391, 
upload-time = "2025-11-24T03:55:32.677Z" }, + { url = "https://files.pythonhosted.org/packages/7f/ee/af51d090ada641d4b264992a486435ba3ef5b5634bc27e6eb002f71cef7d/msgspec-0.20.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:716284f898ab2547fedd72a93bb940375de9fbfe77538f05779632dc34afdfde", size = 188644, upload-time = "2025-11-24T03:55:33.934Z" }, + { url = "https://files.pythonhosted.org/packages/49/d6/9709ee093b7742362c2934bfb1bbe791a1e09bed3ea5d8a18ce552fbfd73/msgspec-0.20.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:558ed73315efa51b1538fa8f1d3b22c8c5ff6d9a2a62eff87d25829b94fc5054", size = 218852, upload-time = "2025-11-24T03:55:35.575Z" }, + { url = "https://files.pythonhosted.org/packages/5c/a2/488517a43ccf5a4b6b6eca6dd4ede0bd82b043d1539dd6bb908a19f8efd3/msgspec-0.20.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:509ac1362a1d53aa66798c9b9fd76872d7faa30fcf89b2fba3bcbfd559d56eb0", size = 224937, upload-time = "2025-11-24T03:55:36.859Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e8/49b832808aa23b85d4f090d1d2e48a4e3834871415031ed7c5fe48723156/msgspec-0.20.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1353c2c93423602e7dea1aa4c92f3391fdfc25ff40e0bacf81d34dbc68adb870", size = 222858, upload-time = "2025-11-24T03:55:38.187Z" }, + { url = "https://files.pythonhosted.org/packages/9f/56/1dc2fa53685dca9c3f243a6cbecd34e856858354e455b77f47ebd76cf5bf/msgspec-0.20.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb33b5eb5adb3c33d749684471c6a165468395d7aa02d8867c15103b81e1da3e", size = 227248, upload-time = "2025-11-24T03:55:39.496Z" }, + { url = "https://files.pythonhosted.org/packages/5a/51/aba940212c23b32eedce752896205912c2668472ed5b205fc33da28a6509/msgspec-0.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:fb1d934e435dd3a2b8cf4bbf47a8757100b4a1cfdc2afdf227541199885cdacb", size = 190024, upload-time = "2025-11-24T03:55:40.829Z" }, 
+ { url = "https://files.pythonhosted.org/packages/41/ad/3b9f259d94f183daa9764fef33fdc7010f7ecffc29af977044fa47440a83/msgspec-0.20.0-cp312-cp312-win_arm64.whl", hash = "sha256:00648b1e19cf01b2be45444ba9dc961bd4c056ffb15706651e64e5d6ec6197b7", size = 175390, upload-time = "2025-11-24T03:55:42.05Z" }, + { url = "https://files.pythonhosted.org/packages/8a/d1/b902d38b6e5ba3bdddbec469bba388d647f960aeed7b5b3623a8debe8a76/msgspec-0.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c1ff8db03be7598b50dd4b4a478d6fe93faae3bd54f4f17aa004d0e46c14c46", size = 196463, upload-time = "2025-11-24T03:55:43.405Z" }, + { url = "https://files.pythonhosted.org/packages/57/b6/eff0305961a1d9447ec2b02f8c73c8946f22564d302a504185b730c9a761/msgspec-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f6532369ece217fd37c5ebcfd7e981f2615628c21121b7b2df9d3adcf2fd69b8", size = 188650, upload-time = "2025-11-24T03:55:44.761Z" }, + { url = "https://files.pythonhosted.org/packages/99/93/f2ec1ae1de51d3fdee998a1ede6b2c089453a2ee82b5c1b361ed9095064a/msgspec-0.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a1697da2f85a751ac3cc6a97fceb8e937fc670947183fb2268edaf4016d1ee", size = 218834, upload-time = "2025-11-24T03:55:46.441Z" }, + { url = "https://files.pythonhosted.org/packages/28/83/36557b04cfdc317ed8a525c4993b23e43a8fbcddaddd78619112ca07138c/msgspec-0.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7fac7e9c92eddcd24c19d9e5f6249760941485dff97802461ae7c995a2450111", size = 224917, upload-time = "2025-11-24T03:55:48.06Z" }, + { url = "https://files.pythonhosted.org/packages/8f/56/362037a1ed5be0b88aced59272442c4b40065c659700f4b195a7f4d0ac88/msgspec-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f953a66f2a3eb8d5ea64768445e2bb301d97609db052628c3e1bcb7d87192a9f", size = 222821, upload-time = "2025-11-24T03:55:49.388Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/75/fa2370ec341cedf663731ab7042e177b3742645c5dd4f64dc96bd9f18a6b/msgspec-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:247af0313ae64a066d3aea7ba98840f6681ccbf5c90ba9c7d17f3e39dbba679c", size = 227227, upload-time = "2025-11-24T03:55:51.125Z" }, + { url = "https://files.pythonhosted.org/packages/f1/25/5e8080fe0117f799b1b68008dc29a65862077296b92550632de015128579/msgspec-0.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:67d5e4dfad52832017018d30a462604c80561aa62a9d548fc2bd4e430b66a352", size = 189966, upload-time = "2025-11-24T03:55:52.458Z" }, + { url = "https://files.pythonhosted.org/packages/79/b6/63363422153937d40e1cb349c5081338401f8529a5a4e216865decd981bf/msgspec-0.20.0-cp313-cp313-win_arm64.whl", hash = "sha256:91a52578226708b63a9a13de287b1ec3ed1123e4a088b198143860c087770458", size = 175378, upload-time = "2025-11-24T03:55:53.721Z" }, + { url = "https://files.pythonhosted.org/packages/bb/18/62dc13ab0260c7d741dda8dc7f481495b93ac9168cd887dda5929880eef8/msgspec-0.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:eead16538db1b3f7ec6e3ed1f6f7c5dec67e90f76e76b610e1ffb5671815633a", size = 196407, upload-time = "2025-11-24T03:55:55.001Z" }, + { url = "https://files.pythonhosted.org/packages/dd/1d/b9949e4ad6953e9f9a142c7997b2f7390c81e03e93570c7c33caf65d27e1/msgspec-0.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:703c3bb47bf47801627fb1438f106adbfa2998fe586696d1324586a375fca238", size = 188889, upload-time = "2025-11-24T03:55:56.311Z" }, + { url = "https://files.pythonhosted.org/packages/1e/19/f8bb2dc0f1bfe46cc7d2b6b61c5e9b5a46c62298e8f4d03bbe499c926180/msgspec-0.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cdb227dc585fb109305cee0fd304c2896f02af93ecf50a9c84ee54ee67dbb42", size = 219691, upload-time = "2025-11-24T03:55:57.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/b8/8e/6b17e43f6eb9369d9858ee32c97959fcd515628a1df376af96c11606cf70/msgspec-0.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27d35044dd8818ac1bd0fedb2feb4fbdff4e3508dd7c5d14316a12a2d96a0de0", size = 224918, upload-time = "2025-11-24T03:55:59.322Z" }, + { url = "https://files.pythonhosted.org/packages/1c/db/0e833a177db1a4484797adba7f429d4242585980b90882cc38709e1b62df/msgspec-0.20.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4296393a29ee42dd25947981c65506fd4ad39beaf816f614146fa0c5a6c91ae", size = 223436, upload-time = "2025-11-24T03:56:00.716Z" }, + { url = "https://files.pythonhosted.org/packages/c3/30/d2ee787f4c918fd2b123441d49a7707ae9015e0e8e1ab51aa7967a97b90e/msgspec-0.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:205fbdadd0d8d861d71c8f3399fe1a82a2caf4467bc8ff9a626df34c12176980", size = 227190, upload-time = "2025-11-24T03:56:02.371Z" }, + { url = "https://files.pythonhosted.org/packages/ff/37/9c4b58ff11d890d788e700b827db2366f4d11b3313bf136780da7017278b/msgspec-0.20.0-cp314-cp314-win_amd64.whl", hash = "sha256:7dfebc94fe7d3feec6bc6c9df4f7e9eccc1160bb5b811fbf3e3a56899e398a6b", size = 193950, upload-time = "2025-11-24T03:56:03.668Z" }, + { url = "https://files.pythonhosted.org/packages/e9/4e/cab707bf2fa57408e2934e5197fc3560079db34a1e3cd2675ff2e47e07de/msgspec-0.20.0-cp314-cp314-win_arm64.whl", hash = "sha256:2ad6ae36e4a602b24b4bf4eaf8ab5a441fec03e1f1b5931beca8ebda68f53fc0", size = 179018, upload-time = "2025-11-24T03:56:05.038Z" }, + { url = "https://files.pythonhosted.org/packages/4c/06/3da3fc9aaa55618a8f43eb9052453cfe01f82930bca3af8cea63a89f3a11/msgspec-0.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f84703e0e6ef025663dd1de828ca028774797b8155e070e795c548f76dde65d5", size = 200389, upload-time = "2025-11-24T03:56:06.375Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/3b/cc4270a5ceab40dfe1d1745856951b0a24fd16ac8539a66ed3004a60c91e/msgspec-0.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7c83fc24dd09cf1275934ff300e3951b3adc5573f0657a643515cc16c7dee131", size = 193198, upload-time = "2025-11-24T03:56:07.742Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ae/4c7905ac53830c8e3c06fdd60e3cdcfedc0bbc993872d1549b84ea21a1bd/msgspec-0.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f13ccb1c335a124e80c4562573b9b90f01ea9521a1a87f7576c2e281d547f56", size = 225973, upload-time = "2025-11-24T03:56:09.18Z" }, + { url = "https://files.pythonhosted.org/packages/d9/da/032abac1de4d0678d99eaeadb1323bd9d247f4711c012404ba77ed6f15ca/msgspec-0.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17c2b5ca19f19306fc83c96d85e606d2cc107e0caeea85066b5389f664e04846", size = 229509, upload-time = "2025-11-24T03:56:10.898Z" }, + { url = "https://files.pythonhosted.org/packages/69/52/fdc7bdb7057a166f309e0b44929e584319e625aaba4771b60912a9321ccd/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d931709355edabf66c2dd1a756b2d658593e79882bc81aae5964969d5a291b63", size = 230434, upload-time = "2025-11-24T03:56:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/cb/fe/1dfd5f512b26b53043884e4f34710c73e294e7cc54278c3fe28380e42c37/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:565f915d2e540e8a0c93a01ff67f50aebe1f7e22798c6a25873f9fda8d1325f8", size = 231758, upload-time = "2025-11-24T03:56:13.765Z" }, + { url = "https://files.pythonhosted.org/packages/97/f6/9ba7121b8e0c4e0beee49575d1dbc804e2e72467692f0428cf39ceba1ea5/msgspec-0.20.0-cp314-cp314t-win_amd64.whl", hash = "sha256:726f3e6c3c323f283f6021ebb6c8ccf58d7cd7baa67b93d73bfbe9a15c34ab8d", size = 206540, upload-time = "2025-11-24T03:56:15.029Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" }, +] + [[package]] name = "multidict" version = "6.7.1" @@ -2922,6 +2987,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" }, ] +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + [[package]] name = "typer-slim" version = "0.21.1" @@ -2935,6 +3015,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" }, ] +[[package]] +name = "types-pyyaml" +version = "6.0.12.20250915" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"