77
88from __future__ import annotations
99
10- import glob
1110import os
11+ from pathlib import Path
1212from typing import Any
1313
1414import chromadb
2020
2121_embedding_fn = embedding_functions .DefaultEmbeddingFunction ()
2222
# Module-level cache for the Chroma client. It starts empty and is filled in
# by the first call to _get_client(); later calls reuse the same object.
_client: chromadb.PersistentClient | None = None


def _get_client() -> chromadb.PersistentClient:
    """Return the shared persistent Chroma client, creating it on first use.

    On the first call, ``CHROMA_DIR`` is created if it does not already
    exist and a ``chromadb.PersistentClient`` is opened on that path. The
    result is stored in the module-level ``_client`` and returned as-is on
    every subsequent call.

    Returns:
        The cached client object backed by ``CHROMA_DIR``.
    """
    global _client
    if _client is None:
        # exist_ok=True: an already-present directory is not an error.
        os.makedirs(CHROMA_DIR, exist_ok=True)
        _client = chromadb.PersistentClient(path=str(CHROMA_DIR))
    return _client
2732
2833
29- def _get_collection (client : chromadb . PersistentClient ) -> Any :
30- return client .get_or_create_collection (
34+ def _get_collection () -> Any :
35+ return _get_client () .get_or_create_collection (
3136 name = COLLECTION_NAME ,
3237 embedding_function = _embedding_fn ,
3338 metadata = {"hnsw:space" : "cosine" },
@@ -51,9 +56,8 @@ def ingest_playbooks(playbooks_dir: str | None = None) -> dict[str, Any]:
5156
5257 Returns a summary dict with ``files_ingested``, ``total_chunks``, and ``details``.
5358 """
54- src = playbooks_dir or str (PLAYBOOKS_DIR )
55- client = _get_client ()
56- collection = _get_collection (client )
59+ src = Path (playbooks_dir ) if playbooks_dir else PLAYBOOKS_DIR
60+ collection = _get_collection ()
5761
5862 # Wipe existing data for a clean re-index.
5963 try :
@@ -63,27 +67,26 @@ def ingest_playbooks(playbooks_dir: str | None = None) -> dict[str, Any]:
6367 except Exception :
6468 pass
6569
66- md_files = sorted (glob .glob (os .path .join (src , "*.md" )))
70+ skip = {"README.md" , "full-irp-template.md" }
71+ md_files = sorted (src .glob ("*.md" ))
6772 total_chunks = 0
6873 details : list [dict [str , Any ]] = []
6974
7075 for filepath in md_files :
71- filename = os .path .basename (filepath )
72- if filename in ("README.md" , "full-irp-template.md" ):
76+ if filepath .name in skip :
7377 continue
7478
75- with open (filepath ) as f :
76- content = f .read ()
79+ content = filepath .read_text ()
7780 if not content .strip ():
7881 continue
7982
80- playbook_type = filename . removesuffix ( ".md" )
83+ playbook_type = filepath . stem
8184 chunks = chunk_document (content )
8285
8386 ids = [f"{ playbook_type } __chunk_{ i } " for i in range (len (chunks ))]
8487 metadatas = [
8588 {
86- "source_file" : filename ,
89+ "source_file" : filepath . name ,
8790 "playbook_type" : playbook_type ,
8891 "chunk_index" : i ,
8992 "total_chunks" : len (chunks ),
@@ -93,7 +96,7 @@ def ingest_playbooks(playbooks_dir: str | None = None) -> dict[str, Any]:
9396
9497 collection .add (ids = ids , documents = chunks , metadatas = metadatas )
9598 total_chunks += len (chunks )
96- details .append ({"file" : filename , "chunks" : len (chunks )})
99+ details .append ({"file" : filepath . name , "chunks" : len (chunks )})
97100
98101 return {
99102 "files_ingested" : len (details ),
@@ -104,8 +107,7 @@ def ingest_playbooks(playbooks_dir: str | None = None) -> dict[str, Any]:
104107
105108def search_playbooks (query : str , n_results : int = 5 ) -> list [dict [str , Any ]]:
106109 """Semantic search — returns the *n_results* closest playbook chunks."""
107- client = _get_client ()
108- collection = _get_collection (client )
110+ collection = _get_collection ()
109111
110112 results = collection .query (query_texts = [query ], n_results = n_results )
111113
@@ -124,8 +126,7 @@ def search_playbooks(query: str, n_results: int = 5) -> list[dict[str, Any]]:
124126
125127def list_playbooks () -> list [dict [str , Any ]]:
126128 """Return de-duplicated metadata for every ingested playbook type."""
127- client = _get_client ()
128- collection = _get_collection (client )
129+ collection = _get_collection ()
129130
130131 seen : dict [str , dict [str , Any ]] = {}
131132 for meta in collection .get ()["metadatas" ]:
0 commit comments