@@ -35,9 +35,7 @@ def cli(verbose: bool):
35
35
36
36
37
37
@cli .command ()
38
- @click .argument (
39
- "directory" , type = click .Path (exists = True , file_okay = False , path_type = Path )
40
- )
38
+ @click .argument ("paths" , nargs = - 1 , type = click .Path (exists = True , path_type = Path ))
41
39
@click .option (
42
40
"--pattern" , "-p" , default = "**/*.*" , help = "Glob pattern for files to index"
43
41
)
@@ -47,16 +45,29 @@ def cli(verbose: bool):
47
45
default = default_persist_dir ,
48
46
help = "Directory to persist the index" ,
49
47
)
50
- def index (directory : Path , pattern : str , persist_dir : Path ):
51
- """Index documents in a directory."""
48
+ def index (paths : list [Path ], pattern : str , persist_dir : Path ):
49
+ """Index documents in one or more directories."""
50
+ if not paths :
51
+ console .print ("❌ No paths provided" , style = "red" )
52
+ return
53
+
52
54
try :
53
55
indexer = Indexer (persist_directory = persist_dir , enable_persist = True )
54
- console .print (f"Indexing files in { directory } with pattern { pattern } " )
55
-
56
- # Index the files
57
- n_indexed = indexer .index_directory (directory , pattern )
58
-
59
- console .print (f"✅ Successfully indexed { n_indexed } files" , style = "green" )
56
+ total_indexed = 0
57
+
58
+ for path in paths :
59
+ if path .is_file ():
60
+ console .print (f"Indexing file: { path } " )
61
+ n_indexed = indexer .index_file (path )
62
+ if n_indexed is not None :
63
+ total_indexed += n_indexed
64
+ else :
65
+ console .print (f"Indexing files in { path } with pattern { pattern } " )
66
+ n_indexed = indexer .index_directory (path , pattern )
67
+ if n_indexed is not None :
68
+ total_indexed += n_indexed
69
+
70
+ console .print (f"✅ Successfully indexed { total_indexed } files" , style = "green" )
60
71
except Exception as e :
61
72
console .print (f"❌ Error indexing directory: { e } " , style = "red" )
62
73
@@ -74,6 +85,12 @@ def index(directory: Path, pattern: str, persist_dir: Path):
74
85
@click .option ("--max-tokens" , default = 4000 , help = "Maximum tokens in context window" )
75
86
@click .option ("--show-context" , is_flag = True , help = "Show the full context content" )
76
87
@click .option ("--raw" , is_flag = True , help = "Skip syntax highlighting" )
88
+ @click .option ("--explain" , is_flag = True , help = "Show scoring explanations" )
89
+ @click .option (
90
+ "--weights" ,
91
+ type = click .STRING ,
92
+ help = "Custom scoring weights as JSON string, e.g. '{\" recency_boost\" : 0.3}'" ,
93
+ )
77
94
def search (
78
95
query : str ,
79
96
paths : list [Path ],
@@ -82,21 +99,46 @@ def search(
82
99
max_tokens : int ,
83
100
show_context : bool ,
84
101
raw : bool ,
102
+ explain : bool ,
103
+ weights : str | None ,
85
104
):
86
105
"""Search the index and assemble context."""
87
106
paths = [path .resolve () for path in paths ]
88
107
89
108
# Hide ChromaDB output during initialization and search
90
109
with console .status ("Initializing..." ):
110
+ # Parse custom weights if provided
111
+ scoring_weights = None
112
+ if weights :
113
+ try :
114
+ import json
115
+
116
+ scoring_weights = json .loads (weights )
117
+ except json .JSONDecodeError as e :
118
+ console .print (f"❌ Invalid weights JSON: { e } " , style = "red" )
119
+ return
120
+ except Exception as e :
121
+ console .print (f"❌ Error parsing weights: { e } " , style = "red" )
122
+ return
123
+
91
124
# Temporarily redirect stdout to suppress ChromaDB output
92
125
stdout = sys .stdout
93
126
sys .stdout = open (os .devnull , "w" )
94
127
try :
95
- indexer = Indexer (persist_directory = persist_dir , enable_persist = True )
96
- assembler = ContextAssembler ( max_tokens = max_tokens )
97
- documents , distances = indexer . search (
98
- query , n_results = n_results , paths = paths
128
+ indexer = Indexer (
129
+ persist_directory = persist_dir ,
130
+ enable_persist = True ,
131
+ scoring_weights = scoring_weights ,
99
132
)
133
+ assembler = ContextAssembler (max_tokens = max_tokens )
134
+ if explain :
135
+ documents , distances , explanations = indexer .search (
136
+ query , n_results = n_results , paths = paths , explain = True
137
+ )
138
+ else :
139
+ documents , distances , _ = indexer .search (
140
+ query , n_results = n_results , paths = paths
141
+ )
100
142
finally :
101
143
sys .stdout .close ()
102
144
sys .stdout = stdout
@@ -128,20 +170,50 @@ def search(
128
170
for i , doc in enumerate (documents ):
129
171
source = doc .metadata .get ("source" , "unknown" )
130
172
distance = distances [i ]
131
- relevance = 1 - distance # Convert distance to similarity score
132
173
133
- # Show document header with relevance score
134
- console .print (
135
- f"\n [cyan]{ i + 1 } . { source } [/cyan] [yellow](relevance: { relevance :.2f} )[/yellow]"
136
- )
174
+ # Show document header
175
+ console .print (f"\n [cyan]{ i + 1 } . { source } [/cyan]" )
176
+
177
+ # Show scoring explanation if requested
178
+ if explain and explanations : # Make sure explanations is not None
179
+ explanation = explanations [i ]
180
+ console .print ("\n [bold]Scoring Breakdown:[/bold]" )
181
+
182
+ # Show individual score components
183
+ scores = explanation .get ("scores" , {})
184
+ for factor , score in scores .items ():
185
+ # Color code the scores
186
+ if score > 0 :
187
+ score_color = "green"
188
+ sign = "+"
189
+ elif score < 0 :
190
+ score_color = "red"
191
+ sign = ""
192
+ else :
193
+ score_color = "yellow"
194
+ sign = " "
195
+
196
+ # Print score and explanation
197
+ console .print (
198
+ f" { factor :15} [{ score_color } ]{ sign } { score :>6.3f} [/{ score_color } ] | { explanation ['explanations' ][factor ]} "
199
+ )
200
+
201
+ # Show total score
202
+ total = explanation ["total_score" ]
203
+ console .print (f"\n { 'Total' :15} [bold blue]{ total :>7.3f} [/bold blue]" )
204
+ else :
205
+ # Just show the base relevance score
206
+ relevance = 1 - distance
207
+ console .print (f"[yellow](relevance: { relevance :.2f} )[/yellow]" )
137
208
138
209
# Use file extension as lexer (strip the dot)
139
210
lexer = doc .metadata .get ("extension" , "" ).lstrip ("." ) or "text"
140
211
141
212
# Extract preview content (first ~200 chars)
142
213
preview = doc .content [:200 ] + ("..." if len (doc .content ) > 200 else "" )
143
214
144
- # Display with syntax highlighting
215
+ # Display preview with syntax highlighting
216
+ console .print ("\n [bold]Preview:[/bold]" )
145
217
syntax = Syntax (
146
218
preview ,
147
219
lexer ,
0 commit comments