2
2
from abc import abstractmethod
3
3
from typing import List , Optional , Tuple
4
4
5
+ from codegate .extract_snippets .factory import MessageCodeExtractorFactory
5
6
import structlog
6
7
from litellm import ChatCompletionRequest , ChatCompletionSystemMessage , ModelResponse
7
8
from litellm .types .utils import Delta , StreamingChoices
8
9
9
10
from codegate .config import Config
10
11
from codegate .pipeline .base import (
11
12
AlertSeverity ,
13
+ CodeSnippet ,
12
14
PipelineContext ,
13
15
PipelineResult ,
14
16
PipelineStep ,
@@ -44,7 +46,9 @@ def _hide_secret(self, match: Match) -> str:
44
46
pass
45
47
46
48
@abstractmethod
47
- def _notify_secret (self , match : Match , protected_text : List [str ]) -> None :
49
+ def _notify_secret (
50
+ self , match : Match , code_snippet : Optional [CodeSnippet ], protected_text : List [str ]
51
+ ) -> None :
48
52
"""
49
53
Notify about a found secret
50
54
TODO: If the secret came from a CodeSnippet we should notify about that. This would
@@ -106,7 +110,9 @@ def _get_surrounding_secret_lines(
106
110
end_line = min (secret_line + surrounding_lines , len (lines ))
107
111
return "\n " .join (lines [start_line :end_line ])
108
112
109
- def obfuscate (self , text : str ) -> tuple [str , List [Match ]]:
113
+ def obfuscate (self , text : str , snippet : Optional [CodeSnippet ]) -> tuple [str , List [Match ]]:
114
+ if snippet :
115
+ text = snippet .code
110
116
matches = CodegateSignatures .find_in_string (text )
111
117
if not matches :
112
118
return text , []
@@ -147,13 +153,14 @@ def obfuscate(self, text: str) -> tuple[str, List[Match]]:
147
153
logger .info (
148
154
f"\n Service: { match .service } "
149
155
f"\n Type: { match .type } "
156
+ f"\n Key: { match .secret_key } "
150
157
f"\n Original: { match .value } "
151
158
f"\n Encrypted: { hidden_secret } "
152
159
)
153
160
154
161
# Second pass. Notify the secrets in DB over the complete protected text.
155
162
for _ , _ , match in absolute_matches :
156
- self ._notify_secret (match , protected_text )
163
+ self ._notify_secret (match , code_snippet = snippet , protected_text = protected_text )
157
164
158
165
# Convert back to string
159
166
protected_string = "" .join (protected_text )
@@ -184,11 +191,23 @@ def _hide_secret(self, match: Match) -> str:
184
191
)
185
192
return f"REDACTED<${ encrypted_value } >"
186
193
187
def _notify_secret(
    self, match: Match, code_snippet: Optional[CodeSnippet], protected_text: List[str]
) -> None:
    """
    Raise a critical alert describing a detected secret.

    The alert text is a small markdown report listing the service, type, key and line
    number of the match, followed by the surrounding lines taken from the already
    protected text.

    Args:
        match: The secret match being reported.
        code_snippet: Snippet the secret was found in, or None; forwarded to the alert.
        protected_text: The protected text the context lines are read from.
    """
    context_lines = self._get_surrounding_secret_lines(protected_text, match.line_number)
    # Fall back to a placeholder when the match carries no key name.
    key_label = match.secret_key or "(Unknown)"
    report_lines = [
        "**Secret Detected** 🔒",
        f"- Service: {match.service}",
        f"- Type: {match.type}",
        f"- Key: {key_label}",
        f"- Line Number: {match.line_number}",
        f"- Context:\n```\n{context_lines}\n```",
    ]
    self._context.add_alert(
        self._name,
        trigger_string="\n".join(report_lines),
        severity_category=AlertSeverity.CRITICAL,
        code_snippet=code_snippet,
    )
193
212
194
213
@@ -205,7 +224,9 @@ def _hide_secret(self, match: Match) -> str:
205
224
"""
206
225
return "*" * 32
207
226
208
def _notify_secret(
    self, match: Match, code_snippet: Optional[CodeSnippet], protected_text: List[str]
) -> None:
    """
    Deliberately do nothing: this implementation raises no alert for a found secret.

    The parameters are accepted only so the signature matches the other
    `_notify_secret` implementations.
    """
    return None
210
231
211
232
@@ -227,7 +248,12 @@ def name(self) -> str:
227
248
return "codegate-secrets"
228
249
229
250
def _redact_text (
230
- self , text : str , secrets_manager : SecretsManager , session_id : str , context : PipelineContext
251
+ self ,
252
+ text : str ,
253
+ snippet : Optional [CodeSnippet ],
254
+ secrets_manager : SecretsManager ,
255
+ session_id : str ,
256
+ context : PipelineContext ,
231
257
) -> tuple [str , List [Match ]]:
232
258
"""
233
259
Find and encrypt secrets in the given text.
@@ -242,7 +268,7 @@ def _redact_text(
242
268
"""
243
269
# Find secrets in the text
244
270
text_encryptor = SecretsEncryptor (secrets_manager , context , session_id )
245
- return text_encryptor .obfuscate (text )
271
+ return text_encryptor .obfuscate (text , snippet )
246
272
247
273
async def process (
248
274
self , request : ChatCompletionRequest , context : PipelineContext
@@ -273,40 +299,74 @@ async def process(
273
299
274
300
# get last user message block to get index for the first relevant user message
275
301
last_user_message = self .get_last_user_message_block (new_request , context .client )
276
- last_assistant_idx = - 1
277
- if last_user_message :
278
- _ , user_idx = last_user_message
279
- last_assistant_idx = user_idx - 1
302
+ last_assistant_idx = last_user_message [1 ] - 1 if last_user_message else - 1
280
303
281
304
# Process all messages
282
305
for i , message in enumerate (new_request ["messages" ]):
283
306
if "content" in message and message ["content" ]:
284
- # Protect the text
285
- protected_string , secrets_matched = self ._redact_text (
286
- str (message ["content" ]), secrets_manager , session_id , context
307
+ redacted_content , secrets_matched = self ._redact_message_content (
308
+ message ["content" ], secrets_manager , session_id , context
287
309
)
288
- new_request ["messages" ][i ]["content" ] = protected_string
289
-
290
- # Append the matches for messages after the last assistant message
310
+ new_request ["messages" ][i ]["content" ] = redacted_content
291
311
if i > last_assistant_idx :
292
312
total_matches += secrets_matched
313
+ new_request = self ._finalize_redaction (context , total_matches , new_request )
314
+ return PipelineResult (request = new_request , context = context )
315
+
316
+ def _redact_message_content (self , message_content , secrets_manager , session_id , context ):
317
+ # Extract any code snippets
318
+ extractor = MessageCodeExtractorFactory .create_snippet_extractor (context .client )
319
+ snippets = extractor .extract_snippets (message_content )
320
+ redacted_snippets = {}
321
+ total_matches = []
322
+
323
+ for snippet in snippets :
324
+ redacted_snippet , secrets_matched = self ._redact_text (
325
+ snippet , snippet , secrets_manager , session_id , context
326
+ )
327
+ redacted_snippets [snippet .code ] = redacted_snippet
328
+ total_matches .extend (secrets_matched )
329
+
330
+ non_snippet_parts = []
331
+ last_end = 0
332
+
333
+ for snippet in snippets :
334
+ snippet_text = snippet .code
335
+ start_index = message_content .find (snippet_text , last_end )
336
+ if start_index > last_end :
337
+ non_snippet_part = message_content [last_end :start_index ]
338
+ redacted_part , secrets_matched = self ._redact_text (
339
+ non_snippet_part , "" , secrets_manager , session_id , context
340
+ )
341
+ non_snippet_parts .append (redacted_part )
342
+ total_matches .extend (secrets_matched )
343
+
344
+ non_snippet_parts .append (redacted_snippets [snippet_text ])
345
+ last_end = start_index + len (snippet_text )
346
+
347
+ if last_end < len (message_content ):
348
+ remaining_text = message_content [last_end :]
349
+ redacted_remaining , secrets_matched = self ._redact_text (
350
+ remaining_text , "" , secrets_manager , session_id , context
351
+ )
352
+ non_snippet_parts .append (redacted_remaining )
353
+ total_matches .extend (secrets_matched )
293
354
294
- # Not count repeated secret matches
355
+ return "" .join (non_snippet_parts ), total_matches
356
+
357
def _finalize_redaction(self, context, total_matches, new_request):
    """
    Record how many distinct secrets were redacted and flag the request if any were.

    Matches are de-duplicated by their `value`, so a repeated secret counts once. The
    count is written to `context.secrets_found` and
    `context.metadata["redacted_secrets_count"]`.

    Returns:
        `new_request` unchanged when nothing was redacted; otherwise the result of
        `add_or_update_system_message` called with the configured `secrets_redacted`
        prompt as a system message.
    """
    distinct_values = {found.value for found in total_matches}
    redacted_count = len(distinct_values)

    context.secrets_found = redacted_count > 0
    logger.info(f"Total secrets redacted since last assistant message: {redacted_count}")
    context.metadata["redacted_secrets_count"] = redacted_count

    if redacted_count == 0:
        return new_request

    notice = ChatCompletionSystemMessage(
        content=Config.get_config().prompts.secrets_redacted,
        role="system",
    )
    return add_or_update_system_message(new_request, notice, context)
310
370
311
371
312
372
class SecretUnredactionStep (OutputPipelineStep ):
@@ -450,14 +510,13 @@ async def process_chunk(
450
510
or input_context .metadata .get ("redacted_secrets_count" , 0 ) == 0
451
511
):
452
512
return [chunk ]
513
+
453
514
tool_name = next (
454
515
(
455
516
tool .lower ()
456
517
for tool in ["Cline" , "Kodu" ]
457
518
for message in input_context .alerts_raised or []
458
519
if tool in str (message .trigger_string or "" )
459
- and "If you are Kodu"
460
- not in str (message .trigger_string or "" ) # this comes from our prompts
461
520
),
462
521
"" ,
463
522
)
0 commit comments