|
| 1 | +"""Pattern detection for destructive shell commands. |
| 2 | +
|
| 3 | +Detects dangerous patterns in shell commands using pure regex — no LLM |
| 4 | +calls, no caching, no yolo-mode checks. Covers: |
| 5 | +- Unix/Linux: rm -rf root/home, git push --mirror, git clean -fd, git reset --hard, |
| 6 | + git checkout/restore ., SQL DROP via clients, docker prune, accidental package publishes |
| 7 | +- Windows PowerShell: Remove-Item, rmdir, del, Format-Volume, Clear-Disk, registry operations |
| 8 | +- Windows CMD: rd, rmdir, del, erase with /s /q flags, format, diskpart |
| 9 | +""" |
| 10 | + |
| 11 | +import re |
| 12 | +from dataclasses import dataclass |
| 13 | + |
| 14 | + |
@dataclass
class DestructiveCommandMatch:
    """Outcome reported when a shell command trips a destructive pattern.

    Attributes:
        pattern_name: Short label of the pattern that matched.
        description: Human-readable summary of what the command would do.
    """

    pattern_name: str
    description: str
| 21 | + |
| 22 | + |
# ---------------------------------------------------------------------------
# Shell-operator regex — same approach as force_push_guard
# ---------------------------------------------------------------------------

# Matches a run of word characters sitting at a command boundary: the start of
# any line (re.MULTILINE) or right after `&&`, `||`, `;` or `|`, with optional
# whitespace in between.
# E.g. "cd foo && rm -rf /" or "true || git reset --hard"
# NOTE: there is no capture group, and because start-of-line is one of the
# alternatives the pattern also matches whenever a line simply begins with a
# word character — a match does not say which word was found. A line that
# begins with a non-word character (e.g. "/bin/rm") does not match at `^`.
_SHELL_OPERATOR_RE = re.compile(r"(?:^|&&|\|\||;|\|)\s*\w+", re.MULTILINE)
| 31 | + |
| 32 | + |
# Separators after which a shell starts parsing a new simple command:
# `&&`, `||`, `;`, `|`, a lone `&` (CMD chaining / background job) and newlines.
_COMMAND_SEPARATOR_RE = re.compile(r"&&|\|\||[;|&\n]")

# Commands that merely print their arguments.
_INERT_COMMANDS = frozenset({"echo", "printf"})

# Characters that can introduce nested execution even inside the arguments of
# an inert command: `$(...)` / backticks in POSIX shells, `(...)` expressions
# in PowerShell.
_NESTED_EXECUTION_MARKERS = ("(", "`")


def _is_real_command(command: str) -> bool:
    """Check that the command can actually run something, not just print text.

    The check is a deliberately conservative heuristic: it only answers False
    when *every* simple command in the string is a plain ``echo``/``printf``
    and nothing in the string can trigger nested execution. Anything else —
    including commands invoked through a path (``/bin/rm``), an alias bypass
    (``\\rm``), a subshell or a wrapper such as ``sudo`` — counts as real.

    Handles compound commands like "cd foo && rm -rf /" while
    avoiding false positives like "echo 'rm -rf /'".

    The split on separators is not quote-aware, so an operator inside a quoted
    string yields an extra segment; that can only turn a False into a True,
    never hide a real command.

    Args:
        command: The shell command string to inspect.

    Returns:
        True if the command appears to be a real invocation, False for an
        empty/blank string or a command made only of inert print statements.
    """
    # Command substitution or a parenthesised expression may execute code
    # regardless of the leading command word, so never wave those through.
    if any(marker in command for marker in _NESTED_EXECUTION_MARKERS):
        return True

    for segment in _COMMAND_SEPARATOR_RE.split(command):
        words = segment.split()
        if words and words[0].lower() not in _INERT_COMMANDS:
            return True
    return False
| 46 | + |
| 47 | + |
# ---------------------------------------------------------------------------
# Cheap pre-filter substrings — if none appear, bail immediately
# ---------------------------------------------------------------------------

# Checked with plain ``in`` against the lower-cased command. Every keyword a
# destructive pattern can key on needs a trigger here; a pattern whose keyword
# is missing from this tuple can never be reached for a plain invocation.
# Some entries are subsumed by shorter ones (e.g. "format-volume" by "format");
# they are kept so the list still reads as an inventory of the commands covered.
_PREFILTER_SUBSTRINGS = (
    # Unix/Linux
    "rm",
    "git",
    "docker",
    "drop",
    "npm",
    "yarn",
    "twine",
    "psql",
    "mysql",
    "sqlite3",
    "fdisk",
    # Windows PowerShell (cmdlets and common aliases)
    "remove-item",
    " ri ",
    "ri ",
    " rmdir",
    "del ",
    "erase",
    "format-volume",
    "clear-disk",
    "remove-itemproperty",
    "rp ",
    "clear-recyclebin",
    "recycle",
    "invoke-expression",
    " irm ",
    "iex",
    "get-childitem",
    # Windows CMD
    "rd ",
    "format",
    "diskpart",
    "bcdedit",
    "reg ",
    "netsh",
)
| 87 | + |
| 88 | + |
| 89 | +# --------------------------------------------------------------------------- |
| 90 | +# Pattern lists — organized by shell type |
| 91 | +# --------------------------------------------------------------------------- |
| 92 | + |
# Unix destructive patterns

# Regex fragments shared by the entries below.
# Gap inside ONE simple command: never skips across `;`, `&`, `|` or a
# newline, so a flag or path that belongs to a later chained command cannot
# complete a match (e.g. "rm -rf build && cd /" is not a root delete).
_SAME_COMMAND_GAP = r"[^;&|\n]*"
# Recursive rm flag: any short-flag cluster containing r/R (-r, -rf, -fr,
# -Rfv, ...) or the long form --recursive.
_RM_RECURSIVE_FLAG = r"\s-(?:[a-zA-Z]*[rR][a-zA-Z]*|-recursive)\b"
# The preceding token must be a complete argument: followed by whitespace, a
# shell operator, or the end of the string. Unlike a bare `$`, this still
# matches when more arguments or a chained command follow.
_ARGUMENT_END = r"(?=\s|[;&|]|$)"
# "rm ... <recursive flag> ..." up to (not including) the target path.
_RM_RECURSIVE_PREFIX = (
    r"\brm\b" + _SAME_COMMAND_GAP + _RM_RECURSIVE_FLAG + _SAME_COMMAND_GAP
)

# Each entry is (compiled regex, short pattern name, description). Entries are
# tried in order, so earlier entries take priority.
_UNIX_DESTRUCTIVE_PATTERNS: list[tuple[re.Pattern, str, str]] = [
    # —— Tier 1 ——————————————————————————————————————————————————————————————
    # 1. rm -rf / / rm -rf /* (recursive delete of root filesystem)
    (
        re.compile(_RM_RECURSIVE_PREFIX + r"\s/" + _ARGUMENT_END),
        "rm -rf /",
        "recursive delete of root filesystem",
    ),
    (
        re.compile(_RM_RECURSIVE_PREFIX + r"\s/\*" + _ARGUMENT_END),
        "rm -rf /*",
        "recursive delete of root filesystem (glob)",
    ),
    # 2. rm -rf ~ / rm -rf ~/* (recursive delete of home directory)
    (
        # Optional trailing slash so "rm -rf ~/" is caught as well.
        re.compile(_RM_RECURSIVE_PREFIX + r"\s~/?" + _ARGUMENT_END),
        "rm -rf ~",
        "recursive delete of home directory",
    ),
    (
        re.compile(_RM_RECURSIVE_PREFIX + r"\s~/\*" + _ARGUMENT_END),
        "rm -rf ~/*",
        "recursive delete of home directory (glob)",
    ),
    # 3. git push --mirror (deletes remote branches not present locally)
    (
        re.compile(r"\bgit\s+push\b.*--mirror\b"),
        "git push --mirror",
        "deletes remote branches not present locally",
    ),
    # 4. git clean -fd (deletes untracked files and directories)
    (
        re.compile(r"\bgit\s+clean\b.*-f(?:[dxf]|\s+-?[dxf])"),
        "git clean -fd",
        "deletes untracked files and directories",
    ),
    (
        # Same operation with the flags in any order or split across
        # clusters: -df, -xdf, -d -f, ... (force flag plus d/x/X somewhere).
        re.compile(
            r"\bgit\s+clean\b"
            + r"(?=" + _SAME_COMMAND_GAP + r"\s-[a-zA-Z]*f)"
            + r"(?=" + _SAME_COMMAND_GAP + r"\s-[a-zA-Z]*[dxX])"
        ),
        "git clean -fd",
        "deletes untracked files and directories",
    ),
    # 5. git reset --hard (destroys all uncommitted changes)
    (
        re.compile(r"\bgit\s+reset\b.*--hard\b"),
        "git reset --hard",
        "destroys all uncommitted changes",
    ),
    # 6. git checkout -- . / git restore . (discards all working dir changes)
    (
        # The "--" separator is optional, so plain "git restore ." and
        # "git checkout ." are caught too. The negative lookahead skips
        # "--staged" without "--worktree": that form only unstages and leaves
        # the working tree untouched.
        re.compile(
            r"\bgit\s+(?:checkout|restore)\b"
            + r"(?!(?=" + _SAME_COMMAND_GAP + r"\s--staged\b)"
            + r"(?!" + _SAME_COMMAND_GAP + r"\s--worktree\b))"
            + _SAME_COMMAND_GAP
            + r"\s(?:--?\s*)?\."
            + _ARGUMENT_END
        ),
        "git checkout/restore .",
        "discards all working directory changes",
    ),
    # —— Tier 2 ——————————————————————————————————————————————————————————————
    # 7. DROP TABLE/DATABASE/SCHEMA via SQL client
    (
        re.compile(
            r"(?:psql|mysql|sqlite3)\b.*(?:-c|-e)\b.*DROP\s+(?:TABLE|DATABASE|SCHEMA)\b",
            re.IGNORECASE,
        ),
        "DROP via SQL client",
        "drops a table/database/schema via SQL client",
    ),
    (
        re.compile(
            r"DROP\s+(?:TABLE|DATABASE|SCHEMA)\b.*\|\s*(?:psql|mysql|sqlite3)\b",
            re.IGNORECASE,
        ),
        "DROP via SQL pipe",
        "drops a table/database/schema piped to SQL client",
    ),
    # 8. docker system prune -af / docker volume prune -f
    (
        re.compile(
            r"\bdocker\s+(?:system|volume)\s+prune\b.*(?:-[af]|\s-[af]|\s--all)"
        ),
        "docker prune",
        "nukes Docker resources without confirmation",
    ),
    # 9. npm publish / yarn publish / twine upload
    (
        re.compile(r"\b(?:npm|yarn)\s+publish\b"),
        "npm/yarn publish",
        "accidental package publishing",
    ),
    (
        re.compile(r"\btwine\s+upload\b"),
        "twine upload",
        "accidental package publishing",
    ),
]
| 180 | + |
# Windows PowerShell destructive patterns
#
# Written as a compact (regex, name, description) table; every regex is
# compiled case-insensitively. Entries are tried in order, so earlier rows
# take priority.
_POWERSHELL_DESTRUCTIVE_PATTERNS: list[tuple[re.Pattern, str, str]] = [
    (re.compile(regex_source, re.IGNORECASE), label, summary)
    for regex_source, label, summary in (
        # —— Tier 1 PowerShell ————————————————————————————————————————————————
        # 1. Remove-Item/ri at a command boundary with -Recurse/-r or -Force/-f
        (
            r"(?:^|[;|&])\s*(?:Remove-Item|ri)\b.*\s-(?:r|recurse|f|force)\b",
            "Remove-Item with recursive/force flags",
            "deletion with recursive or force flag",
        ),
        # 2. Recursive Remove-Item that mentions a system directory or drive
        (
            r"\b(?:Remove-Item|ri)\b.*\s-(?:r|recurse)\b.*(?:C:|Windows|System32|Users|Program Files|ProgramData)",
            "Remove-Item on system location",
            "deletion operation on system directory or drive",
        ),
        # 3. Anything piped into a delete command (e.g. Get-ChildItem | del)
        (
            r"\|\s*\b(?:Remove-Item|ri|del|erase)\b",
            "Piped deletion command",
            "deletion via pipeline (potentially recursive)",
        ),
        # 4. Disk formatting
        (
            r"\b(?:Format-Volume|fdisk)\b",
            "Format-Volume",
            "formats a disk volume",
        ),
        # 5. Disk wipe
        (
            r"\bClear-Disk\b",
            "Clear-Disk",
            "removes all data and OEM recovery partitions",
        ),
        # 6. Registry value removal under an HK*: drive
        (
            r"\b(?:Remove-ItemProperty|rp)\b.*\sHK(?:LM|CU|CR|U|CC):",
            "Remove-ItemProperty registry",
            "removes critical registry values",
        ),
        # 7. Recycle bin purge with -Force/-f
        (
            r"\b(?:Clear-RecycleBin|recycle)\b.*\s-(?:f|force)\b",
            "Clear-RecycleBin -Force",
            "permanently deletes all recycle bin contents",
        ),
        # 8. Download piped straight into Invoke-Expression
        (
            r"\b(?:irm|Invoke-WebRequest|iwr|Invoke-RestMethod|curl|wget)\b.*\|\s*(?:iex|Invoke-Expression)\b",
            "Download + Execute (IWR| IEX)",
            "downloads and executes remote code",
        ),
    )
]
| 257 | + |
# Windows CMD destructive patterns
#
# Each entry is (compiled regex, short pattern name, description). Entries are
# tried in order, so wherever two entries overlap the more specific one is
# listed first — otherwise the general entry would always win and the specific
# name could never be reported.
_CMD_DESTRUCTIVE_PATTERNS: list[tuple[re.Pattern, str, str]] = [
    # —— Tier 1 CMD ———————————————————————————————————————————————————————————
    # 1. rd /s /q - recursive silent delete (either flag order)
    (
        re.compile(
            r"\b(?:rmdir|rd)\b.*\s/s\b.*\s/q\b",
            re.IGNORECASE,
        ),
        "rd /s /q",
        "recursive silent directory delete",
    ),
    (
        re.compile(
            r"\b(?:rmdir|rd)\b.*\s/q\b.*\s/s\b",
            re.IGNORECASE,
        ),
        "rd /s /q",
        "recursive silent directory delete",
    ),
    # 2. del /s /q /f on system directories
    (
        # Forced variant first; the lookahead accepts /f anywhere in the
        # command, before or after /s.
        re.compile(
            r"\b(?:del|erase)\b(?=.*\s/f\b).*\s/s\b.*(?:Windows|System32|Program)",
            re.IGNORECASE,
        ),
        "del /f /s system files",
        "force recursive delete of system files",
    ),
    (
        re.compile(
            r"\b(?:del|erase)\b.*\s/s\b.*(?:Windows|System32|Program)",
            re.IGNORECASE,
        ),
        "del /s system files",
        "recursive delete of system files",
    ),
    # 3. format command at a command boundary, targeting any drive letter
    (
        # Quick-format variant first; /q may come before or after the drive
        # ("format D: /q" is the usual spelling).
        re.compile(
            r"(?:^|&&|\|\||;|\|)\s*format\b(?=.*\s/q\b).*\s[A-Z]:",
            re.IGNORECASE,
        ),
        "format /q",
        "quick formats drive",
    ),
    (
        re.compile(
            r"(?:^|&&|\|\||;|\|)\s*format\b.*\s[A-Z]:",
            re.IGNORECASE,
        ),
        "format",
        "formats drive",
    ),
    # 4. diskpart invocation (almost never legitimate in automation)
    (
        re.compile(
            r"\bdiskpart\b",
            re.IGNORECASE,
        ),
        "diskpart",
        "diskpart disk management tool",
    ),
    # 5. bcdedit (boot configuration) modifications
    (
        re.compile(
            r"\bbcdedit\b.*\s/(?:delete|set|export|import|bootsequence)\b.*\s(?:{.*}|.*bootmgr|.*resume)",
            re.IGNORECASE,
        ),
        "bcdedit destructive",
        "modifies critical boot configuration",
    ),
    # 6. reg delete on critical keys
    (
        re.compile(
            r"\breg\s+delete\b.*\sHK(?:LM|CR|CU)",
            re.IGNORECASE,
        ),
        "reg delete",
        "deletes critical registry keys",
    ),
]
| 340 | + |
# One flat list of every pattern. Order is match priority: Unix entries are
# tried first, then PowerShell, then CMD.
_DESTRUCTIVE_PATTERNS = [
    *_UNIX_DESTRUCTIVE_PATTERNS,
    *_POWERSHELL_DESTRUCTIVE_PATTERNS,
    *_CMD_DESTRUCTIVE_PATTERNS,
]
| 347 | + |
| 348 | + |
def detect_destructive_command(command: str) -> DestructiveCommandMatch | None:
    """Report the first destructive pattern found in a shell command.

    Three stages, cheapest first: a case-insensitive substring pre-filter
    against ``_PREFILTER_SUBSTRINGS``, then the ``_is_real_command`` check,
    and finally the regexes in ``_DESTRUCTIVE_PATTERNS`` in list order. The
    first regex that matches decides the result.

    Args:
        command: The shell command string to inspect.

    Returns:
        A DestructiveCommandMatch carrying the name and description of the
        first matching pattern, or None when a stage rejects the command or
        no pattern matches.
    """
    lowered = command.lower()
    has_trigger = any(trigger in lowered for trigger in _PREFILTER_SUBSTRINGS)

    # Short-circuits: the invocation check only runs once a trigger was seen.
    if not has_trigger or not _is_real_command(command):
        return None

    # Lazy generator: regexes stop being evaluated after the first match.
    hits = (
        DestructiveCommandMatch(pattern_name=label, description=summary)
        for regex, label, summary in _DESTRUCTIVE_PATTERNS
        if regex.search(command)
    )
    return next(hits, None)
0 commit comments