mandiant · v1bh475u · Jan 30, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 31, 2025
diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
 from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
 
 from pydantic import Field, BaseModel, ConfigDict
@@ -27,7 +26,7 @@ def validate_hex_int(value):
 
 
 def validate_hex_bytes(value):
-    return binascii.unhexlify(value) if isinstance(value, str) else value
+    return bytes.fromhex(value) if isinstance(value, str) else value
 
 
 HexInt = Annotated[int, BeforeValidator(validate_hex_int)]

diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py
@@ -15,7 +15,6 @@
 import io
 import re
 import logging
-import binascii
 import contextlib
 from typing import Iterator
 
@@ -114,7 +113,7 @@ def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
         # rules that rely on arch conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.
-        logger.debug("unsupported file format: %s, will not guess Arch", binascii.hexlify(buf[:4]).decode("ascii"))
+        logger.debug("unsupported file format: %s, will not guess Arch", buf[:4].hex())
         return
 
 
@@ -145,5 +144,5 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
         # rules that rely on OS conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.
-        logger.debug("unsupported file format: %s, will not guess OS", binascii.hexlify(buf[:4]).decode("ascii"))
+        logger.debug("unsupported file format: %s, will not guess OS", buf[:4].hex())
         return
diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py
diff --git a/capa/features/extractors/ghidra/basicblock.py b/capa/features/extractors/ghidra/basicblock.py
@@ -14,7 +14,6 @@
 
 
 import string
-import struct
 from typing import Iterator
 
 import ghidra
@@ -35,13 +34,13 @@ def get_printable_len(op: ghidra.program.model.scalar.Scalar) -> int:
     op_val = op.getValue()
 
     if op_bit_len == 8:
-        chars = struct.pack("<B", op_val & 0xFF)
+        chars = (op_val & 0xFF).to_bytes(1, "little")
     elif op_bit_len == 16:
-        chars = struct.pack("<H", op_val & 0xFFFF)
+        chars = (op_val & 0xFFFF).to_bytes(2, "little")
     elif op_bit_len == 32:
-        chars = struct.pack("<I", op_val & 0xFFFFFFFF)
+        chars = (op_val & 0xFFFFFFFF).to_bytes(4, "little")
     elif op_bit_len == 64:
-        chars = struct.pack("<Q", op_val & 0xFFFFFFFFFFFFFFFF)
+        chars = (op_val & 0xFFFFFFFFFFFFFFFF).to_bytes(8, "little")
     else:
         raise ValueError(f"Unhandled operand data type 0x{op_bit_len:x}.")
 

diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import re
-import struct
 from typing import Iterator
 
 from ghidra.program.model.symbol import SourceType, SymbolType
@@ -52,7 +51,7 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]])
             continue
 
         e_lfanew_bytes = block_bytez[e_lfanew : e_lfanew + 4]
-        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(e_lfanew_bytes, i))[0]
+        newoff = int.from_bytes(capa.features.extractors.helpers.xor_static(e_lfanew_bytes, i), "little")
 
         # assume XOR'd "PE" bytes exist within threshold
         if newoff > MAX_OFFSET_PE_AFTER_MZ:

diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import struct
 import builtins
 from typing import Iterator
 
@@ -157,7 +156,7 @@ def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
         if pblen < (e_lfanew + 4):
             continue
 
-        newoff = struct.unpack("<I", xor_static(pbytes[e_lfanew : e_lfanew + 4], key))[0]
+        newoff = int.from_bytes(xor_static(pbytes[e_lfanew : e_lfanew + 4], key), "little")
 
         nextres = pbytes.find(mzx, off + 1)
         if nextres != -1:

diff --git a/capa/features/extractors/ida/basicblock.py b/capa/features/extractors/ida/basicblock.py
@@ -14,7 +14,6 @@
 
 
 import string
-import struct
 from typing import Iterator
 
 import idaapi
@@ -33,13 +32,13 @@ def get_printable_len(op: idaapi.op_t) -> int:
     op_val = capa.features.extractors.ida.helpers.mask_op_val(op)
 
     if op.dtype == idaapi.dt_byte:
-        chars = struct.pack("<B", op_val)
+        chars = op_val.to_bytes(1, "little")
     elif op.dtype == idaapi.dt_word:
-        chars = struct.pack("<H", op_val)
+        chars = op_val.to_bytes(2, "little")
     elif op.dtype == idaapi.dt_dword:
-        chars = struct.pack("<I", op_val)
+        chars = op_val.to_bytes(4, "little")
     elif op.dtype == idaapi.dt_qword:
-        chars = struct.pack("<Q", op_val)
+        chars = op_val.to_bytes(8, "little")
     else:
         raise ValueError(f"Unhandled operand data type 0x{op.dtype:x}.")
 

diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import struct
 from typing import Iterator
 
 import idc
@@ -64,7 +63,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
         if seg_max < (e_lfanew + 4):
             continue
 
-        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
+        newoff = int.from_bytes(capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i), "little")
 
         # assume XOR'd "PE" bytes exist within threshold
         if newoff > MAX_OFFSET_PE_AFTER_MZ:

diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py
@@ -14,7 +14,6 @@
 
 
 import string
-import struct
 from typing import Iterator
 
 import envi
@@ -119,13 +118,13 @@ def get_printable_len(oper: envi.archs.i386.disasm.i386ImmOper) -> int:
     Return string length if all operand bytes are ascii or utf16-le printable
     """
     if oper.tsize == 1:
-        chars = struct.pack("<B", oper.imm)
+        chars = oper.imm.to_bytes(1, "little")
     elif oper.tsize == 2:
-        chars = struct.pack("<H", oper.imm)
+        chars = oper.imm.to_bytes(2, "little")
     elif oper.tsize == 4:
-        chars = struct.pack("<I", oper.imm)
+        chars = oper.imm.to_bytes(4, "little")
     elif oper.tsize == 8:
-        chars = struct.pack("<Q", oper.imm)
+        chars = oper.imm.to_bytes(8, "little")
     else:
         raise ValueError(f"unexpected oper.tsize: {oper.tsize}")
 

diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
 from typing import Union, Literal, Optional, Annotated
 
 from pydantic import Field, BaseModel, ConfigDict
@@ -85,7 +84,7 @@ def to_capa(self) -> capa.features.common.Feature:
             return capa.features.insn.Number(self.number, description=self.description)
 
         elif isinstance(self, BytesFeature):
-            return capa.features.common.Bytes(binascii.unhexlify(self.bytes), description=self.description)
+            return capa.features.common.Bytes(bytes.fromhex(self.bytes), description=self.description)
 
         elif isinstance(self, OffsetFeature):
             return capa.features.insn.Offset(self.offset, description=self.description)
@@ -191,7 +190,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
     elif isinstance(f, capa.features.common.Bytes):
         buf = f.value
         assert isinstance(buf, bytes)
-        return BytesFeature(bytes=binascii.hexlify(buf).decode("ascii"), description=f.description)
+        return BytesFeature(bytes=buf.hex(), description=f.description)
 
     elif isinstance(f, capa.features.insn.Offset):
         assert isinstance(f.value, int)

diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py
@@ -36,7 +36,6 @@
 """
 
 import logging
-import binascii
 from pathlib import Path
 
 import ida_nalt
@@ -85,7 +84,7 @@ def main():
     #
     # see: https://github.com/idapython/bin/issues/11
     a = meta.sample.md5.lower()
-    b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower()
+    b = ida_nalt.retrieve_input_file_md5().hex().lower()
     if not a.startswith(b):
         logger.error("sample mismatch")
         return -2

diff --git a/tests/fixtures.py b/tests/fixtures.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import binascii
 import contextlib
 import collections
 from pathlib import Path
@@ -942,17 +941,17 @@ def parametrize(params, values, **kwargs):
         # insn/string, direct memory reference
         ("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
         # insn/bytes
-        ("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
-        ("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
+        ("mimikatz", "function=0x401517", capa.features.common.Bytes(bytes.fromhex("CA3B0E000000F8AF47")), True),
+        ("mimikatz", "function=0x404414", capa.features.common.Bytes(bytes.fromhex("0180000040EA4700")), True),
         # don't extract byte features for obvious strings
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR  > ".encode("utf-16le")), False),
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
         # push    offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
-        ("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
+        ("mimikatz", "function=0x401000", capa.features.common.Bytes(bytes.fromhex("FDFF59F647")), False),
         # IDA features included byte sequences read from invalid memory, fixed in #409
-        ("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
+        ("mimikatz", "function=0x44570F", capa.features.common.Bytes(bytes.fromhex("FF" * 256)), False),
         # insn/bytes, pointer to string bytes
         ("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
         # insn/characteristic(nzxor)

diff --git a/tests/test_binexport_features.py b/tests/test_binexport_features.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
 from typing import cast
 
 import pytest
@@ -302,7 +301,7 @@
         (
             "d1e650.ghidra.be2",
             "function=0x1165a4",
-            capa.features.common.Bytes(binascii.unhexlify("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
+            capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
             True,
         ),
         # # don't extract byte features for obvious strings

diff --git a/tests/test_ida_features.py b/tests/test_ida_features.py
@@ -60,7 +60,6 @@
 import sys
 import inspect
 import logging
-import binascii
 import traceback
 from pathlib import Path
 
@@ -86,7 +85,7 @@ def check_input_file(wanted):
     except UnicodeDecodeError:
         # in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
         # rather than the hex digest
-        found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
+        found = idautils.GetInputFileMD5()[:15].hex().lower()
 
     if not wanted.startswith(found):
         raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")