From c71139f2a3723f73528bc3a803cc2281589ad7c9 Mon Sep 17 00:00:00 2001
From: vibhatsu <maulikbarot2915@gmail.com>
Date: Thu, 30 Jan 2025 23:04:56 +0530
Subject: [PATCH 1/5] refactor: replace binascii with bytes for hex conversions

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
---
 capa/features/extractors/cape/models.py | 3 +--
 capa/features/freeze/features.py        | 5 ++---
 scripts/import-to-ida.py                | 3 +--
 tests/fixtures.py                       | 9 ++++-----
 tests/test_binexport_features.py        | 3 +--
 tests/test_ida_features.py              | 3 +--
 6 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py
index 7117cc935..bfb3e21d6 100644
--- a/capa/features/extractors/cape/models.py
+++ b/capa/features/extractors/cape/models.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
 from typing import Any, Union, Literal, Optional, Annotated, TypeAlias
 
 from pydantic import Field, BaseModel, ConfigDict
@@ -27,7 +26,7 @@ def validate_hex_int(value):
 
 
 def validate_hex_bytes(value):
-    return binascii.unhexlify(value) if isinstance(value, str) else value
+    return bytes.fromhex(value) if isinstance(value, str) else value
 
 
 HexInt = Annotated[int, BeforeValidator(validate_hex_int)]
diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py
index 683023944..151964e55 100644
--- a/capa/features/freeze/features.py
+++ b/capa/features/freeze/features.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
 from typing import Union, Literal, Optional, Annotated
 
 from pydantic import Field, BaseModel, ConfigDict
@@ -85,7 +84,7 @@ def to_capa(self) -> capa.features.common.Feature:
             return capa.features.insn.Number(self.number, description=self.description)
 
         elif isinstance(self, BytesFeature):
-            return capa.features.common.Bytes(binascii.unhexlify(self.bytes), description=self.description)
+            return capa.features.common.Bytes(bytes.fromhex(self.bytes), description=self.description)
 
         elif isinstance(self, OffsetFeature):
             return capa.features.insn.Offset(self.offset, description=self.description)
@@ -191,7 +190,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature":
     elif isinstance(f, capa.features.common.Bytes):
         buf = f.value
         assert isinstance(buf, bytes)
-        return BytesFeature(bytes=binascii.hexlify(buf).decode("ascii"), description=f.description)
+        return BytesFeature(bytes=bytes.hex(buf), description=f.description)
 
     elif isinstance(f, capa.features.insn.Offset):
         assert isinstance(f.value, int)
diff --git a/scripts/import-to-ida.py b/scripts/import-to-ida.py
index 89ba19454..3c468c414 100644
--- a/scripts/import-to-ida.py
+++ b/scripts/import-to-ida.py
@@ -36,7 +36,6 @@
 """
 
 import logging
-import binascii
 from pathlib import Path
 
 import ida_nalt
@@ -85,7 +84,7 @@ def main():
     #
     # see: https://github.com/idapython/bin/issues/11
     a = meta.sample.md5.lower()
-    b = binascii.hexlify(ida_nalt.retrieve_input_file_md5()).decode("ascii").lower()
+    b = bytes.hex(ida_nalt.retrieve_input_file_md5()).lower()
     if not a.startswith(b):
         logger.error("sample mismatch")
         return -2
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 187a5f05f..b9199061d 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import binascii
 import contextlib
 import collections
 from pathlib import Path
@@ -942,17 +941,17 @@ def parametrize(params, values, **kwargs):
         # insn/string, direct memory reference
         ("mimikatz", "function=0x46D6CE", capa.features.common.String("(null)"), True),
         # insn/bytes
-        ("mimikatz", "function=0x401517", capa.features.common.Bytes(binascii.unhexlify("CA3B0E000000F8AF47")), True),
-        ("mimikatz", "function=0x404414", capa.features.common.Bytes(binascii.unhexlify("0180000040EA4700")), True),
+        ("mimikatz", "function=0x401517", capa.features.common.Bytes(bytes.fromhex("CA3B0E000000F8AF47")), True),
+        ("mimikatz", "function=0x404414", capa.features.common.Bytes(bytes.fromhex("0180000040EA4700")), True),
         # don't extract byte features for obvious strings
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardControl".encode("utf-16le")), False),
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("SCardTransmit".encode("utf-16le")), False),
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("ACR  > ".encode("utf-16le")), False),
         ("mimikatz", "function=0x40105D", capa.features.common.Bytes("nope".encode("ascii")), False),
         # push    offset aAcsAcr1220 ; "ACS..." -> where ACS == 41 00 43 00 == valid pointer to middle of instruction
-        ("mimikatz", "function=0x401000", capa.features.common.Bytes(binascii.unhexlify("FDFF59F647")), False),
+        ("mimikatz", "function=0x401000", capa.features.common.Bytes(bytes.fromhex("FDFF59F647")), False),
         # IDA features included byte sequences read from invalid memory, fixed in #409
-        ("mimikatz", "function=0x44570F", capa.features.common.Bytes(binascii.unhexlify("FF" * 256)), False),
+        ("mimikatz", "function=0x44570F", capa.features.common.Bytes(bytes.fromhex("FF" * 256)), False),
         # insn/bytes, pointer to string bytes
         ("mimikatz", "function=0x44EDEF", capa.features.common.Bytes("INPUTEVENT".encode("utf-16le")), False),
         # insn/characteristic(nzxor)
diff --git a/tests/test_binexport_features.py b/tests/test_binexport_features.py
index 8e76732ce..2695230c0 100644
--- a/tests/test_binexport_features.py
+++ b/tests/test_binexport_features.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import binascii
 from typing import cast
 
 import pytest
@@ -302,7 +301,7 @@
         (
             "d1e650.ghidra.be2",
             "function=0x1165a4",
-            capa.features.common.Bytes(binascii.unhexlify("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
+            capa.features.common.Bytes(bytes.fromhex("E405B89370BA6B419CD7925275BF6FCC1E8360CC")),
             True,
         ),
         # # don't extract byte features for obvious strings
diff --git a/tests/test_ida_features.py b/tests/test_ida_features.py
index da1a2ca4b..5e2f7380a 100644
--- a/tests/test_ida_features.py
+++ b/tests/test_ida_features.py
@@ -60,7 +60,6 @@
 import sys
 import inspect
 import logging
-import binascii
 import traceback
 from pathlib import Path
 
@@ -86,7 +85,7 @@ def check_input_file(wanted):
     except UnicodeDecodeError:
         # in IDA 7.5 or so, GetInputFileMD5 started returning raw binary
         # rather than the hex digest
-        found = binascii.hexlify(idautils.GetInputFileMD5()[:15]).decode("ascii").lower()
+        found = bytes.hex(idautils.GetInputFileMD5()[:15]).lower()
 
     if not wanted.startswith(found):
         raise RuntimeError(f"please run the tests against sample with MD5: `{wanted}`")

From 483f8c9a85879d0d2eb0ac34fd5a5c634e4836f8 Mon Sep 17 00:00:00 2001
From: vibhatsu <maulikbarot2915@gmail.com>
Date: Thu, 30 Jan 2025 23:07:33 +0530
Subject: [PATCH 2/5] refactor: replace struct unpacking with bytes conversion

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
---
 capa/features/extractors/common.py            |   5 +-
 capa/features/extractors/elf.py               | 247 ++++++++++++------
 capa/features/extractors/ghidra/basicblock.py |   9 +-
 capa/features/extractors/ghidra/file.py       |   3 +-
 capa/features/extractors/helpers.py           |   3 +-
 capa/features/extractors/ida/basicblock.py    |   9 +-
 capa/features/extractors/ida/file.py          |   3 +-
 capa/features/extractors/viv/basicblock.py    |   9 +-
 8 files changed, 190 insertions(+), 98 deletions(-)

diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py
index f8918b8d8..9f25243d9 100644
--- a/capa/features/extractors/common.py
+++ b/capa/features/extractors/common.py
@@ -15,7 +15,6 @@
 import io
 import re
 import logging
-import binascii
 import contextlib
 from typing import Iterator
 
@@ -114,7 +113,7 @@ def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
         # rules that rely on arch conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.
-        logger.debug("unsupported file format: %s, will not guess Arch", binascii.hexlify(buf[:4]).decode("ascii"))
+        logger.debug("unsupported file format: %s, will not guess Arch", bytes.hex(buf[:4]))
         return
 
 
@@ -145,5 +144,5 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
         # rules that rely on OS conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.
-        logger.debug("unsupported file format: %s, will not guess OS", binascii.hexlify(buf[:4]).decode("ascii"))
+        logger.debug("unsupported file format: %s, will not guess OS", bytes.hex(buf[:4]))
         return
diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py
index a3d52082e..15e655b23 100644
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import struct
 import logging
 import itertools
 import collections
 from enum import Enum
-from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
+from typing import TYPE_CHECKING, Literal, BinaryIO, Iterator, Optional
 from dataclasses import dataclass
 
 if TYPE_CHECKING:
@@ -132,7 +131,7 @@ def __init__(self, f: BinaryIO):
 
         # these will all be initialized in `_parse()`
         self.bitness: int
-        self.endian: str
+        self.endian: Literal["little", "big"]
         self.e_phentsize: int
         self.e_phnum: int
         self.e_shentsize: int
@@ -150,7 +149,7 @@ def _parse(self):
         if not self.file_header.startswith(b"\x7fELF"):
             raise CorruptElfFile("missing magic header")
 
-        ei_class, ei_data = struct.unpack_from("BB", self.file_header, 4)
+        ei_class, ei_data = (int.from_bytes(self.file_header[i : i + 1], byteorder="little") for i in (4, 5))
         logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data)
         if ei_class == 1:
             self.bitness = 32
@@ -160,24 +159,28 @@ def _parse(self):
             raise CorruptElfFile(f"invalid ei_class: 0x{ei_class:02x}")
 
         if ei_data == 1:
-            self.endian = "<"
+            self.endian = "little"
         elif ei_data == 2:
-            self.endian = ">"
+            self.endian = "big"
         else:
             raise CorruptElfFile(f"not an ELF file: invalid ei_data: 0x{ei_data:02x}")
 
         if self.bitness == 32:
-            e_phoff, e_shoff = struct.unpack_from(self.endian + "II", self.file_header, 0x1C)
-            self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2A)
-            self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
-                self.endian + "HHH", self.file_header, 0x2E
-            )
+            e_phoff = int.from_bytes(self.file_header[0x1C:0x20], byteorder=self.endian, signed=False)
+            e_shoff = int.from_bytes(self.file_header[0x20:0x24], byteorder=self.endian, signed=False)
+            self.e_phentsize = int.from_bytes(self.file_header[0x2A:0x2C], byteorder=self.endian, signed=False)
+            self.e_phnum = int.from_bytes(self.file_header[0x2C:0x2E], byteorder=self.endian, signed=False)
+            self.e_shentsize = int.from_bytes(self.file_header[0x2E:0x30], byteorder=self.endian, signed=False)
+            self.e_shnum = int.from_bytes(self.file_header[0x30:0x32], byteorder=self.endian, signed=False)
+            self.e_shstrndx = int.from_bytes(self.file_header[0x32:0x34], byteorder=self.endian, signed=False)
         elif self.bitness == 64:
-            e_phoff, e_shoff = struct.unpack_from(self.endian + "QQ", self.file_header, 0x20)
-            self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x36)
-            self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
-                self.endian + "HHH", self.file_header, 0x3A
-            )
+            e_phoff = int.from_bytes(self.file_header[0x20:0x28], byteorder=self.endian, signed=False)
+            e_shoff = int.from_bytes(self.file_header[0x28:0x30], byteorder=self.endian, signed=False)
+            self.e_phentsize = int.from_bytes(self.file_header[0x36:0x38], byteorder=self.endian, signed=False)
+            self.e_phnum = int.from_bytes(self.file_header[0x38:0x3A], byteorder=self.endian, signed=False)
+            self.e_shentsize = int.from_bytes(self.file_header[0x3A:0x3C], byteorder=self.endian, signed=False)
+            self.e_shnum = int.from_bytes(self.file_header[0x3C:0x3E], byteorder=self.endian, signed=False)
+            self.e_shstrndx = int.from_bytes(self.file_header[0x3E:0x40], byteorder=self.endian, signed=False)
         else:
             raise NotImplementedError()
 
@@ -227,7 +230,7 @@ def _parse(self):
 
     @property
     def ei_osabi(self) -> Optional[OS]:
-        (ei_osabi,) = struct.unpack_from(self.endian + "B", self.file_header, 7)
+        ei_osabi = int.from_bytes(self.file_header[7:8], byteorder=self.endian, signed=False)
         return ELF.OSABI.get(ei_osabi)
 
     MACHINE = {
@@ -324,7 +327,7 @@ def ei_osabi(self) -> Optional[OS]:
 
     @property
     def e_machine(self) -> Optional[str]:
-        (e_machine,) = struct.unpack_from(self.endian + "H", self.file_header, 0x12)
+        e_machine = int.from_bytes(self.file_header[0x12:0x14], byteorder=self.endian, signed=False)
         return ELF.MACHINE.get(e_machine)
 
     def parse_program_header(self, i) -> Phdr:
@@ -332,13 +335,21 @@ def parse_program_header(self, i) -> Phdr:
         phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]
 
         if self.bitness == 32:
-            p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
-                self.endian + "IIIIIII", phent, 0x0
-            )
+            p_type = int.from_bytes(phent[0:4], byteorder=self.endian, signed=False)
+            p_offset = int.from_bytes(phent[4:8], byteorder=self.endian, signed=False)
+            p_vaddr = int.from_bytes(phent[8:12], byteorder=self.endian, signed=False)
+            p_paddr = int.from_bytes(phent[12:16], byteorder=self.endian, signed=False)
+            p_filesz = int.from_bytes(phent[16:20], byteorder=self.endian, signed=False)
+            p_memsz = int.from_bytes(phent[20:24], byteorder=self.endian, signed=False)
+            p_flags = int.from_bytes(phent[24:28], byteorder=self.endian, signed=False)
         elif self.bitness == 64:
-            p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
-                self.endian + "IIQQQQQ", phent, 0x0
-            )
+            p_type = int.from_bytes(phent[0:4], byteorder=self.endian, signed=False)
+            p_flags = int.from_bytes(phent[4:8], byteorder=self.endian, signed=False)
+            p_offset = int.from_bytes(phent[8:16], byteorder=self.endian, signed=False)
+            p_vaddr = int.from_bytes(phent[16:24], byteorder=self.endian, signed=False)
+            p_paddr = int.from_bytes(phent[24:32], byteorder=self.endian, signed=False)
+            p_filesz = int.from_bytes(phent[32:40], byteorder=self.endian, signed=False)
+            p_memsz = int.from_bytes(phent[40:48], byteorder=self.endian, signed=False)
         else:
             raise NotImplementedError()
 
@@ -362,13 +373,23 @@ def parse_section_header(self, i) -> Shdr:
         shent = self.shbuf[shent_offset : shent_offset + self.e_shentsize]
 
         if self.bitness == 32:
-            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
-                self.endian + "IIIIIIIIII", shent, 0x0
-            )
+            sh_name = int.from_bytes(shent[0:4], byteorder=self.endian, signed=False)
+            sh_type = int.from_bytes(shent[4:8], byteorder=self.endian, signed=False)
+            sh_flags = int.from_bytes(shent[8:12], byteorder=self.endian, signed=False)
+            sh_addr = int.from_bytes(shent[12:16], byteorder=self.endian, signed=False)
+            sh_offset = int.from_bytes(shent[16:20], byteorder=self.endian, signed=False)
+            sh_size = int.from_bytes(shent[20:24], byteorder=self.endian, signed=False)
+            sh_link = int.from_bytes(shent[24:28], byteorder=self.endian, signed=False)
+            sh_entsize = int.from_bytes(shent[36:40], byteorder=self.endian, signed=False)
         elif self.bitness == 64:
-            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
-                self.endian + "IIQQQQIIQQ", shent, 0x0
-            )
+            sh_name = int.from_bytes(shent[0:4], byteorder=self.endian, signed=False)
+            sh_type = int.from_bytes(shent[4:8], byteorder=self.endian, signed=False)
+            sh_flags = int.from_bytes(shent[8:16], byteorder=self.endian, signed=False)
+            sh_addr = int.from_bytes(shent[16:24], byteorder=self.endian, signed=False)
+            sh_offset = int.from_bytes(shent[24:32], byteorder=self.endian, signed=False)
+            sh_size = int.from_bytes(shent[32:40], byteorder=self.endian, signed=False)
+            sh_link = int.from_bytes(shent[40:44], byteorder=self.endian, signed=False)
+            sh_entsize = int.from_bytes(shent[56:64], byteorder=self.endian, signed=False)
         else:
             raise NotImplementedError()
 
@@ -426,9 +447,11 @@ def versions_needed(self) -> dict[str, set[str]]:
             vn_offset = 0x0
             while True:
                 # ElfXX_Verneed layout is the same on 32 and 64 bit
-                vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(
-                    self.endian + "HHIII", shdr.buf, vn_offset
-                )
+                vn_version = int.from_bytes(shdr.buf[vn_offset : vn_offset + 2], byteorder=self.endian, signed=False)
+                vn_cnt = int.from_bytes(shdr.buf[vn_offset + 2 : vn_offset + 4], byteorder=self.endian, signed=False)
+                vn_file = int.from_bytes(shdr.buf[vn_offset + 4 : vn_offset + 8], byteorder=self.endian, signed=False)
+                vn_aux = int.from_bytes(shdr.buf[vn_offset + 8 : vn_offset + 12], byteorder=self.endian, signed=False)
+                vn_next = int.from_bytes(shdr.buf[vn_offset + 12 : vn_offset + 16], byteorder=self.endian, signed=False)
                 if vn_version != 1:
                     # unexpected format, don't try to keep parsing
                     break
@@ -442,7 +465,12 @@ def versions_needed(self) -> dict[str, set[str]]:
                 vna_offset = vn_offset + vn_aux
                 for _ in range(vn_cnt):
                     # ElfXX_Vernaux layout is the same on 32 and 64 bit
-                    _, _, _, vna_name, vna_next = struct.unpack_from(self.endian + "IHHII", shdr.buf, vna_offset)
+                    vna_name = int.from_bytes(
+                        shdr.buf[vna_offset + 8 : vna_offset + 12], byteorder=self.endian, signed=False
+                    )
+                    vna_next = int.from_bytes(
+                        shdr.buf[vna_offset + 12 : vna_offset + 16], byteorder=self.endian, signed=False
+                    )
 
                     # ABI names, like: "GLIBC_2.2.5"
                     abi = read_cstr(linked_shdr.buf, vna_name)
@@ -473,10 +501,12 @@ def dynamic_entries(self) -> Iterator[tuple[int, int]]:
             offset = 0x0
             while True:
                 if self.bitness == 32:
-                    d_tag, d_val = struct.unpack_from(self.endian + "II", phdr.buf, offset)
+                    d_tag = int.from_bytes(phdr.buf[offset : offset + 4], byteorder=self.endian, signed=False)
+                    d_val = int.from_bytes(phdr.buf[offset + 4 : offset + 8], byteorder=self.endian, signed=False)
                     offset += 8
                 elif self.bitness == 64:
-                    d_tag, d_val = struct.unpack_from(self.endian + "QQ", phdr.buf, offset)
+                    d_tag = int.from_bytes(phdr.buf[offset : offset + 8], byteorder=self.endian, signed=False)
+                    d_val = int.from_bytes(phdr.buf[offset + 8 : offset + 16], byteorder=self.endian, signed=False)
                     offset += 16
                 else:
                     raise NotImplementedError()
@@ -580,7 +610,7 @@ class ABITag:
 
 
 class PHNote:
-    def __init__(self, endian: str, buf: bytes):
+    def __init__(self, endian: Literal["big", "little"], buf: bytes):
         self.endian = endian
         self.buf = buf
 
@@ -592,7 +622,9 @@ def __init__(self, endian: str, buf: bytes):
         self._parse()
 
     def _parse(self):
-        namesz, self.descsz, self.type_ = struct.unpack_from(self.endian + "III", self.buf, 0x0)
+        namesz = int.from_bytes(self.buf[0x0:0x4], byteorder=self.endian, signed=False)
+        self.descsz = int.from_bytes(self.buf[0x4:0x8], byteorder=self.endian, signed=False)
+        self.type_ = int.from_bytes(self.buf[0x8:0xC], byteorder=self.endian, signed=False)
         name_offset = 0xC
         self.desc_offset = name_offset + align(namesz, 0x4)
 
@@ -616,7 +648,10 @@ def abi_tag(self) -> Optional[ABITag]:
             return None
 
         desc = self.buf[self.desc_offset : self.desc_offset + self.descsz]
-        abi_tag, kmajor, kminor, kpatch = struct.unpack_from(self.endian + "IIII", desc, 0x0)
+        abi_tag = int.from_bytes(desc[0:4], byteorder=self.endian, signed=False)
+        kmajor = int.from_bytes(desc[4:8], byteorder=self.endian, signed=False)
+        kminor = int.from_bytes(desc[8:12], byteorder=self.endian, signed=False)
+        kpatch = int.from_bytes(desc[12:16], byteorder=self.endian, signed=False)
         logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)
 
         os = GNU_ABI_TAG.get(abi_tag)
@@ -629,7 +664,7 @@ def abi_tag(self) -> Optional[ABITag]:
 
 
 class SHNote:
-    def __init__(self, endian: str, buf: bytes):
+    def __init__(self, endian: Literal["big", "little"], buf: bytes):
         self.endian = endian
         self.buf = buf
 
@@ -641,7 +676,9 @@ def __init__(self, endian: str, buf: bytes):
         self._parse()
 
     def _parse(self):
-        namesz, self.descsz, self.type_ = struct.unpack_from(self.endian + "III", self.buf, 0x0)
+        namesz = int.from_bytes(self.buf[0x0:0x4], byteorder=self.endian, signed=False)
+        self.descsz = int.from_bytes(self.buf[0x4:0x8], byteorder=self.endian, signed=False)
+        self.type_ = int.from_bytes(self.buf[0x8:0xC], byteorder=self.endian, signed=False)
         name_offset = 0xC
         self.desc_offset = name_offset + align(namesz, 0x4)
 
@@ -660,7 +697,10 @@ def abi_tag(self) -> Optional[ABITag]:
             return None
 
         desc = self.buf[self.desc_offset : self.desc_offset + self.descsz]
-        abi_tag, kmajor, kminor, kpatch = struct.unpack_from(self.endian + "IIII", desc, 0x0)
+        abi_tag = int.from_bytes(desc[0:4], byteorder=self.endian, signed=False)
+        kmajor = int.from_bytes(desc[4:8], byteorder=self.endian, signed=False)
+        kminor = int.from_bytes(desc[8:12], byteorder=self.endian, signed=False)
+        kpatch = int.from_bytes(desc[12:16], byteorder=self.endian, signed=False)
         logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)
 
         os = GNU_ABI_TAG.get(abi_tag)
@@ -684,7 +724,7 @@ class Symbol:
 class SymTab:
     def __init__(
         self,
-        endian: str,
+        endian: Literal["big", "little"],
         bitness: int,
         symtab: Shdr,
         strtab: Shdr,
@@ -696,7 +736,7 @@ def __init__(
 
         self._parse(endian, bitness, symtab.buf)
 
-    def _parse(self, endian: str, bitness: int, symtab_buf: bytes) -> None:
+    def _parse(self, endian: Literal["big", "little"], bitness: int, symtab_buf: bytes) -> None:
         """
         return the symbol's information in
         the order specified by sys/elf32.h
@@ -706,12 +746,62 @@ def _parse(self, endian: str, bitness: int, symtab_buf: bytes) -> None:
 
         for i in range(int(len(self.symtab.buf) / self.symtab.entsize)):
             if bitness == 32:
-                name_offset, value, size, info, other, shndx = struct.unpack_from(
-                    endian + "IIIBBH", symtab_buf, i * self.symtab.entsize
+                name_offset = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize : i * self.symtab.entsize + 4], byteorder=endian, signed=False
+                )
+                value = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 4 : i * self.symtab.entsize + 8],
+                    byteorder=endian,
+                    signed=False,
+                )
+                size = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 8 : i * self.symtab.entsize + 12],
+                    byteorder=endian,
+                    signed=False,
+                )
+                info = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 12 : i * self.symtab.entsize + 13],
+                    byteorder=endian,
+                    signed=False,
+                )
+                other = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 13 : i * self.symtab.entsize + 14],
+                    byteorder=endian,
+                    signed=False,
+                )
+                shndx = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 14 : i * self.symtab.entsize + 16],
+                    byteorder=endian,
+                    signed=False,
                 )
             elif bitness == 64:
-                name_offset, info, other, shndx, value, size = struct.unpack_from(
-                    endian + "IBBHQQ", symtab_buf, i * self.symtab.entsize
+                name_offset = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize : i * self.symtab.entsize + 4], byteorder=endian, signed=False
+                )
+                info = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 4 : i * self.symtab.entsize + 5],
+                    byteorder=endian,
+                    signed=False,
+                )
+                other = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 5 : i * self.symtab.entsize + 6],
+                    byteorder=endian,
+                    signed=False,
+                )
+                shndx = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 6 : i * self.symtab.entsize + 8],
+                    byteorder=endian,
+                    signed=False,
+                )
+                value = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 8 : i * self.symtab.entsize + 16],
+                    byteorder=endian,
+                    signed=False,
+                )
+                size = int.from_bytes(
+                    symtab_buf[i * self.symtab.entsize + 16 : i * self.symtab.entsize + 24],
+                    byteorder=endian,
+                    signed=False,
                 )
 
             self.symbols.append(Symbol(name_offset, value, size, info, other, shndx))
@@ -739,7 +829,7 @@ def get_symbols(self) -> Iterator[Symbol]:
 
     @classmethod
     def from_viv(cls, elf: "Elf.Elf") -> Optional["SymTab"]:
-        endian = "<" if elf.getEndian() == 0 else ">"
+        endian: Literal["big", "little"] = "little" if elf.getEndian() == 0 else "big"
         bitness = elf.bits
 
         SHT_SYMTAB = 0x2
@@ -1034,12 +1124,13 @@ def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
 
 
 def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
+    psize: int = 0
     if elf.bitness == 32:
         struct_size = 8
-        struct_format = elf.endian + "II"
+        psize = 4
     elif elf.bitness == 64:
         struct_size = 16
-        struct_format = elf.endian + "QQ"
+        psize = 8
     else:
         raise ValueError("invalid psize")
 
@@ -1047,7 +1138,8 @@ def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
     if not struct_buf:
         return None
 
-    addr, length = struct.unpack_from(struct_format, struct_buf, 0)
+    addr = int.from_bytes(struct_buf[0:psize], byteorder=elf.endian, signed=False)
+    length = int.from_bytes(struct_buf[psize : psize * 2], byteorder=elf.endian, signed=False)
 
     return read_data(elf, addr, length)
 
@@ -1096,7 +1188,12 @@ def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
         logger.debug("go buildinfo: no buildinfo magic")
         return None
 
-    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
+    psize = int.from_bytes(
+        buf[index + len(BUILDINFO_MAGIC) : index + len(BUILDINFO_MAGIC) + 1], byteorder="little", signed=True
+    )
+    flags = int.from_bytes(
+        buf[index + len(BUILDINFO_MAGIC) + 1 : index + len(BUILDINFO_MAGIC) + 2], byteorder="little", signed=True
+    )
     assert psize in (4, 8)
     is_big_endian = flags & 0b01
     has_inline_strings = flags & 0b10
@@ -1143,27 +1240,29 @@ def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
         # This is the uncommon path. Most samples will have an inline GOOS string.
         #
         # To find samples on VT, use the referenced VTGrep content searches.
-        info_format = {
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
-            # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
-            # in which the modinfo doesn't have GOOS.
-            (4, False): "<II",
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
-            # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
-            # in which the modinfo doesn't have GOOS.
-            (8, False): "<QQ",
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
-            # (no matches on VT today)
-            (4, True): ">II",
-            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
-            # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
-            # in which the modinfo doesn't have GOOS.
-            (8, True): ">QQ",
-        }
-
-        build_version_address, modinfo_address = struct.unpack_from(
-            info_format[(psize, is_big_endian)], buf, index + 0x10
-        )
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
+        # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
+        # in which the modinfo doesn't have GOOS.
+        # 4 byte size and little endian
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
+        # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
+        # in which the modinfo doesn't have GOOS.
+        # 8 byte size and little endian
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
+        # (no matches on VT today)
+        # 4 byte size and big endian
+        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
+        # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
+        # in which the modinfo doesn't have GOOS.
+        # 8 byte size and big endian
+
+        endian: Literal["big", "little"] = "big" if is_big_endian else "little"
+        if psize == 4:
+            build_version_address = int.from_bytes(buf[index + 0x10 : index + 0x14], byteorder=endian, signed=False)
+            modinfo_address = int.from_bytes(buf[index + 0x14 : index + 0x18], byteorder=endian, signed=False)
+        else:  # psize == 8
+            build_version_address = int.from_bytes(buf[index + 0x10 : index + 0x18], byteorder=endian, signed=False)
+            modinfo_address = int.from_bytes(buf[index + 0x18 : index + 0x20], byteorder=endian, signed=False)
         logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
         logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
 
diff --git a/capa/features/extractors/ghidra/basicblock.py b/capa/features/extractors/ghidra/basicblock.py
index 25b73ee43..904c20e2e 100644
--- a/capa/features/extractors/ghidra/basicblock.py
+++ b/capa/features/extractors/ghidra/basicblock.py
@@ -14,7 +14,6 @@
 
 
 import string
-import struct
 from typing import Iterator
 
 import ghidra
@@ -35,13 +34,13 @@ def get_printable_len(op: ghidra.program.model.scalar.Scalar) -> int:
     op_val = op.getValue()
 
     if op_bit_len == 8:
-        chars = struct.pack("<B", op_val & 0xFF)
+        chars = (op_val & 0xFF).to_bytes(1, "little")
     elif op_bit_len == 16:
-        chars = struct.pack("<H", op_val & 0xFFFF)
+        chars = (op_val & 0xFFFF).to_bytes(2, "little")
     elif op_bit_len == 32:
-        chars = struct.pack("<I", op_val & 0xFFFFFFFF)
+        chars = (op_val & 0xFFFFFFFF).to_bytes(4, "little")
     elif op_bit_len == 64:
-        chars = struct.pack("<Q", op_val & 0xFFFFFFFFFFFFFFFF)
+        chars = (op_val & 0xFFFFFFFFFFFFFFFF).to_bytes(8, "little")
     else:
         raise ValueError(f"Unhandled operand data type 0x{op_bit_len:x}.")
 
diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py
index a1c088c32..d509a0f47 100644
--- a/capa/features/extractors/ghidra/file.py
+++ b/capa/features/extractors/ghidra/file.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import re
-import struct
 from typing import Iterator
 
 from ghidra.program.model.symbol import SourceType, SymbolType
@@ -52,7 +51,7 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]])
             continue
 
         e_lfanew_bytes = block_bytez[e_lfanew : e_lfanew + 4]
-        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(e_lfanew_bytes, i))[0]
+        newoff = int.from_bytes(capa.features.extractors.helpers.xor_static(e_lfanew_bytes, i), "little")
 
         # assume XOR'd "PE" bytes exist within threshold
         if newoff > MAX_OFFSET_PE_AFTER_MZ:
diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py
index eb546f504..f764a64ad 100644
--- a/capa/features/extractors/helpers.py
+++ b/capa/features/extractors/helpers.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import struct
 import builtins
 from typing import Iterator
 
@@ -157,7 +156,7 @@ def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
         if pblen < (e_lfanew + 4):
             continue
 
-        newoff = struct.unpack("<I", xor_static(pbytes[e_lfanew : e_lfanew + 4], key))[0]
+        newoff = int.from_bytes(xor_static(pbytes[e_lfanew : e_lfanew + 4], key), "little")
 
         nextres = pbytes.find(mzx, off + 1)
         if nextres != -1:
diff --git a/capa/features/extractors/ida/basicblock.py b/capa/features/extractors/ida/basicblock.py
index 97da4ddae..f45024140 100644
--- a/capa/features/extractors/ida/basicblock.py
+++ b/capa/features/extractors/ida/basicblock.py
@@ -14,7 +14,6 @@
 
 
 import string
-import struct
 from typing import Iterator
 
 import idaapi
@@ -33,13 +32,13 @@ def get_printable_len(op: idaapi.op_t) -> int:
     op_val = capa.features.extractors.ida.helpers.mask_op_val(op)
 
     if op.dtype == idaapi.dt_byte:
-        chars = struct.pack("<B", op_val)
+        chars = (op_val).to_bytes(1, "little")
     elif op.dtype == idaapi.dt_word:
-        chars = struct.pack("<H", op_val)
+        chars = (op_val).to_bytes(2, "little")
     elif op.dtype == idaapi.dt_dword:
-        chars = struct.pack("<I", op_val)
+        chars = (op_val).to_bytes(4, "little")
     elif op.dtype == idaapi.dt_qword:
-        chars = struct.pack("<Q", op_val)
+        chars = (op_val).to_bytes(8, "little")
     else:
         raise ValueError(f"Unhandled operand data type 0x{op.dtype:x}.")
 
diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py
index a47f1524c..ad70b3e29 100644
--- a/capa/features/extractors/ida/file.py
+++ b/capa/features/extractors/ida/file.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 
-import struct
 from typing import Iterator
 
 import idc
@@ -64,7 +63,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
         if seg_max < (e_lfanew + 4):
             continue
 
-        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
+        newoff = int.from_bytes(capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i), "little")
 
         # assume XOR'd "PE" bytes exist within threshold
         if newoff > MAX_OFFSET_PE_AFTER_MZ:
diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py
index 0f95bdef1..65c1f7d0d 100644
--- a/capa/features/extractors/viv/basicblock.py
+++ b/capa/features/extractors/viv/basicblock.py
@@ -14,7 +14,6 @@
 
 
 import string
-import struct
 from typing import Iterator
 
 import envi
@@ -119,13 +118,13 @@ def get_printable_len(oper: envi.archs.i386.disasm.i386ImmOper) -> int:
     Return string length if all operand bytes are ascii or utf16-le printable
     """
     if oper.tsize == 1:
-        chars = struct.pack("<B", oper.imm)
+        chars = (oper.imm).to_bytes(1, "little")
     elif oper.tsize == 2:
-        chars = struct.pack("<H", oper.imm)
+        chars = (oper.imm).to_bytes(2, "little")
     elif oper.tsize == 4:
-        chars = struct.pack("<I", oper.imm)
+        chars = (oper.imm).to_bytes(4, "little")
     elif oper.tsize == 8:
-        chars = struct.pack("<Q", oper.imm)
+        chars = (oper.imm).to_bytes(8, "little")
     else:
         raise ValueError(f"unexpected oper.tsize: {oper.tsize}")
 

From ea42b5522ea19c2e0abcd433a4a1c25e6e8f6979 Mon Sep 17 00:00:00 2001
From: vibhatsu <maulikbarot2915@gmail.com>
Date: Fri, 31 Jan 2025 00:03:18 +0530
Subject: [PATCH 3/5] simplify byte extraction for ELF header

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
---
 capa/features/extractors/elf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py
index 15e655b23..e1cb4bf30 100644
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -149,7 +149,7 @@ def _parse(self):
         if not self.file_header.startswith(b"\x7fELF"):
             raise CorruptElfFile("missing magic header")
 
-        ei_class, ei_data = int.from_bytes(self.file_header[4:5]), int.from_bytes(self.file_header[5:6])
+        ei_class, ei_data = self.file_header[4], self.file_header[5]
         logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data)
         if ei_class == 1:
             self.bitness = 32

From 84580ad51a5b2bd0f660d1a0234062e67370826a Mon Sep 17 00:00:00 2001
From: vibhatsu <maulikbarot2915@gmail.com>
Date: Fri, 31 Jan 2025 13:03:54 +0530
Subject: [PATCH 4/5] Revert "refactor: replace struct unpacking with bytes
 conversion"

This reverts commit 483f8c9a85879d0d2eb0ac34fd5a5c634e4836f8.
---
 capa/features/extractors/common.py            |   5 +-
 capa/features/extractors/elf.py               | 247 ++++++------------
 capa/features/extractors/ghidra/basicblock.py |   9 +-
 capa/features/extractors/ghidra/file.py       |   3 +-
 capa/features/extractors/helpers.py           |   3 +-
 capa/features/extractors/ida/basicblock.py    |   9 +-
 capa/features/extractors/ida/file.py          |   3 +-
 capa/features/extractors/viv/basicblock.py    |   9 +-
 8 files changed, 98 insertions(+), 190 deletions(-)

diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py
index 9f25243d9..f8918b8d8 100644
--- a/capa/features/extractors/common.py
+++ b/capa/features/extractors/common.py
@@ -15,6 +15,7 @@
 import io
 import re
 import logging
+import binascii
 import contextlib
 from typing import Iterator
 
@@ -113,7 +114,7 @@ def extract_arch(buf) -> Iterator[tuple[Feature, Address]]:
         # rules that rely on arch conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.
-        logger.debug("unsupported file format: %s, will not guess Arch", bytes.hex(buf[:4]))
+        logger.debug("unsupported file format: %s, will not guess Arch", binascii.hexlify(buf[:4]).decode("ascii"))
         return
 
 
@@ -144,5 +145,5 @@ def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]:
         # rules that rely on OS conditions will fail to match on shellcode.
         #
         # for (2), this logic will need to be updated as the format is implemented.
-        logger.debug("unsupported file format: %s, will not guess OS", bytes.hex(buf[:4]))
+        logger.debug("unsupported file format: %s, will not guess OS", binascii.hexlify(buf[:4]).decode("ascii"))
         return
diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py
index e1cb4bf30..a3d52082e 100644
--- a/capa/features/extractors/elf.py
+++ b/capa/features/extractors/elf.py
@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import struct
 import logging
 import itertools
 import collections
 from enum import Enum
-from typing import TYPE_CHECKING, Literal, BinaryIO, Iterator, Optional
+from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional
 from dataclasses import dataclass
 
 if TYPE_CHECKING:
@@ -131,7 +132,7 @@ def __init__(self, f: BinaryIO):
 
         # these will all be initialized in `_parse()`
         self.bitness: int
-        self.endian: Literal["little", "big"]
+        self.endian: str
         self.e_phentsize: int
         self.e_phnum: int
         self.e_shentsize: int
@@ -149,7 +150,7 @@ def _parse(self):
         if not self.file_header.startswith(b"\x7fELF"):
             raise CorruptElfFile("missing magic header")
 
-        ei_class, ei_data = self.file_header[4], self.file_header[5]
+        ei_class, ei_data = struct.unpack_from("BB", self.file_header, 4)
         logger.debug("ei_class: 0x%02x ei_data: 0x%02x", ei_class, ei_data)
         if ei_class == 1:
             self.bitness = 32
@@ -159,28 +160,24 @@ def _parse(self):
             raise CorruptElfFile(f"invalid ei_class: 0x{ei_class:02x}")
 
         if ei_data == 1:
-            self.endian = "little"
+            self.endian = "<"
         elif ei_data == 2:
-            self.endian = "big"
+            self.endian = ">"
         else:
             raise CorruptElfFile(f"not an ELF file: invalid ei_data: 0x{ei_data:02x}")
 
         if self.bitness == 32:
-            e_phoff = int.from_bytes(self.file_header[0x1C:0x20], byteorder=self.endian, signed=False)
-            e_shoff = int.from_bytes(self.file_header[0x20:0x24], byteorder=self.endian, signed=False)
-            self.e_phentsize = int.from_bytes(self.file_header[0x2A:0x2C], byteorder=self.endian, signed=False)
-            self.e_phnum = int.from_bytes(self.file_header[0x2C:0x2E], byteorder=self.endian, signed=False)
-            self.e_shentsize = int.from_bytes(self.file_header[0x2E:0x30], byteorder=self.endian, signed=False)
-            self.e_shnum = int.from_bytes(self.file_header[0x30:0x32], byteorder=self.endian, signed=False)
-            self.e_shstrndx = int.from_bytes(self.file_header[0x32:0x34], byteorder=self.endian, signed=False)
+            e_phoff, e_shoff = struct.unpack_from(self.endian + "II", self.file_header, 0x1C)
+            self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x2A)
+            self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
+                self.endian + "HHH", self.file_header, 0x2E
+            )
         elif self.bitness == 64:
-            e_phoff = int.from_bytes(self.file_header[0x20:0x28], byteorder=self.endian, signed=False)
-            e_shoff = int.from_bytes(self.file_header[0x28:0x30], byteorder=self.endian, signed=False)
-            self.e_phentsize = int.from_bytes(self.file_header[0x36:0x38], byteorder=self.endian, signed=False)
-            self.e_phnum = int.from_bytes(self.file_header[0x38:0x3A], byteorder=self.endian, signed=False)
-            self.e_shentsize = int.from_bytes(self.file_header[0x3A:0x3C], byteorder=self.endian, signed=False)
-            self.e_shnum = int.from_bytes(self.file_header[0x3C:0x3E], byteorder=self.endian, signed=False)
-            self.e_shstrndx = int.from_bytes(self.file_header[0x3E:0x40], byteorder=self.endian, signed=False)
+            e_phoff, e_shoff = struct.unpack_from(self.endian + "QQ", self.file_header, 0x20)
+            self.e_phentsize, self.e_phnum = struct.unpack_from(self.endian + "HH", self.file_header, 0x36)
+            self.e_shentsize, self.e_shnum, self.e_shstrndx = struct.unpack_from(
+                self.endian + "HHH", self.file_header, 0x3A
+            )
         else:
             raise NotImplementedError()
 
@@ -230,7 +227,7 @@ def _parse(self):
 
     @property
     def ei_osabi(self) -> Optional[OS]:
-        ei_osabi = int.from_bytes(self.file_header[7:8], byteorder=self.endian, signed=False)
+        (ei_osabi,) = struct.unpack_from(self.endian + "B", self.file_header, 7)
         return ELF.OSABI.get(ei_osabi)
 
     MACHINE = {
@@ -327,7 +324,7 @@ def ei_osabi(self) -> Optional[OS]:
 
     @property
     def e_machine(self) -> Optional[str]:
-        (e_machine,) = (int.from_bytes(self.file_header[0x12:0x14], byteorder=self.endian, signed=False),)
+        (e_machine,) = struct.unpack_from(self.endian + "H", self.file_header, 0x12)
         return ELF.MACHINE.get(e_machine)
 
     def parse_program_header(self, i) -> Phdr:
@@ -335,21 +332,13 @@ def parse_program_header(self, i) -> Phdr:
         phent = self.phbuf[phent_offset : phent_offset + self.e_phentsize]
 
         if self.bitness == 32:
-            p_type = int.from_bytes(phent[0:4], byteorder=self.endian, signed=False)
-            p_offset = int.from_bytes(phent[4:8], byteorder=self.endian, signed=False)
-            p_vaddr = int.from_bytes(phent[8:12], byteorder=self.endian, signed=False)
-            p_paddr = int.from_bytes(phent[12:16], byteorder=self.endian, signed=False)
-            p_filesz = int.from_bytes(phent[16:20], byteorder=self.endian, signed=False)
-            p_memsz = int.from_bytes(phent[20:24], byteorder=self.endian, signed=False)
-            p_flags = int.from_bytes(phent[24:28], byteorder=self.endian, signed=False)
+            p_type, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_flags = struct.unpack_from(
+                self.endian + "IIIIIII", phent, 0x0
+            )
         elif self.bitness == 64:
-            p_type = int.from_bytes(phent[0:4], byteorder=self.endian, signed=False)
-            p_flags = int.from_bytes(phent[4:8], byteorder=self.endian, signed=False)
-            p_offset = int.from_bytes(phent[8:16], byteorder=self.endian, signed=False)
-            p_vaddr = int.from_bytes(phent[16:24], byteorder=self.endian, signed=False)
-            p_paddr = int.from_bytes(phent[24:32], byteorder=self.endian, signed=False)
-            p_filesz = int.from_bytes(phent[32:40], byteorder=self.endian, signed=False)
-            p_memsz = int.from_bytes(phent[40:48], byteorder=self.endian, signed=False)
+            p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz = struct.unpack_from(
+                self.endian + "IIQQQQQ", phent, 0x0
+            )
         else:
             raise NotImplementedError()
 
@@ -373,23 +362,13 @@ def parse_section_header(self, i) -> Shdr:
         shent = self.shbuf[shent_offset : shent_offset + self.e_shentsize]
 
         if self.bitness == 32:
-            sh_name = int.from_bytes(shent[0:4], byteorder=self.endian, signed=False)
-            sh_type = int.from_bytes(shent[4:8], byteorder=self.endian, signed=False)
-            sh_flags = int.from_bytes(shent[8:12], byteorder=self.endian, signed=False)
-            sh_addr = int.from_bytes(shent[12:16], byteorder=self.endian, signed=False)
-            sh_offset = int.from_bytes(shent[16:20], byteorder=self.endian, signed=False)
-            sh_size = int.from_bytes(shent[20:24], byteorder=self.endian, signed=False)
-            sh_link = int.from_bytes(shent[24:28], byteorder=self.endian, signed=False)
-            sh_entsize = int.from_bytes(shent[36:40], byteorder=self.endian, signed=False)
+            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
+                self.endian + "IIIIIIIIII", shent, 0x0
+            )
         elif self.bitness == 64:
-            sh_name = int.from_bytes(shent[0:4], byteorder=self.endian, signed=False)
-            sh_type = int.from_bytes(shent[4:8], byteorder=self.endian, signed=False)
-            sh_flags = int.from_bytes(shent[8:16], byteorder=self.endian, signed=False)
-            sh_addr = int.from_bytes(shent[16:24], byteorder=self.endian, signed=False)
-            sh_offset = int.from_bytes(shent[24:32], byteorder=self.endian, signed=False)
-            sh_size = int.from_bytes(shent[32:40], byteorder=self.endian, signed=False)
-            sh_link = int.from_bytes(shent[40:44], byteorder=self.endian, signed=False)
-            sh_entsize = int.from_bytes(shent[56:64], byteorder=self.endian, signed=False)
+            sh_name, sh_type, sh_flags, sh_addr, sh_offset, sh_size, sh_link, _, _, sh_entsize = struct.unpack_from(
+                self.endian + "IIQQQQIIQQ", shent, 0x0
+            )
         else:
             raise NotImplementedError()
 
@@ -447,11 +426,9 @@ def versions_needed(self) -> dict[str, set[str]]:
             vn_offset = 0x0
             while True:
                 # ElfXX_Verneed layout is the same on 32 and 64 bit
-                vn_version = int.from_bytes(shdr.buf[vn_offset : vn_offset + 2], byteorder=self.endian, signed=False)
-                vn_cnt = int.from_bytes(shdr.buf[vn_offset + 2 : vn_offset + 4], byteorder=self.endian, signed=False)
-                vn_file = int.from_bytes(shdr.buf[vn_offset + 4 : vn_offset + 8], byteorder=self.endian, signed=False)
-                vn_aux = int.from_bytes(shdr.buf[vn_offset + 8 : vn_offset + 12], byteorder=self.endian, signed=False)
-                vn_next = int.from_bytes(shdr.buf[vn_offset + 12 : vn_offset + 16], byteorder=self.endian, signed=False)
+                vn_version, vn_cnt, vn_file, vn_aux, vn_next = struct.unpack_from(
+                    self.endian + "HHIII", shdr.buf, vn_offset
+                )
                 if vn_version != 1:
                     # unexpected format, don't try to keep parsing
                     break
@@ -465,12 +442,7 @@ def versions_needed(self) -> dict[str, set[str]]:
                 vna_offset = vn_offset + vn_aux
                 for _ in range(vn_cnt):
                     # ElfXX_Vernaux layout is the same on 32 and 64 bit
-                    vna_name = int.from_bytes(
-                        shdr.buf[vna_offset + 8 : vna_offset + 12], byteorder=self.endian, signed=False
-                    )
-                    vna_next = int.from_bytes(
-                        shdr.buf[vna_offset + 12 : vna_offset + 16], byteorder=self.endian, signed=False
-                    )
+                    _, _, _, vna_name, vna_next = struct.unpack_from(self.endian + "IHHII", shdr.buf, vna_offset)
 
                     # ABI names, like: "GLIBC_2.2.5"
                     abi = read_cstr(linked_shdr.buf, vna_name)
@@ -501,12 +473,10 @@ def dynamic_entries(self) -> Iterator[tuple[int, int]]:
             offset = 0x0
             while True:
                 if self.bitness == 32:
-                    d_tag = int.from_bytes(phdr.buf[offset : offset + 4], byteorder=self.endian, signed=False)
-                    d_val = int.from_bytes(phdr.buf[offset + 4 : offset + 8], byteorder=self.endian, signed=False)
+                    d_tag, d_val = struct.unpack_from(self.endian + "II", phdr.buf, offset)
                     offset += 8
                 elif self.bitness == 64:
-                    d_tag = int.from_bytes(phdr.buf[offset : offset + 8], byteorder=self.endian, signed=False)
-                    d_val = int.from_bytes(phdr.buf[offset + 8 : offset + 16], byteorder=self.endian, signed=False)
+                    d_tag, d_val = struct.unpack_from(self.endian + "QQ", phdr.buf, offset)
                     offset += 16
                 else:
                     raise NotImplementedError()
@@ -610,7 +580,7 @@ class ABITag:
 
 
 class PHNote:
-    def __init__(self, endian: Literal["big", "little"], buf: bytes):
+    def __init__(self, endian: str, buf: bytes):
         self.endian = endian
         self.buf = buf
 
@@ -622,9 +592,7 @@ def __init__(self, endian: Literal["big", "little"], buf: bytes):
         self._parse()
 
     def _parse(self):
-        namesz = int.from_bytes(self.buf[0x0:0x4], byteorder=self.endian, signed=False)
-        self.descsz = int.from_bytes(self.buf[0x4:0x8], byteorder=self.endian, signed=False)
-        self.type_ = int.from_bytes(self.buf[0x8:0xC], byteorder=self.endian, signed=False)
+        namesz, self.descsz, self.type_ = struct.unpack_from(self.endian + "III", self.buf, 0x0)
         name_offset = 0xC
         self.desc_offset = name_offset + align(namesz, 0x4)
 
@@ -648,10 +616,7 @@ def abi_tag(self) -> Optional[ABITag]:
             return None
 
         desc = self.buf[self.desc_offset : self.desc_offset + self.descsz]
-        abi_tag = int.from_bytes(desc[0:4], byteorder=self.endian, signed=False)
-        kmajor = int.from_bytes(desc[4:8], byteorder=self.endian, signed=False)
-        kminor = int.from_bytes(desc[8:12], byteorder=self.endian, signed=False)
-        kpatch = int.from_bytes(desc[12:16], byteorder=self.endian, signed=False)
+        abi_tag, kmajor, kminor, kpatch = struct.unpack_from(self.endian + "IIII", desc, 0x0)
         logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)
 
         os = GNU_ABI_TAG.get(abi_tag)
@@ -664,7 +629,7 @@ def abi_tag(self) -> Optional[ABITag]:
 
 
 class SHNote:
-    def __init__(self, endian: Literal["big", "little"], buf: bytes):
+    def __init__(self, endian: str, buf: bytes):
         self.endian = endian
         self.buf = buf
 
@@ -676,9 +641,7 @@ def __init__(self, endian: Literal["big", "little"], buf: bytes):
         self._parse()
 
     def _parse(self):
-        namesz = int.from_bytes(self.buf[0x0:0x4], byteorder=self.endian, signed=False)
-        self.descsz = int.from_bytes(self.buf[0x4:0x8], byteorder=self.endian, signed=False)
-        self.type_ = int.from_bytes(self.buf[0x8:0xC], byteorder=self.endian, signed=False)
+        namesz, self.descsz, self.type_ = struct.unpack_from(self.endian + "III", self.buf, 0x0)
         name_offset = 0xC
         self.desc_offset = name_offset + align(namesz, 0x4)
 
@@ -697,10 +660,7 @@ def abi_tag(self) -> Optional[ABITag]:
             return None
 
         desc = self.buf[self.desc_offset : self.desc_offset + self.descsz]
-        abi_tag = int.from_bytes(desc[0:4], byteorder=self.endian, signed=False)
-        kmajor = int.from_bytes(desc[4:8], byteorder=self.endian, signed=False)
-        kminor = int.from_bytes(desc[8:12], byteorder=self.endian, signed=False)
-        kpatch = int.from_bytes(desc[12:16], byteorder=self.endian, signed=False)
+        abi_tag, kmajor, kminor, kpatch = struct.unpack_from(self.endian + "IIII", desc, 0x0)
         logger.debug("GNU_ABI_TAG: 0x%02x", abi_tag)
 
         os = GNU_ABI_TAG.get(abi_tag)
@@ -724,7 +684,7 @@ class Symbol:
 class SymTab:
     def __init__(
         self,
-        endian: Literal["big", "little"],
+        endian: str,
         bitness: int,
         symtab: Shdr,
         strtab: Shdr,
@@ -736,7 +696,7 @@ def __init__(
 
         self._parse(endian, bitness, symtab.buf)
 
-    def _parse(self, endian: Literal["big", "little"], bitness: int, symtab_buf: bytes) -> None:
+    def _parse(self, endian: str, bitness: int, symtab_buf: bytes) -> None:
         """
         return the symbol's information in
         the order specified by sys/elf32.h
@@ -746,62 +706,12 @@ def _parse(self, endian: Literal["big", "little"], bitness: int, symtab_buf: byt
 
         for i in range(int(len(self.symtab.buf) / self.symtab.entsize)):
             if bitness == 32:
-                name_offset = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize : i * self.symtab.entsize + 4], byteorder=endian, signed=False
-                )
-                value = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 4 : i * self.symtab.entsize + 8],
-                    byteorder=endian,
-                    signed=False,
-                )
-                size = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 8 : i * self.symtab.entsize + 12],
-                    byteorder=endian,
-                    signed=False,
-                )
-                info = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 12 : i * self.symtab.entsize + 13],
-                    byteorder=endian,
-                    signed=False,
-                )
-                other = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 13 : i * self.symtab.entsize + 14],
-                    byteorder=endian,
-                    signed=False,
-                )
-                shndx = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 14 : i * self.symtab.entsize + 16],
-                    byteorder=endian,
-                    signed=False,
+                name_offset, value, size, info, other, shndx = struct.unpack_from(
+                    endian + "IIIBBH", symtab_buf, i * self.symtab.entsize
                 )
             elif bitness == 64:
-                name_offset = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize : i * self.symtab.entsize + 4], byteorder=endian, signed=False
-                )
-                info = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 4 : i * self.symtab.entsize + 5],
-                    byteorder=endian,
-                    signed=False,
-                )
-                other = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 5 : i * self.symtab.entsize + 6],
-                    byteorder=endian,
-                    signed=False,
-                )
-                shndx = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 6 : i * self.symtab.entsize + 8],
-                    byteorder=endian,
-                    signed=False,
-                )
-                value = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 8 : i * self.symtab.entsize + 16],
-                    byteorder=endian,
-                    signed=False,
-                )
-                size = int.from_bytes(
-                    symtab_buf[i * self.symtab.entsize + 16 : i * self.symtab.entsize + 24],
-                    byteorder=endian,
-                    signed=False,
+                name_offset, info, other, shndx, value, size = struct.unpack_from(
+                    endian + "IBBHQQ", symtab_buf, i * self.symtab.entsize
                 )
 
             self.symbols.append(Symbol(name_offset, value, size, info, other, shndx))
@@ -829,7 +739,7 @@ def get_symbols(self) -> Iterator[Symbol]:
 
     @classmethod
     def from_viv(cls, elf: "Elf.Elf") -> Optional["SymTab"]:
-        endian: Literal["big", "little"] = "little" if elf.getEndian() == 0 else "big"
+        endian = "<" if elf.getEndian() == 0 else ">"
         bitness = elf.bits
 
         SHT_SYMTAB = 0x2
@@ -1124,13 +1034,12 @@ def read_data(elf: ELF, rva: int, size: int) -> Optional[bytes]:
 
 
 def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
-    psize: int = 0
     if elf.bitness == 32:
         struct_size = 8
-        psize = 4
+        struct_format = elf.endian + "II"
     elif elf.bitness == 64:
         struct_size = 16
-        psize = 8
+        struct_format = elf.endian + "QQ"
     else:
         raise ValueError("invalid psize")
 
@@ -1138,8 +1047,7 @@ def read_go_slice(elf: ELF, rva: int) -> Optional[bytes]:
     if not struct_buf:
         return None
 
-    addr = int.from_bytes(struct_buf[0:psize], byteorder=elf.endian, signed=False)
-    length = int.from_bytes(struct_buf[psize : psize * 2], byteorder=elf.endian, signed=False)
+    addr, length = struct.unpack_from(struct_format, struct_buf, 0)
 
     return read_data(elf, addr, length)
 
@@ -1188,12 +1096,7 @@ def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
         logger.debug("go buildinfo: no buildinfo magic")
         return None
 
-    psize = int.from_bytes(
-        buf[index + len(BUILDINFO_MAGIC) : index + len(BUILDINFO_MAGIC) + 1], byteorder="little", signed=True
-    )
-    flags = int.from_bytes(
-        buf[index + len(BUILDINFO_MAGIC) + 1 : index + len(BUILDINFO_MAGIC) + 2], byteorder="little", signed=True
-    )
+    psize, flags = struct.unpack_from("<bb", buf, index + len(BUILDINFO_MAGIC))
     assert psize in (4, 8)
     is_big_endian = flags & 0b01
     has_inline_strings = flags & 0b10
@@ -1240,29 +1143,27 @@ def guess_os_from_go_buildinfo(elf: ELF) -> Optional[OS]:
         # This is the uncommon path. Most samples will have an inline GOOS string.
         #
         # To find samples on VT, use the referenced VTGrep content searches.
-        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
-        # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
-        # in which the modinfo doesn't have GOOS.
-        # 4 byte size and little endian
-        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
-        # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
-        # in which the modinfo doesn't have GOOS.
-        # 8 byte size and little endian
-        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
-        # (no matches on VT today)
-        # 4 byte size and little endian
-        # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
-        # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
-        # in which the modinfo doesn't have GOOS.
-        # 8 byte size and big endian
-
-        endian: Literal["big", "little"] = "big" if is_big_endian else "little"
-        if psize == 4:
-            build_version_address = int.from_bytes(buf[index + 0x10 : index + 0x14], byteorder=endian, signed=False)
-            modinfo_address = int.from_bytes(buf[index + 0x14 : index + 0x18], byteorder=endian, signed=False)
-        else:  # psize == 8
-            build_version_address = int.from_bytes(buf[index + 0x10 : index + 0x18], byteorder=endian, signed=False)
-            modinfo_address = int.from_bytes(buf[index + 0x18 : index + 0x20], byteorder=endian, signed=False)
+        info_format = {
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 00}
+            # like: 71e617e5cc7fda89bf67422ff60f437e9d54622382c5ed6ff31f75e601f9b22e
+            # in which the modinfo doesn't have GOOS.
+            (4, False): "<II",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 00}
+            # like: 93d3b3e2a904c6c909e20f2f76c3c2e8d0c81d535eb46e5493b5701f461816c3
+            # in which the modinfo doesn't have GOOS.
+            (8, False): "<QQ",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 04 01}
+            # (no matches on VT today)
+            (4, True): ">II",
+            # content: {ff 20 47 6f 20 62 75 69 6c 64 69 6e 66 3a 08 01}
+            # like: d44ba497964050c0e3dd2a192c511e4c3c4f17717f0322a554d64b797ee4690a
+            # in which the modinfo doesn't have GOOS.
+            (8, True): ">QQ",
+        }
+
+        build_version_address, modinfo_address = struct.unpack_from(
+            info_format[(psize, is_big_endian)], buf, index + 0x10
+        )
         logger.debug("go buildinfo: build version address: 0x%x", build_version_address)
         logger.debug("go buildinfo: modinfo address: 0x%x", modinfo_address)
 
diff --git a/capa/features/extractors/ghidra/basicblock.py b/capa/features/extractors/ghidra/basicblock.py
index 904c20e2e..25b73ee43 100644
--- a/capa/features/extractors/ghidra/basicblock.py
+++ b/capa/features/extractors/ghidra/basicblock.py
@@ -14,6 +14,7 @@
 
 
 import string
+import struct
 from typing import Iterator
 
 import ghidra
@@ -34,13 +35,13 @@ def get_printable_len(op: ghidra.program.model.scalar.Scalar) -> int:
     op_val = op.getValue()
 
     if op_bit_len == 8:
-        chars = (op_val & 0xFF).to_bytes(1, "little")
+        chars = struct.pack("<B", op_val & 0xFF)
     elif op_bit_len == 16:
-        chars = (op_val & 0xFFFF).to_bytes(2, "little")
+        chars = struct.pack("<H", op_val & 0xFFFF)
     elif op_bit_len == 32:
-        chars = (op_val & 0xFFFFFFFF).to_bytes(4, "little")
+        chars = struct.pack("<I", op_val & 0xFFFFFFFF)
     elif op_bit_len == 64:
-        chars = (op_val & 0xFFFFFFFFFFFFFFFF).to_bytes(8, "little")
+        chars = struct.pack("<Q", op_val & 0xFFFFFFFFFFFFFFFF)
     else:
         raise ValueError(f"Unhandled operand data type 0x{op_bit_len:x}.")
 
diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py
index d509a0f47..a1c088c32 100644
--- a/capa/features/extractors/ghidra/file.py
+++ b/capa/features/extractors/ghidra/file.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import re
+import struct
 from typing import Iterator
 
 from ghidra.program.model.symbol import SourceType, SymbolType
@@ -51,7 +52,7 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]])
             continue
 
         e_lfanew_bytes = block_bytez[e_lfanew : e_lfanew + 4]
-        newoff = int.from_bytes(capa.features.extractors.helpers.xor_static(e_lfanew_bytes, i), "little")
+        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(e_lfanew_bytes, i))[0]
 
         # assume XOR'd "PE" bytes exist within threshold
         if newoff > MAX_OFFSET_PE_AFTER_MZ:
diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py
index f764a64ad..eb546f504 100644
--- a/capa/features/extractors/helpers.py
+++ b/capa/features/extractors/helpers.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 
+import struct
 import builtins
 from typing import Iterator
 
@@ -156,7 +157,7 @@ def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]:
         if pblen < (e_lfanew + 4):
             continue
 
-        newoff = int.from_bytes(xor_static(pbytes[e_lfanew : e_lfanew + 4], key), "little")
+        newoff = struct.unpack("<I", xor_static(pbytes[e_lfanew : e_lfanew + 4], key))[0]
 
         nextres = pbytes.find(mzx, off + 1)
         if nextres != -1:
diff --git a/capa/features/extractors/ida/basicblock.py b/capa/features/extractors/ida/basicblock.py
index f45024140..97da4ddae 100644
--- a/capa/features/extractors/ida/basicblock.py
+++ b/capa/features/extractors/ida/basicblock.py
@@ -14,6 +14,7 @@
 
 
 import string
+import struct
 from typing import Iterator
 
 import idaapi
@@ -32,13 +33,13 @@ def get_printable_len(op: idaapi.op_t) -> int:
     op_val = capa.features.extractors.ida.helpers.mask_op_val(op)
 
     if op.dtype == idaapi.dt_byte:
-        chars = (op_val).to_bytes(1, "little")
+        chars = struct.pack("<B", op_val)
     elif op.dtype == idaapi.dt_word:
-        chars = (op_val).to_bytes(2, "little")
+        chars = struct.pack("<H", op_val)
     elif op.dtype == idaapi.dt_dword:
-        chars = (op_val).to_bytes(4, "little")
+        chars = struct.pack("<I", op_val)
     elif op.dtype == idaapi.dt_qword:
-        chars = (op_val).to_bytes(8, "little")
+        chars = struct.pack("<Q", op_val)
     else:
         raise ValueError(f"Unhandled operand data type 0x{op.dtype:x}.")
 
diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py
index ad70b3e29..a47f1524c 100644
--- a/capa/features/extractors/ida/file.py
+++ b/capa/features/extractors/ida/file.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 
+import struct
 from typing import Iterator
 
 import idc
@@ -63,7 +64,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]:
         if seg_max < (e_lfanew + 4):
             continue
 
-        newoff = int.from_bytes(capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i), "little")
+        newoff = struct.unpack("<I", capa.features.extractors.helpers.xor_static(idc.get_bytes(e_lfanew, 4), i))[0]
 
         # assume XOR'd "PE" bytes exist within threshold
         if newoff > MAX_OFFSET_PE_AFTER_MZ:
diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py
index 65c1f7d0d..0f95bdef1 100644
--- a/capa/features/extractors/viv/basicblock.py
+++ b/capa/features/extractors/viv/basicblock.py
@@ -14,6 +14,7 @@
 
 
 import string
+import struct
 from typing import Iterator
 
 import envi
@@ -118,13 +119,13 @@ def get_printable_len(oper: envi.archs.i386.disasm.i386ImmOper) -> int:
     Return string length if all operand bytes are ascii or utf16-le printable
     """
     if oper.tsize == 1:
-        chars = (oper.imm).to_bytes(1, "little")
+        chars = struct.pack("<B", oper.imm)
     elif oper.tsize == 2:
-        chars = (oper.imm).to_bytes(2, "little")
+        chars = struct.pack("<H", oper.imm)
     elif oper.tsize == 4:
-        chars = (oper.imm).to_bytes(4, "little")
+        chars = struct.pack("<I", oper.imm)
     elif oper.tsize == 8:
-        chars = (oper.imm).to_bytes(8, "little")
+        chars = struct.pack("<Q", oper.imm)
     else:
         raise ValueError(f"unexpected oper.tsize: {oper.tsize}")
 

From b07a9c5780b691cf0897e9390dcf0170ef64bbdf Mon Sep 17 00:00:00 2001
From: vibhatsu <maulikbarot2915@gmail.com>
Date: Fri, 31 Jan 2025 14:44:30 +0530
Subject: [PATCH 5/5] update CHANGELOG

Signed-off-by: vibhatsu <maulikbarot2915@gmail.com>
---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9113f6dac..1f09f8f08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@
 - strings: add type hints and fix uncovered bugs @williballenthin #2555
 - elffile: handle symbols without a name @williballenthin #2553
 - project: remove pytest-cov that wasn't used @williballenthin @2491
+- replace binascii methods with built-in bytes methods for hex conversions @v1bh475u #2582
 
 ### capa Explorer Web