diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst new file mode 100644 index 00000000000000..daf9c8ea09f081 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst @@ -0,0 +1 @@ +Optimize the AArch64 code generation for the JIT. Patch by Diego Russo diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 4ddbe967438bd1..8faa9e8cac2d85 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -209,7 +209,24 @@ def pad(self, alignment: int) -> None: self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def remove_jump(self, *, alignment: int = 1) -> None: + def add_nops(self, nop: bytes, alignment: int) -> None: + """Add NOPs until there is alignment. Fail if it is not possible.""" + offset = len(self.body) + nop_size = len(nop) + + # Calculate the gap to the next multiple of alignment. + gap = -offset % alignment + if gap: + if gap % nop_size == 0: + count = gap // nop_size + self.body.extend(nop * count) + else: + raise ValueError( + f"Cannot add nops of size '{nop_size}' to a body with " + f"offset '{offset}' to align with '{alignment}'" + ) + + def remove_jump(self) -> None: """Remove a zero-length continuation jump, if it exists.""" hole = max(self.holes, key=lambda hole: hole.offset) match hole: @@ -244,7 +261,7 @@ def remove_jump(self, *, alignment: int = 1) -> None: jump = b"\x00\x00\x00\x14" case _: return - if self.body[offset:] == jump and offset % alignment == 0: + if self.body[offset:] == jump: self.body = self.body[:offset] self.holes.remove(hole) @@ -266,10 +283,7 @@ class StencilGroup: _trampolines: set[int] = dataclasses.field(default_factory=set, init=False) def process_relocations( - self, - known_symbols: dict[str, int], - *, - alignment: int = 1, + self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b"" ) -> None: """Fix up all GOT and internal relocations for this stencil group.""" for hole in self.code.holes.copy(): @@ -289,8 +303,8 @@ def process_relocations( self._trampolines.add(ordinal) hole.addend = ordinal hole.symbol = None - self.code.remove_jump(alignment=alignment) - self.code.pad(alignment) + self.code.remove_jump() + self.code.add_nops(nop=nop, alignment=alignment) self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index aa2b56abf446b1..b5a839e07d4daf 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]): verbose: bool = False known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + def _get_nop(self) -> bytes: + if re.fullmatch(r"aarch64-.*", self.triple): + nop = b"\x1f\x20\x03\xD5" + elif re.fullmatch(r"x86_64-.*|i686.*", self.triple): + nop = b"\x90" + else: + raise ValueError(f"NOP not defined for {self.triple}") + return nop + def _compute_digest(self, out: pathlib.Path) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) @@ -172,7 +181,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: stencil_groups = {task.get_name(): task.result() for task in tasks} for stencil_group in stencil_groups.values(): stencil_group.process_relocations( - known_symbols=self.known_symbols, alignment=self.alignment + known_symbols=self.known_symbols, + alignment=self.alignment, + nop=self._get_nop(), ) return stencil_groups