From e47f74378909c7c94b620e9a4f1cff404ffb113b Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Fri, 7 Mar 2025 13:16:40 +0000 Subject: [PATCH 1/3] gh-130956: emit AArch64 trampolines only for long jumps Emit the AArch64 trampoline only when the symbol address is outside the signed 28-bit (+/-128 MiB) range of the patched location; otherwise the instruction is patched directly with the address of the symbol. Enable the PLT for Linux AArch64: without it no trampolines are emitted at all and symbols are referenced via the GOT. --- Python/jit.c | 12 +++++++++++- Tools/jit/_targets.py | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Python/jit.c b/Python/jit.c index 092b873bc734e1..95b5a1b52b8b65 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -430,6 +430,17 @@ void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *s void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state) { + + uint64_t value = (uintptr_t)symbols_map[ordinal]; + int64_t range = value - (uintptr_t)location; + + // If we are in range of 28 signed bits, we patch the instruction with + // the address of the symbol. 
+ if (range >= -(1 << 27) && range < (1 << 27)) { + patch_aarch64_26r(location, (uintptr_t)value); + return; + } + // Masking is done modulo 32 as the mask is stored as an array of uint32_t const uint32_t symbol_mask = 1 << (ordinal % 32); const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32]; @@ -445,7 +456,6 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state) uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE); assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size); - uint64_t value = (uintptr_t)symbols_map[ordinal]; /* Generate the trampoline 0: 58000048 ldr x8, 8 diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index aa2b56abf446b1..da0a28619bff55 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -507,6 +507,7 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: # On aarch64 Linux, intrinsics were being emitted and this flag # was required to disable them. "-mno-outline-atomics", + "-fplt", ] target = _ELF(host, alignment=8, args=args) elif re.fullmatch(r"i686-pc-windows-msvc", host): From efdef5ff469c9f553a6fba174c9a5fa15b5741b0 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 12:05:48 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-03-10-12-05-45.gh-issue-130956.f823Ih.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-05-45.gh-issue-130956.f823Ih.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-05-45.gh-issue-130956.f823Ih.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-05-45.gh-issue-130956.f823Ih.rst new file mode 100644 index 00000000000000..daf9c8ea09f081 --- /dev/null +++ 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-05-45.gh-issue-130956.f823Ih.rst @@ -0,0 +1 @@ +Optimize the AArch64 code generation for the JIT. Patch by Diego Russo From c3108897413d11e46e1c79e5b05cb9742078a9e5 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 19 Mar 2025 15:38:00 +0000 Subject: [PATCH 3/3] Remove -fplt flag on Linux --- Tools/jit/_targets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index da0a28619bff55..aa2b56abf446b1 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -507,7 +507,6 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: # On aarch64 Linux, intrinsics were being emitted and this flag # was required to disable them. "-mno-outline-atomics", - "-fplt", ] target = _ELF(host, alignment=8, args=args) elif re.fullmatch(r"i686-pc-windows-msvc", host):