From 11902be7a57c0ccf03786aa0255fffaf0f54dbf9 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 14 Mar 2023 11:41:51 -0700 Subject: [PATCH] x86: Properly find the maximum stack slot alignment Don't assume that stack slots can only be accessed by stack or frame registers. We first find all registers defined by stack or frame registers. Then check memory accesses by such registers, including stack and frame registers. gcc/ PR target/109780 PR target/109093 * config/i386/i386.cc (ix86_update_stack_alignment): New. (ix86_find_all_reg_use_1): Likewise. (ix86_find_all_reg_use): Likewise. (ix86_find_max_used_stack_alignment): Also check memory accesses from registers defined by stack or frame registers. gcc/testsuite/ PR target/109780 PR target/109093 * g++.target/i386/pr109780-1.C: New test. * gcc.target/i386/pr109093-1.c: Likewise. * gcc.target/i386/pr109780-1.c: Likewise. * gcc.target/i386/pr109780-2.c: Likewise. * gcc.target/i386/pr109780-3.c: Likewise. Signed-off-by: H.J. Lu --- gcc/config/i386/i386.cc | 177 ++++++++++++++++++--- gcc/testsuite/g++.target/i386/pr109780-1.C | 72 +++++++++ gcc/testsuite/gcc.target/i386/pr109093-1.c | 33 ++++ gcc/testsuite/gcc.target/i386/pr109780-1.c | 14 ++ gcc/testsuite/gcc.target/i386/pr109780-2.c | 21 +++ gcc/testsuite/gcc.target/i386/pr109780-3.c | 46 ++++++ 6 files changed, 342 insertions(+), 21 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr109780-1.C create mode 100644 gcc/testsuite/gcc.target/i386/pr109093-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr109780-3.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 3128973ba79..fafd4a511a3 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -8466,6 +8466,107 @@ output_probe_stack_range (rtx reg, rtx end) return ""; } +/* Update the maximum stack slot alignment from memory alignment in + PAT. */ + +static void +ix86_update_stack_alignment (rtx, const_rtx pat, void *data) +{ + /* This insn may reference stack slot. Update the maximum stack slot + alignment. */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, pat, ALL) + if (MEM_P (*iter)) + { + unsigned int alignment = MEM_ALIGN (*iter); + unsigned int *stack_alignment + = (unsigned int *) data; + if (alignment > *stack_alignment) + *stack_alignment = alignment; + break; + } +} + +/* Helper function for ix86_find_all_reg_use. */ + +static void +ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access, + auto_bitmap &worklist) +{ + rtx dest = SET_DEST (set); + if (!REG_P (dest)) + return; + + rtx src = SET_SRC (set); + if (MEM_P (src) || CONST_SCALAR_INT_P (src)) + return; + + if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest))) + return; + + /* Add this register to stack_slot_access. */ + add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest)); + bitmap_set_bit (worklist, REGNO (dest)); +} + +/* Find all registers defined with REG. */ + +static void +ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access, + unsigned int reg, auto_bitmap &worklist) +{ + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + rtx set = single_set (insn); + if (set) + ix86_find_all_reg_use_1 (set, stack_slot_access, worklist); + + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != PARALLEL) + continue; + + for (int i = 0; i < XVECLEN (pat, 0); i++) + { + rtx exp = XVECEXP (pat, 0, i); + switch (GET_CODE (exp)) + { + case ASM_OPERANDS: + case CLOBBER: + case PREFETCH: + case USE: + break; + case UNSPEC: + case UNSPEC_VOLATILE: + for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--) + { + rtx x = XVECEXP (exp, 0, j); + if (GET_CODE (x) == SET) + ix86_find_all_reg_use_1 (x, stack_slot_access, + worklist); + } + break; + case SET: + ix86_find_all_reg_use_1 (exp, stack_slot_access, + worklist); + break; + default: + gcc_unreachable (); + break; + } + } + } +} + /* Set stack_frame_required to false if stack frame isn't required. Update STACK_ALIGNMENT to the largest alignment, in bits, of stack slot used if stack frame is required and CHECK_STACK_SLOT is true. */ @@ -8484,10 +8585,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, add_to_hard_reg_set (&set_up_by_prologue, Pmode, HARD_FRAME_POINTER_REGNUM); - /* The preferred stack alignment is the minimum stack alignment. */ - if (stack_alignment > crtl->preferred_stack_boundary) - stack_alignment = crtl->preferred_stack_boundary; - bool require_stack_frame = false; FOR_EACH_BB_FN (bb, cfun) @@ -8499,27 +8596,65 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, set_up_by_prologue)) { require_stack_frame = true; - - if (check_stack_slot) - { - /* Find the maximum stack alignment. */ - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) - if (MEM_P (*iter) - && (reg_mentioned_p (stack_pointer_rtx, - *iter) - || reg_mentioned_p (frame_pointer_rtx, - *iter))) - { - unsigned int alignment = MEM_ALIGN (*iter); - if (alignment > stack_alignment) - stack_alignment = alignment; - } - } + break; } } cfun->machine->stack_frame_required = require_stack_frame; + + /* Stop if we don't need to check stack slot. */ + if (!check_stack_slot) + return; + + /* The preferred stack alignment is the minimum stack alignment. */ + if (stack_alignment > crtl->preferred_stack_boundary) + stack_alignment = crtl->preferred_stack_boundary; + + HARD_REG_SET stack_slot_access; + CLEAR_HARD_REG_SET (stack_slot_access); + + /* Stack slot can be accessed by stack pointer, frame pointer or + registers defined by stack pointer or frame pointer. */ + auto_bitmap worklist; + + add_to_hard_reg_set (&stack_slot_access, Pmode, + STACK_POINTER_REGNUM); + bitmap_set_bit (worklist, STACK_POINTER_REGNUM); + + if (frame_pointer_needed) + { + add_to_hard_reg_set (&stack_slot_access, Pmode, + HARD_FRAME_POINTER_REGNUM); + bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM); + } + + unsigned int reg; + + do + { + reg = bitmap_clear_first_set_bit (worklist); + ix86_find_all_reg_use (stack_slot_access, reg, worklist); + } + while (!bitmap_empty_p (worklist)); + + hard_reg_set_iterator hrsi; + + EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi) + for (df_ref ref = DF_REG_USE_CHAIN (reg); + ref != NULL; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + continue; + + rtx_insn *insn = DF_REF_INSN (ref); + + if (!NONJUMP_INSN_P (insn)) + continue; + + note_stores (insn, ix86_update_stack_alignment, + &stack_alignment); + } } /* Finalize stack_realign_needed and frame_pointer_needed flags, which diff --git a/gcc/testsuite/g++.target/i386/pr109780-1.C b/gcc/testsuite/g++.target/i386/pr109780-1.C new file mode 100644 index 00000000000..7e3eabdec94 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr109780-1.C @@ -0,0 +1,72 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target c++17 } */ +/* { dg-options "-O2 -mavx2 -mtune=haswell" } */ + +template struct remove_reference { + using type = __remove_reference(_Tp); +}; +template struct MaybeStorageBase { + T val; + struct Union { + ~Union(); + } mStorage; +}; +template struct MaybeStorage : MaybeStorageBase { + char mIsSome; +}; +template ::type> +constexpr MaybeStorage Some(T &&); +template constexpr MaybeStorage Some(T &&aValue) { + return {aValue}; +} +template struct Span { + int operator[](long idx) { + int *__trans_tmp_4; + if (__builtin_expect(idx, 0)) + *(int *)__null = false; + __trans_tmp_4 = storage_.data(); + return __trans_tmp_4[idx]; + } + struct { + int *data() { return data_; } + int *data_; + } storage_; +}; +struct Variant { + template Variant(RefT) {} +}; +long from_i, from___trans_tmp_9; +namespace js::intl { +struct DecimalNumber { + Variant string_; + unsigned long significandStart_; + unsigned long significandEnd_; + bool zero_ = false; + bool negative_; + template DecimalNumber(CharT string) : string_(string) {} + template + static MaybeStorage from(Span); + void from(); +}; +} // namespace js::intl +void js::intl::DecimalNumber::from() { + Span __trans_tmp_3; + from(__trans_tmp_3); +} +template +MaybeStorage +js::intl::DecimalNumber::from(Span chars) { + DecimalNumber number(chars); + if (auto ch = chars[from_i]) { + from_i++; + number.negative_ = ch == '-'; + } + while (from___trans_tmp_9 && chars[from_i]) + ; + if (chars[from_i]) + while (chars[from_i - 1]) + number.zero_ = true; + return Some(number); +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109093-1.c b/gcc/testsuite/gcc.target/i386/pr109093-1.c new file mode 100644 index 00000000000..58a7b006c8a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109093-1.c @@ -0,0 +1,33 @@ +/* { dg-do run { target avx2_runtime } } */ +/* { dg-options "-O2 -mavx2 -mtune=znver1 -ftrivial-auto-var-init=zero -fno-stack-protector" } */ + +int a, b, c, d; +char e, f = 1; +short g, h, i; + +__attribute__ ((weak)) +void +run (void) +{ + short j; + + for (; g >= 0; --g) + { + int *k[10]; + + for (d = 0; d < 10; d++) + k[d] = &b; + + c = *k[1]; + + for (; a;) + j = i - c / f || (e ^= h); + } +} + +int +main (void) +{ + run (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr109780-1.c b/gcc/testsuite/gcc.target/i386/pr109780-1.c new file mode 100644 index 00000000000..6b06947f2a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake" } */ + +char perm[64]; + +void +__attribute__((noipa)) +foo (int n) +{ + for (int i = 0; i < n; ++i) + perm[i] = i; +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109780-2.c b/gcc/testsuite/gcc.target/i386/pr109780-2.c new file mode 100644 index 00000000000..152da06c6ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake" } */ + +#define N 9 + +void +f (double x, double y, double *res) +{ + y = -y; + for (int i = 0; i < N; ++i) + { + double tmp = y; + y = x; + x = tmp; + res[i] = i; + } + res[N] = y * y; + res[N + 1] = x; +} + +/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr109780-3.c b/gcc/testsuite/gcc.target/i386/pr109780-3.c new file mode 100644 index 00000000000..a3a770a80e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109780-3.c @@ -0,0 +1,46 @@ +/* { dg-do run { target avx2_runtime } } */ +/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector -fno-stack-clash-protection" } */ + +char a; +static int b, c, f; +char *d = &a; +static char *e = &a; + +__attribute__ ((weak)) +void +g (int h, int i) +{ + int j = 1; + for (; c != -3; c = c - 1) + { + int k[10]; + f = 0; + for (; f < 10; f++) + k[f] = 0; + *d = k[1]; + if (i < *d) + { + *e = h; + for (; j < 9; j++) + { + b = 1; + for (; b < 7; b++) + ; + } + } + } +} + +__attribute__ ((weak)) +void +run (void) +{ + g (1, 1); +} + +int +main (void) +{ + run (); + return 0; +}