Skip to content

Commit

Permalink
x86: Properly find the maximum stack slot alignment
Browse files Browse the repository at this point in the history
Don't assume that stack slots can only be accessed by stack or frame
registers.  We first find all registers defined by stack or frame
registers.  Then check memory accesses by such registers, including
stack and frame registers.

gcc/

	PR target/109780
	PR target/109093
	* config/i386/i386.cc (ix86_update_stack_alignment): New.
	(ix86_find_all_reg_use_1): Likewise.
	(ix86_find_all_reg_use): Likewise.
	(ix86_find_max_used_stack_alignment): Also check memory accesses
	from registers defined by stack or frame registers.

gcc/testsuite/

	PR target/109780
	PR target/109093
	* g++.target/i386/pr109780-1.C: New test.
	* gcc.target/i386/pr109093-1.c: Likewise.
	* gcc.target/i386/pr109780-1.c: Likewise.
	* gcc.target/i386/pr109780-2.c: Likewise.
	* gcc.target/i386/pr109780-3.c: Likewise.

Signed-off-by: H.J. Lu <[email protected]>
  • Loading branch information
hjl-tools committed Feb 16, 2025
1 parent fa699c1 commit 11902be
Show file tree
Hide file tree
Showing 6 changed files with 342 additions and 21 deletions.
177 changes: 156 additions & 21 deletions gcc/config/i386/i386.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8466,6 +8466,107 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}

/* Update the maximum stack slot alignment from memory alignment in
PAT. */

static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
/* This insn may reference stack slot. Update the maximum stack slot
alignment. */
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, pat, ALL)
if (MEM_P (*iter))
{
unsigned int alignment = MEM_ALIGN (*iter);
unsigned int *stack_alignment
= (unsigned int *) data;
if (alignment > *stack_alignment)
*stack_alignment = alignment;
break;
}
}

/* Helper function for ix86_find_all_reg_use. */

static void
ix86_find_all_reg_use_1 (rtx set, HARD_REG_SET &stack_slot_access,
auto_bitmap &worklist)
{
rtx dest = SET_DEST (set);
if (!REG_P (dest))
return;

rtx src = SET_SRC (set);
if (MEM_P (src) || CONST_SCALAR_INT_P (src))
return;

if (TEST_HARD_REG_BIT (stack_slot_access, REGNO (dest)))
return;

/* Add this register to stack_slot_access. */
add_to_hard_reg_set (&stack_slot_access, Pmode, REGNO (dest));
bitmap_set_bit (worklist, REGNO (dest));
}

/* Find all registers defined with REG. */

static void
ix86_find_all_reg_use (HARD_REG_SET &stack_slot_access,
unsigned int reg, auto_bitmap &worklist)
{
for (df_ref ref = DF_REG_USE_CHAIN (reg);
ref != NULL;
ref = DF_REF_NEXT_REG (ref))
{
if (DF_REF_IS_ARTIFICIAL (ref))
continue;

rtx_insn *insn = DF_REF_INSN (ref);

if (!NONJUMP_INSN_P (insn))
continue;

rtx set = single_set (insn);
if (set)
ix86_find_all_reg_use_1 (set, stack_slot_access, worklist);

rtx pat = PATTERN (insn);
if (GET_CODE (pat) != PARALLEL)
continue;

for (int i = 0; i < XVECLEN (pat, 0); i++)
{
rtx exp = XVECEXP (pat, 0, i);
switch (GET_CODE (exp))
{
case ASM_OPERANDS:
case CLOBBER:
case PREFETCH:
case USE:
break;
case UNSPEC:
case UNSPEC_VOLATILE:
for (int j = XVECLEN (exp, 0) - 1; j >= 0; j--)
{
rtx x = XVECEXP (exp, 0, j);
if (GET_CODE (x) == SET)
ix86_find_all_reg_use_1 (x, stack_slot_access,
worklist);
}
break;
case SET:
ix86_find_all_reg_use_1 (exp, stack_slot_access,
worklist);
break;
default:
gcc_unreachable ();
break;
}
}
}
}

/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
Expand All @@ -8484,10 +8585,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);

/* The preferred stack alignment is the minimum stack alignment. */
if (stack_alignment > crtl->preferred_stack_boundary)
stack_alignment = crtl->preferred_stack_boundary;

bool require_stack_frame = false;

FOR_EACH_BB_FN (bb, cfun)
Expand All @@ -8499,27 +8596,65 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;

if (check_stack_slot)
{
/* Find the maximum stack alignment. */
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
if (MEM_P (*iter)
&& (reg_mentioned_p (stack_pointer_rtx,
*iter)
|| reg_mentioned_p (frame_pointer_rtx,
*iter)))
{
unsigned int alignment = MEM_ALIGN (*iter);
if (alignment > stack_alignment)
stack_alignment = alignment;
}
}
break;
}
}

cfun->machine->stack_frame_required = require_stack_frame;

/* Stop if we don't need to check stack slot. */
if (!check_stack_slot)
return;

/* The preferred stack alignment is the minimum stack alignment. */
if (stack_alignment > crtl->preferred_stack_boundary)
stack_alignment = crtl->preferred_stack_boundary;

HARD_REG_SET stack_slot_access;
CLEAR_HARD_REG_SET (stack_slot_access);

/* Stack slot can be accessed by stack pointer, frame pointer or
registers defined by stack pointer or frame pointer. */
auto_bitmap worklist;

add_to_hard_reg_set (&stack_slot_access, Pmode,
STACK_POINTER_REGNUM);
bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

if (frame_pointer_needed)
{
add_to_hard_reg_set (&stack_slot_access, Pmode,
HARD_FRAME_POINTER_REGNUM);
bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
}

unsigned int reg;

do
{
reg = bitmap_clear_first_set_bit (worklist);
ix86_find_all_reg_use (stack_slot_access, reg, worklist);
}
while (!bitmap_empty_p (worklist));

hard_reg_set_iterator hrsi;

EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, reg, hrsi)
for (df_ref ref = DF_REG_USE_CHAIN (reg);
ref != NULL;
ref = DF_REF_NEXT_REG (ref))
{
if (DF_REF_IS_ARTIFICIAL (ref))
continue;

rtx_insn *insn = DF_REF_INSN (ref);

if (!NONJUMP_INSN_P (insn))
continue;

note_stores (insn, ix86_update_stack_alignment,
&stack_alignment);
}
}

/* Finalize stack_realign_needed and frame_pointer_needed flags, which
Expand Down
72 changes: 72 additions & 0 deletions gcc/testsuite/g++.target/i386/pr109780-1.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/* { dg-do compile } */
/* { dg-require-effective-target c++17 } */
/* { dg-options "-O2 -mavx2 -mtune=haswell" } */

template <typename _Tp> struct remove_reference {
using type = __remove_reference(_Tp);
};
template <typename T> struct MaybeStorageBase {
T val;
struct Union {
~Union();
} mStorage;
};
template <typename T> struct MaybeStorage : MaybeStorageBase<T> {
char mIsSome;
};
template <typename T, typename U = typename remove_reference<T>::type>
constexpr MaybeStorage<U> Some(T &&);
template <typename T, typename U> constexpr MaybeStorage<U> Some(T &&aValue) {
return {aValue};
}
template <class> struct Span {
int operator[](long idx) {
int *__trans_tmp_4;
if (__builtin_expect(idx, 0))
*(int *)__null = false;
__trans_tmp_4 = storage_.data();
return __trans_tmp_4[idx];
}
struct {
int *data() { return data_; }
int *data_;
} storage_;
};
struct Variant {
template <typename RefT> Variant(RefT) {}
};
long from_i, from___trans_tmp_9;
namespace js::intl {
struct DecimalNumber {
Variant string_;
unsigned long significandStart_;
unsigned long significandEnd_;
bool zero_ = false;
bool negative_;
template <typename CharT> DecimalNumber(CharT string) : string_(string) {}
template <typename CharT>
static MaybeStorage<DecimalNumber> from(Span<const CharT>);
void from();
};
} // namespace js::intl
void js::intl::DecimalNumber::from() {
Span<const char16_t> __trans_tmp_3;
from(__trans_tmp_3);
}
template <typename CharT>
MaybeStorage<js::intl::DecimalNumber>
js::intl::DecimalNumber::from(Span<const CharT> chars) {
DecimalNumber number(chars);
if (auto ch = chars[from_i]) {
from_i++;
number.negative_ = ch == '-';
}
while (from___trans_tmp_9 && chars[from_i])
;
if (chars[from_i])
while (chars[from_i - 1])
number.zero_ = true;
return Some(number);
}

/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
33 changes: 33 additions & 0 deletions gcc/testsuite/gcc.target/i386/pr109093-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/* { dg-do run { target avx2_runtime } } */
/* { dg-options "-O2 -mavx2 -mtune=znver1 -ftrivial-auto-var-init=zero -fno-stack-protector" } */

int a, b, c, d;
char e, f = 1;
short g, h, i;

__attribute__ ((weak))
void
run (void)
{
short j;

for (; g >= 0; --g)
{
int *k[10];

for (d = 0; d < 10; d++)
k[d] = &b;

c = *k[1];

for (; a;)
j = i - c / f || (e ^= h);
}
}

int
main (void)
{
run ();
return 0;
}
14 changes: 14 additions & 0 deletions gcc/testsuite/gcc.target/i386/pr109780-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-O3 -march=skylake" } */

char perm[64];

void
__attribute__((noipa))
foo (int n)
{
for (int i = 0; i < n; ++i)
perm[i] = i;
}

/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
21 changes: 21 additions & 0 deletions gcc/testsuite/gcc.target/i386/pr109780-2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O3 -march=skylake" } */

#define N 9

void
f (double x, double y, double *res)
{
y = -y;
for (int i = 0; i < N; ++i)
{
double tmp = y;
y = x;
x = tmp;
res[i] = i;
}
res[N] = y * y;
res[N + 1] = x;
}

/* { dg-final { scan-assembler-not "and\[lq\]?\[^\\n\]*-32,\[^\\n\]*sp" } } */
46 changes: 46 additions & 0 deletions gcc/testsuite/gcc.target/i386/pr109780-3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* { dg-do run { target avx2_runtime } } */
/* { dg-options "-O2 -mavx2 -mtune=znver1 -fno-stack-protector -fno-stack-clash-protection" } */

char a;
static int b, c, f;
char *d = &a;
static char *e = &a;

__attribute__ ((weak))
void
g (int h, int i)
{
int j = 1;
for (; c != -3; c = c - 1)
{
int k[10];
f = 0;
for (; f < 10; f++)
k[f] = 0;
*d = k[1];
if (i < *d)
{
*e = h;
for (; j < 9; j++)
{
b = 1;
for (; b < 7; b++)
;
}
}
}
}

__attribute__ ((weak))
void
run (void)
{
g (1, 1);
}

int
main (void)
{
run ();
return 0;
}

0 comments on commit 11902be

Please sign in to comment.