From 659517606b10f77e967d3ac5aca5c37585e315bd Mon Sep 17 00:00:00 2001
From: dfukalov
Date: Mon, 27 Jan 2025 19:31:50 +0100
Subject: [PATCH 1/3] [AMDGPU][GlobalISel] Fix assert on APInt creation.

Since 3494ee95902cef62f767489802e469c58a13ea04, APInt no longer implicitly
truncates values, so it asserts when a large signed value is converted to an
(implicitly) unsigned APInt. The change explicitly marks the offset as a
signed value.
---
 llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp |  2 +-
 .../AMDGPU/GlobalISel/assert-signed-apint.ll     | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index d64337c4cb909..0b18c6b0e923a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -56,7 +56,7 @@ AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg,
 
   Register Base;
   if (KnownBits && mi_match(Reg, MRI, m_GOr(m_Reg(Base), m_ICst(Offset))) &&
-      KnownBits->maskedValueIsZero(Base, APInt(32, Offset)))
+      KnownBits->maskedValueIsZero(Base, APInt(32, Offset, /*isSigned=*/true)))
     return std::pair(Base, Offset);
 
   // Handle G_PTRTOINT (G_PTR_ADD base, const) case
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
new file mode 100644
index 0000000000000..33bdd67a42782
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
@@ -0,0 +1,14 @@
+; REQUIRES: asserts
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck %s
+
+;
+
+; CHECK-LABEL: @test
+; CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM
+
+define amdgpu_cs void @test(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) {
+  %off = or i32 %i, -2147483648
+  %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
+  store i32 %v, ptr addrspace(1) %out, align 4
+  ret void
+}

From 3023d8aa6b61ee0dd94720c51f711bb1992e64ac Mon Sep 17 00:00:00 2001
From: dfukalov
Date: Mon, 27 Jan 2025 19:50:24 +0100
Subject: [PATCH 2/3] fixup! [AMDGPU][GlobalISel] Fix assert on APInt creation.

Add comment to test.
---
 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
index 33bdd67a42782..49344f7a0fd59 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
@@ -1,7 +1,7 @@
 ; REQUIRES: asserts
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck %s
 
-;
+; Check that APInt doesn't assert on creation from -2147483648 value.
 
 ; CHECK-LABEL: @test
 ; CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM

From 2ea9ccd0e934918f3f007483b749ad4a6975d439 Mon Sep 17 00:00:00 2001
From: dfukalov
Date: Tue, 28 Jan 2025 14:25:22 +0100
Subject: [PATCH 3/3] fixup! fixup! [AMDGPU][GlobalISel] Fix assert on APInt creation.

Addressed comments.
---
 .../AMDGPU/GlobalISel/assert-signed-apint.ll  | 14 ----
 .../GlobalISel/llvm.amdgcn.s.buffer.load.ll   | 69 +++++++++++++++++++
 2 files changed, 69 insertions(+), 14 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
deleted file mode 100644
index 49344f7a0fd59..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-signed-apint.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select -o - %s | FileCheck %s
-
-; Check that APInt doesn't assert on creation from -2147483648 value.
-
-; CHECK-LABEL: @test
-; CHECK: S_BUFFER_LOAD_DWORD_SGPR_IMM
-
-define amdgpu_cs void @test(<4 x i32> inreg %base, i32 inreg %i, ptr addrspace(1) inreg %out) {
-  %off = or i32 %i, -2147483648
-  %v = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %base, i32 %off, i32 0)
-  store i32 %v, ptr addrspace(1) %out, align 4
-  ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 91cde52cd2d67..79b333c08cb2d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -6846,6 +6846,75 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
   ret float %val
 }
 
+define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rsrc, i32 inreg %offset.s) {
+  ; GFX6-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+  ; GFX6: bb.1 (%ir-block.0):
+  ; GFX6-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+  ; GFX6-NEXT: {{  $}}
+  ; GFX6-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX6-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX6-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX6-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+  ; GFX6-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX6-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+  ; GFX6-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+  ; GFX6-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+  ; GFX6-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+  ; GFX6-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+  ; GFX6-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ;
+  ; GFX7-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+  ; GFX7: bb.1 (%ir-block.0):
+  ; GFX7-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+  ; GFX7-NEXT: {{  $}}
+  ; GFX7-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX7-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX7-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX7-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+  ; GFX7-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX7-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+  ; GFX7-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+  ; GFX7-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+  ; GFX7-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+  ; GFX7-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+  ; GFX7-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ;
+  ; GFX8-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+  ; GFX8: bb.1 (%ir-block.0):
+  ; GFX8-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+  ; GFX8-NEXT: {{  $}}
+  ; GFX8-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX8-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX8-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX8-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+  ; GFX8-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX8-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+  ; GFX8-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+  ; GFX8-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+  ; GFX8-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+  ; GFX8-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+  ; GFX8-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  ;
+  ; GFX12-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+  ; GFX12: bb.1 (%ir-block.0):
+  ; GFX12-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+  ; GFX12-NEXT: {{  $}}
+  ; GFX12-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+  ; GFX12-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+  ; GFX12-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+  ; GFX12-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+  ; GFX12-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+  ; GFX12-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+  ; GFX12-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+  ; GFX12-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+  ; GFX12-NEXT:   [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_OR_B32_]], 0, 0 :: (dereferenceable invariant load (s32))
+  ; GFX12-NEXT:   $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
+  ; GFX12-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
+  %offset = or i32 %offset.s, -2147483648
+  %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+  ret float %val
+}
+
 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
 declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
 declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
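
Background for the one-line change in PATCH 1/3: the commit message says APInt no longer implicitly truncates its constructor argument. A minimal standalone C++ sketch of that behavior follows; it is illustrative only and not part of the patches. It assumes an LLVM revision after 3494ee95902cef62f767489802e469c58a13ea04, and the function name sketch() plus the hard-coded offset are made up for the example.

  #include "llvm/ADT/APInt.h"
  #include <cstdint>

  void sketch() {
    // The test case ORs the offset with -2147483648 (0x80000000).
    int64_t Offset = -2147483648LL;

    // Passing the negative int64_t through the unsigned constructor
    // parameter sign-extends it to 0xFFFFFFFF80000000, which does not fit
    // in 32 unsigned bits, so this form would trigger the assertion:
    //   llvm::APInt Bad(32, Offset);

    // Marking the value as signed, as the patch does, is accepted because
    // -2147483648 is representable as a signed 32-bit integer.
    llvm::APInt Good(32, Offset, /*isSigned=*/true);
    (void)Good;
  }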