@@ -6846,6 +6846,75 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
ret float %val
}

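+ ; Added descriptive comment (not part of the original patch): the incoming offset
+ ; is OR'd with a constant that has bit 31 set; the checks below expect the constant
+ ; not to be folded into the load's immediate offset field, so the S_OR_B32 result
+ ; is used directly as the soffset register operand.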
+ define amdgpu_ps float @s_buffer_load_f32_offset_or_vgpr_imm(<4 x i32> inreg %rsrc, i32 inreg %offset.s) {
+ ; GFX6-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX6: bb.1 (%ir-block.0):
+ ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX6-NEXT: {{ $}}
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX6-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX6-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
+ ; GFX7-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX7: bb.1 (%ir-block.0):
+ ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX7-NEXT: {{ $}}
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX7-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX7-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
+ ; GFX8-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX8: bb.1 (%ir-block.0):
+ ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX8-NEXT: {{ $}}
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX8-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_OR_B32_]], 0 :: (dereferenceable invariant load (s32))
+ ; GFX8-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]]
+ ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
+ ; GFX12-LABEL: name: s_buffer_load_f32_offset_or_vgpr_imm
+ ; GFX12: bb.1 (%ir-block.0):
+ ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
+ ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
+ ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
+ ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
+ ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
+ ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+ ; GFX12-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY4]], [[S_MOV_B32_]], implicit-def dead $scc
+ ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[REG_SEQUENCE]], [[S_OR_B32_]], 0, 0 :: (dereferenceable invariant load (s32))
+ ; GFX12-NEXT: $vgpr0 = COPY [[S_BUFFER_LOAD_DWORD_SGPR_IMM]]
+ ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ %offset = or i32 %offset.s, -2147483648
+ %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
+ ret float %val
+ }
+
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)