Skip to content

Commit c57002f

Browse files
committed
scratch
1 parent 08028d6 commit c57002f

File tree

8 files changed

+127
-103
lines changed

8 files changed

+127
-103
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

+9-1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/IR/Value.h"
2121
#include "llvm/Support/Casting.h"
2222
#include "llvm/Support/ErrorHandling.h"
23+
#include "llvm/Support/NVPTXAddrSpace.h"
2324
#include "llvm/Transforms/InstCombine/InstCombiner.h"
2425
#include <optional>
2526
using namespace llvm;
@@ -562,4 +563,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
562563
}
563564
}
564565
return nullptr;
565-
}
566+
}
567+
568+
/// Report the address space that the pointer value \p V can be assumed to
/// live in.
///
/// \param V the value being queried.
/// \returns ADDRESS_SPACE_LOCAL when \p V is an AllocaInst; otherwise -1,
///          which becomes the maximum unsigned value because the return type
///          is unsigned.
// NOTE(review): -1 is presumably the "no assumption" sentinel expected by the
// TargetTransformInfo hook of the same name -- confirm against the TTI
// contract.
unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
569+
if (isa<AllocaInst>(V))
570+
return ADDRESS_SPACE_LOCAL;
571+
572+
return -1; // No alloca matched: fall back to the sentinel value.
573+
}

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h

+2
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,8 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
129129

130130
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
131131
Value *NewV) const;
132+
133+
/// Returns the address space \p V can be assumed to reside in:
/// ADDRESS_SPACE_LOCAL for alloca instructions, and -1 (converted to
/// unsigned) for every other value.
unsigned getAssumedAddrSpace(const Value *V) const;
132134
};
133135

134136
} // end namespace llvm

llvm/test/CodeGen/NVPTX/local-stack-frame.ll

+4-4
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,13 @@
66
; Ensure we access the local stack properly
77

88
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
9-
; PTX32: cvta.local.u32 %SP, %SPL;
109
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
11-
; PTX32: st.volatile.u32 [%SP], %r{{[0-9]+}};
10+
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
11+
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
1212
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
13-
; PTX64: cvta.local.u64 %SP, %SPL;
1413
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
15-
; PTX64: st.volatile.u32 [%SP], %r{{[0-9]+}};
14+
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
15+
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
1616
define void @foo(i32 %a) {
1717
%local = alloca i32, align 4
1818
store volatile i32 %a, ptr %local

llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll

+16-27
Original file line number | Diff line number | Diff line change
@@ -29,31 +29,32 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
2929
; PTX-NEXT: .reg .pred %p<2>;
3030
; PTX-NEXT: .reg .b16 %rs<3>;
3131
; PTX-NEXT: .reg .b32 %r<11>;
32-
; PTX-NEXT: .reg .b64 %rd<9>;
32+
; PTX-NEXT: .reg .b64 %rd<10>;
3333
; PTX-EMPTY:
3434
; PTX-NEXT: // %bb.0: // %entry
3535
; PTX-NEXT: mov.u64 %SPL, __local_depot0;
3636
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
3737
; PTX-NEXT: ld.param.u8 %rs1, [non_kernel_function_param_1];
3838
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
3939
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
40-
; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2];
41-
; PTX-NEXT: ld.param.u64 %rd2, [non_kernel_function_param_0+8];
42-
; PTX-NEXT: st.u64 [%SP+8], %rd2;
43-
; PTX-NEXT: ld.param.u64 %rd3, [non_kernel_function_param_0];
44-
; PTX-NEXT: st.u64 [%SP], %rd3;
45-
; PTX-NEXT: mov.u64 %rd4, gi;
46-
; PTX-NEXT: cvta.global.u64 %rd5, %rd4;
47-
; PTX-NEXT: add.u64 %rd6, %SP, 0;
48-
; PTX-NEXT: selp.b64 %rd7, %rd6, %rd5, %p1;
49-
; PTX-NEXT: add.s64 %rd8, %rd7, %rd1;
50-
; PTX-NEXT: ld.u8 %r1, [%rd8];
51-
; PTX-NEXT: ld.u8 %r2, [%rd8+1];
40+
; PTX-NEXT: add.u64 %rd1, %SP, 0;
41+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
42+
; PTX-NEXT: ld.param.s32 %rd3, [non_kernel_function_param_2];
43+
; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
44+
; PTX-NEXT: st.local.u64 [%rd2+8], %rd4;
45+
; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
46+
; PTX-NEXT: st.local.u64 [%rd2], %rd5;
47+
; PTX-NEXT: mov.u64 %rd6, gi;
48+
; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
49+
; PTX-NEXT: selp.b64 %rd8, %rd1, %rd7, %p1;
50+
; PTX-NEXT: add.s64 %rd9, %rd8, %rd3;
51+
; PTX-NEXT: ld.u8 %r1, [%rd9];
52+
; PTX-NEXT: ld.u8 %r2, [%rd9+1];
5253
; PTX-NEXT: shl.b32 %r3, %r2, 8;
5354
; PTX-NEXT: or.b32 %r4, %r3, %r1;
54-
; PTX-NEXT: ld.u8 %r5, [%rd8+2];
55+
; PTX-NEXT: ld.u8 %r5, [%rd9+2];
5556
; PTX-NEXT: shl.b32 %r6, %r5, 16;
56-
; PTX-NEXT: ld.u8 %r7, [%rd8+3];
57+
; PTX-NEXT: ld.u8 %r7, [%rd9+3];
5758
; PTX-NEXT: shl.b32 %r8, %r7, 24;
5859
; PTX-NEXT: or.b32 %r9, %r8, %r6;
5960
; PTX-NEXT: or.b32 %r10, %r9, %r4;
@@ -90,7 +91,6 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
9091
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
9192
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
9293
; OPT-NEXT: ret void
93-
;
9494
%tmp = load i32, ptr %input1, align 4
9595
%add = add i32 %tmp, %input2
9696
store i32 %add, ptr %out
@@ -125,7 +125,6 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
125125
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
126126
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
127127
; OPT-NEXT: ret void
128-
;
129128
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
130129
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
131130
%int1 = load i32, ptr %gep1
@@ -166,7 +165,6 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
166165
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
167166
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
168167
; OPT-NEXT: ret void
169-
;
170168
%call = call i32 @escape(ptr %input)
171169
ret void
172170
}
@@ -224,7 +222,6 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
224222
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
225223
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
226224
; OPT-NEXT: ret void
227-
;
228225
%a.addr = alloca i32, align 4
229226
store i32 %a, ptr %a.addr, align 4
230227
%call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
@@ -252,7 +249,6 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
252249
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
253250
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
254251
; OPT-NEXT: ret void
255-
;
256252
store ptr %input, ptr %addr, align 8
257253
ret void
258254
}
@@ -286,7 +282,6 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
286282
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
287283
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
288284
; OPT-NEXT: ret void
289-
;
290285
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
291286
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
292287
%1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
@@ -335,7 +330,6 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
335330
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
336331
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
337332
; OPT-NEXT: ret void
338-
;
339333
%val = load i32, ptr %input
340334
%twice = add i32 %val, %val
341335
store i32 %twice, ptr %output
@@ -389,7 +383,6 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
389383
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
390384
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
391385
; OPT-NEXT: ret i32 [[ADD]]
392-
;
393386
%ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
394387
%val1 = load i32, ptr %ptr1
395388
%ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -442,7 +435,6 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
442435
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
443436
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
444437
; OPT-NEXT: ret void
445-
;
446438

447439
%val = load i32, ptr %inout
448440
%less = icmp slt i32 %val, 0
@@ -508,7 +500,6 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
508500
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
509501
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
510502
; OPT-NEXT: ret void
511-
;
512503
%val = load i32, ptr %inout
513504
%less = icmp slt i32 %val, 0
514505
br i1 %less, label %first, label %second
@@ -562,7 +553,6 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
562553
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
563554
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
564555
; OPT-NEXT: ret void
565-
;
566556
%val = load i32, ptr %inout
567557
%less = icmp slt i32 %val, 0
568558
%ptrnew = select i1 %less, ptr %input1, ptr %input2
@@ -594,7 +584,6 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
594584
; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
595585
; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]]
596586
; OPT-NEXT: ret i32 [[KEEPALIVE]]
597-
;
598587
%val = load i32, ptr %input
599588
%ptrval = ptrtoint ptr %input to i32
600589
%keepalive = add i32 %val, %ptrval

llvm/test/CodeGen/NVPTX/lower-args.ll

+7-6
Original file line number | Diff line number | Diff line change
@@ -40,24 +40,25 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
4040
; PTX-NEXT: .local .align 8 .b8 __local_depot1[8];
4141
; PTX-NEXT: .reg .b64 %SP;
4242
; PTX-NEXT: .reg .b64 %SPL;
43-
; PTX-NEXT: .reg .b64 %rd<5>;
43+
; PTX-NEXT: .reg .b64 %rd<6>;
4444
; PTX-EMPTY:
4545
; PTX-NEXT: // %bb.0:
4646
; PTX-NEXT: mov.u64 %SPL, __local_depot1;
4747
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
48-
; PTX-NEXT: ld.param.u64 %rd1, [load_padding_param_0];
49-
; PTX-NEXT: st.u64 [%SP], %rd1;
50-
; PTX-NEXT: add.u64 %rd2, %SP, 0;
48+
; PTX-NEXT: add.u64 %rd1, %SP, 0;
49+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
50+
; PTX-NEXT: ld.param.u64 %rd3, [load_padding_param_0];
51+
; PTX-NEXT: st.local.u64 [%rd2], %rd3;
5152
; PTX-NEXT: { // callseq 1, 0
5253
; PTX-NEXT: .param .b64 param0;
53-
; PTX-NEXT: st.param.b64 [param0], %rd2;
54+
; PTX-NEXT: st.param.b64 [param0], %rd1;
5455
; PTX-NEXT: .param .b64 retval0;
5556
; PTX-NEXT: call.uni (retval0),
5657
; PTX-NEXT: escape,
5758
; PTX-NEXT: (
5859
; PTX-NEXT: param0
5960
; PTX-NEXT: );
60-
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
61+
; PTX-NEXT: ld.param.b64 %rd4, [retval0];
6162
; PTX-NEXT: } // callseq 1
6263
; PTX-NEXT: ret;
6364
%tmp = call ptr @escape(ptr nonnull align 16 %arg)

llvm/test/CodeGen/NVPTX/variadics-backend.ll

+53-51
Original file line number | Diff line number | Diff line change
@@ -148,35 +148,34 @@ entry:
148148
define dso_local i32 @variadics2(i32 noundef %first, ...) {
149149
; CHECK-PTX-LABEL: variadics2(
150150
; CHECK-PTX: {
151-
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
151+
; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3];
152152
; CHECK-PTX-NEXT: .reg .b64 %SP;
153153
; CHECK-PTX-NEXT: .reg .b64 %SPL;
154-
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
154+
; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
155155
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
156-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
156+
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
157157
; CHECK-PTX-EMPTY:
158158
; CHECK-PTX-NEXT: // %bb.0: // %entry
159159
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
160-
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
161160
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
162161
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
163-
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
164-
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
165-
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
166-
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4];
167-
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd3+7];
168-
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
169-
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd3+5];
170-
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd3+6];
171-
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
172-
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
173-
; CHECK-PTX-NEXT: st.u16 [%SP], %rs5;
174-
; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3+8];
162+
; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 0;
163+
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 7;
164+
; CHECK-PTX-NEXT: and.b64 %rd5, %rd4, -8;
165+
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd5];
166+
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd5+4];
167+
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd5+7];
168+
; CHECK-PTX-NEXT: st.local.u8 [%rd3+2], %rs1;
169+
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5+6];
170+
; CHECK-PTX-NEXT: st.local.u8 [%rd3+1], %rs2;
171+
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd5+5];
172+
; CHECK-PTX-NEXT: st.local.u8 [%rd3], %rs3;
173+
; CHECK-PTX-NEXT: ld.u64 %rd6, [%rd5+8];
175174
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
176175
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
177-
; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5;
178-
; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
179-
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6;
176+
; CHECK-PTX-NEXT: cvt.u64.u32 %rd7, %r5;
177+
; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd6;
178+
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8;
180179
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
181180
; CHECK-PTX-NEXT: ret;
182181
entry:
@@ -213,39 +212,39 @@ define dso_local i32 @bar() {
213212
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
214213
; CHECK-PTX-NEXT: .reg .b64 %SP;
215214
; CHECK-PTX-NEXT: .reg .b64 %SPL;
216-
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
215+
; CHECK-PTX-NEXT: .reg .b16 %rs<8>;
217216
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
218-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
217+
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
219218
; CHECK-PTX-EMPTY:
220219
; CHECK-PTX-NEXT: // %bb.0: // %entry
221220
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
222221
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
223-
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
224-
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
225-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
222+
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
223+
; CHECK-PTX-NEXT: mov.u64 %rd3, __const_$_bar_$_s1;
224+
; CHECK-PTX-NEXT: add.s64 %rd4, %rd3, 7;
225+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd4];
226226
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
227-
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
228-
; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
229-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
227+
; CHECK-PTX-NEXT: st.local.u8 [%rd2+2], %rs2;
228+
; CHECK-PTX-NEXT: add.s64 %rd5, %rd3, 6;
229+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd5];
230230
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
231-
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
232-
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
231+
; CHECK-PTX-NEXT: st.local.u8 [%rd2+1], %rs4;
232+
; CHECK-PTX-NEXT: add.s64 %rd6, %rd3, 5;
233+
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd6];
233234
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
234-
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
235-
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
236-
; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
235+
; CHECK-PTX-NEXT: st.local.u8 [%rd2], %rs6;
237236
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
238237
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
239-
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
240-
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
241-
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
242-
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
243-
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
238+
; CHECK-PTX-NEXT: mov.b16 %rs7, 1;
239+
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs7;
240+
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
241+
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
242+
; CHECK-PTX-NEXT: add.u64 %rd8, %SP, 8;
244243
; CHECK-PTX-NEXT: { // callseq 1, 0
245244
; CHECK-PTX-NEXT: .param .b32 param0;
246245
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
247246
; CHECK-PTX-NEXT: .param .b64 param1;
248-
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
247+
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd8;
249248
; CHECK-PTX-NEXT: .param .b32 retval0;
250249
; CHECK-PTX-NEXT: call.uni (retval0),
251250
; CHECK-PTX-NEXT: variadics2,
@@ -384,26 +383,29 @@ define dso_local void @qux() {
384383
; CHECK-PTX-NEXT: .reg .b64 %SP;
385384
; CHECK-PTX-NEXT: .reg .b64 %SPL;
386385
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
387-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
386+
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
388387
; CHECK-PTX-EMPTY:
389388
; CHECK-PTX-NEXT: // %bb.0: // %entry
390389
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
391390
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
392-
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
393-
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
394-
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
395-
; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
396-
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
397-
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
398-
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
399-
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
400-
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
391+
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
392+
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [__const_$_qux_$_s];
393+
; CHECK-PTX-NEXT: st.local.u64 [%rd2], %rd3;
394+
; CHECK-PTX-NEXT: mov.u64 %rd4, __const_$_qux_$_s;
395+
; CHECK-PTX-NEXT: add.s64 %rd5, %rd4, 8;
396+
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd6, [%rd5];
397+
; CHECK-PTX-NEXT: st.local.u64 [%rd2+8], %rd6;
398+
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
399+
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
400+
; CHECK-PTX-NEXT: ld.u64 %rd8, [%SP];
401+
; CHECK-PTX-NEXT: ld.u64 %rd9, [%SP+8];
402+
; CHECK-PTX-NEXT: add.u64 %rd10, %SP, 16;
401403
; CHECK-PTX-NEXT: { // callseq 3, 0
402404
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
403-
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1;
404-
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
405+
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd8;
406+
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd9;
405407
; CHECK-PTX-NEXT: .param .b64 param1;
406-
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
408+
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd10;
407409
; CHECK-PTX-NEXT: .param .b32 retval0;
408410
; CHECK-PTX-NEXT: call.uni (retval0),
409411
; CHECK-PTX-NEXT: variadics4,
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=infer-address-spaces %s | FileCheck %s
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
7+
; Loads a float straight from a fresh alloca through a generic pointer. After
; the infer-address-spaces pass runs, the expected output casts the alloca to
; addrspace(5) and performs the load through that pointer instead.
define float @load_alloca() {
8+
; CHECK-LABEL: define float @load_alloca() {
9+
; CHECK-NEXT: [[ADDR:%.*]] = alloca float, align 4
10+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
11+
; CHECK-NEXT: [[VAL:%.*]] = load float, ptr addrspace(5) [[TMP1]], align 4
12+
; CHECK-NEXT: ret float [[VAL]]
13+
;
14+
%addr = alloca float
15+
%val = load float, ptr %addr
16+
ret float %val
17+
}

0 commit comments

Comments
 (0)