@@ -110,3 +110,196 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
110
110
ret <1 x bfloat> %ret
111
111
}
112
112
113
; Under-aligned (align 4 < the 8-byte natural alignment) atomic acquire load of
; <1 x double>: it cannot be lowered to a plain load, so it expands to a
; __atomic_load libcall (size $8 in %edi, ordering $2 in %ecx — presumably
; memory_order_acquire, matching the IR's `acquire`; TODO confirm) and the
; result is reloaded from the stack temporary into %xmm0.
; NOTE(review): the `##` comment markers and extra leading underscore on
; ___atomic_load suggest a Darwin/Mach-O target — the RUN lines are not
; visible in this chunk, so confirm against the full file.
define <1 x double> @atomic_vec1_double(ptr %x) {
; CHECK-LABEL: atomic_vec1_double:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $8, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec1_double:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    pushq %rax
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $8, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT:    popq %rax
; CHECK0-NEXT:    retq
  %ret = load atomic <1 x double>, ptr %x acquire, align 4
  ret <1 x double> %ret
}
142
+
143
; Under-aligned (align 4 < total size 8) atomic acquire load of <2 x i32>:
; expands to a __atomic_load libcall (size $8, ordering $2) writing to a stack
; temporary. The two RUN configs reload the 8-byte result differently
; (movsd at -O? vs. movq under CHECK0), but both read the same stack slot.
define <2 x i32> @atomic_vec2_i32(ptr %x) {
; CHECK-LABEL: atomic_vec2_i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $8, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec2_i32:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    pushq %rax
; CHECK0-NEXT:    .cfi_def_cfa_offset 16
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $8, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT:    popq %rax
; CHECK0-NEXT:    retq
  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
  ret <2 x i32> %ret
}
172
+
173
; 16-byte atomic acquire load of <4 x float> at align 4: expands to a
; __atomic_load libcall (size $16, ordering $2) into a 16-byte-aligned stack
; slot, reloaded with an aligned movaps. 24 bytes of stack keep %rsp 16-byte
; aligned at the call site (8-byte return address + 24 = 32).
define <4 x float> @atomic_vec4_float(ptr %x) {
; CHECK-LABEL: atomic_vec4_float:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $16, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec4_float:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $24, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 32
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $16, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movaps (%rsp), %xmm0
; CHECK0-NEXT:    addq $24, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <4 x float>, ptr %x acquire, align 4
  ret <4 x float> %ret
}
202
+
203
; 64-byte atomic acquire load of <8 x double> at align 4: expands to a
; __atomic_load libcall (size $64, ordering $2) into a stack buffer; the
; result is returned in four 16-byte xmm registers (%xmm0-%xmm3). The two RUN
; configs differ only in the reload mnemonic (movaps vs. movapd).
define <8 x double> @atomic_vec8_double(ptr %x) {
; CHECK-LABEL: atomic_vec8_double:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $64, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec8_double:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $72, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 80
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $64, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movapd (%rsp), %xmm0
; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
; CHECK0-NEXT:    addq $72, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <8 x double>, ptr %x acquire, align 4
  ret <8 x double> %ret
}
238
+
239
; 32-byte atomic acquire load of <16 x bfloat> at align 4: expands to a
; __atomic_load libcall (size $32, ordering $2) into a stack buffer; the
; result is returned in two 16-byte halves (%xmm0, %xmm1).
define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) {
; CHECK-LABEL: atomic_vec16_bfloat:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $32, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec16_bfloat:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $40, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 48
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $32, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movaps (%rsp), %xmm0
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK0-NEXT:    addq $40, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
  ret <16 x bfloat> %ret
}
270
+
271
; 64-byte atomic acquire load of <32 x half> at align 4: expands to a
; __atomic_load libcall (size $64, ordering $2) into a stack buffer; the
; result is returned in four 16-byte xmm registers (%xmm0-%xmm3). Same stack
; layout as atomic_vec8_double (72 bytes of locals, cfa offset 80).
define <32 x half> @atomic_vec32_half(ptr %x) {
; CHECK-LABEL: atomic_vec32_half:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    movq %rdi, %rsi
; CHECK-NEXT:    movq %rsp, %rdx
; CHECK-NEXT:    movl $64, %edi
; CHECK-NEXT:    movl $2, %ecx
; CHECK-NEXT:    callq ___atomic_load
; CHECK-NEXT:    movaps (%rsp), %xmm0
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
;
; CHECK0-LABEL: atomic_vec32_half:
; CHECK0:       ## %bb.0:
; CHECK0-NEXT:    subq $72, %rsp
; CHECK0-NEXT:    .cfi_def_cfa_offset 80
; CHECK0-NEXT:    movq %rdi, %rsi
; CHECK0-NEXT:    movl $64, %edi
; CHECK0-NEXT:    movq %rsp, %rdx
; CHECK0-NEXT:    movl $2, %ecx
; CHECK0-NEXT:    callq ___atomic_load
; CHECK0-NEXT:    movaps (%rsp), %xmm0
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
; CHECK0-NEXT:    addq $72, %rsp
; CHECK0-NEXT:    retq
  %ret = load atomic <32 x half>, ptr %x acquire, align 4
  ret <32 x half> %ret
}