Skip to content

Commit b2b23bf

Browse files
committed
[X86] Remove extra MOV after widening atomic load
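This change adds instruction-selection patterns that load a widened atomic vector directly from memory into an XMM register, removing the extra GPR-to-XMM MOV that previously followed the atomic load. commit-id:45989503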
1 parent 768b1a9 commit b2b23bf

File tree

2 files changed

+35
-164
lines changed

2 files changed

+35
-164
lines changed

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
12041204
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
12051205
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
12061206

1207+
// Widened atomic vector loads: select a single load from memory into the
// vector register instead of a GPR load followed by a GPR -> XMM move.
// NOTE(review): unlike the floating-point patterns below, these carry no
// Requires<[...]> predicate, and the CHECK-AVX test prefixes now expect
// non-VEX movss/movd/movq where they used to expect vmovd/vmovq -- confirm
// this is intended.
def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)))))),
1208+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
// NOTE(review): this uses the same 32-bit load instruction as the
// atomic_load_32 pattern although only a 16-bit atomic load is matched --
// confirm that reading the extra bytes is safe.
1209+
def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
1210+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>, <4 x i8>
1211+
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
1212+
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32>, <2 x float>, <2 x ptr addrspace(270)>, <4 x i16>
1213+
12071214
// Floating point loads/stores.
12081215
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12091216
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

llvm/test/CodeGen/X86/atomic-load-store.ll

Lines changed: 28 additions & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -319,159 +319,60 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) {
319319
define <2 x i16> @atomic_vec2_i16(ptr %x) {
320320
; CHECK-O3-LABEL: atomic_vec2_i16:
321321
; CHECK-O3: # %bb.0:
322-
; CHECK-O3-NEXT: movl (%rdi), %eax
323-
; CHECK-O3-NEXT: movd %eax, %xmm0
322+
; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324323
; CHECK-O3-NEXT: retq
325324
;
326325
; CHECK-SSE-O3-LABEL: atomic_vec2_i16:
327326
; CHECK-SSE-O3: # %bb.0:
328-
; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
329-
; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
327+
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
330328
; CHECK-SSE-O3-NEXT: retq
331329
;
332330
; CHECK-AVX-O3-LABEL: atomic_vec2_i16:
333331
; CHECK-AVX-O3: # %bb.0:
334-
; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
335-
; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
332+
; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
336333
; CHECK-AVX-O3-NEXT: retq
337334
;
338335
; CHECK-O0-LABEL: atomic_vec2_i16:
339336
; CHECK-O0: # %bb.0:
340-
; CHECK-O0-NEXT: movl (%rdi), %eax
341-
; CHECK-O0-NEXT: movd %eax, %xmm0
337+
; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
342338
; CHECK-O0-NEXT: retq
343339
;
344340
; CHECK-SSE-O0-LABEL: atomic_vec2_i16:
345341
; CHECK-SSE-O0: # %bb.0:
346-
; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
347-
; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
342+
; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
348343
; CHECK-SSE-O0-NEXT: retq
349344
;
350345
; CHECK-AVX-O0-LABEL: atomic_vec2_i16:
351346
; CHECK-AVX-O0: # %bb.0:
352-
; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
353-
; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
347+
; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
354348
; CHECK-AVX-O0-NEXT: retq
355349
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
356350
ret <2 x i16> %ret
357351
}
358352

359353
define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
360-
; CHECK-O3-LABEL: atomic_vec2_ptr270:
361-
; CHECK-O3: # %bb.0:
362-
; CHECK-O3-NEXT: movq (%rdi), %rax
363-
; CHECK-O3-NEXT: movq %rax, %xmm0
364-
; CHECK-O3-NEXT: retq
365-
;
366-
; CHECK-SSE-O3-LABEL: atomic_vec2_ptr270:
367-
; CHECK-SSE-O3: # %bb.0:
368-
; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
369-
; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
370-
; CHECK-SSE-O3-NEXT: retq
371-
;
372-
; CHECK-AVX-O3-LABEL: atomic_vec2_ptr270:
373-
; CHECK-AVX-O3: # %bb.0:
374-
; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
375-
; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
376-
; CHECK-AVX-O3-NEXT: retq
377-
;
378-
; CHECK-O0-LABEL: atomic_vec2_ptr270:
379-
; CHECK-O0: # %bb.0:
380-
; CHECK-O0-NEXT: movq (%rdi), %rax
381-
; CHECK-O0-NEXT: movq %rax, %xmm0
382-
; CHECK-O0-NEXT: retq
383-
;
384-
; CHECK-SSE-O0-LABEL: atomic_vec2_ptr270:
385-
; CHECK-SSE-O0: # %bb.0:
386-
; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
387-
; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
388-
; CHECK-SSE-O0-NEXT: retq
389-
;
390-
; CHECK-AVX-O0-LABEL: atomic_vec2_ptr270:
391-
; CHECK-AVX-O0: # %bb.0:
392-
; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
393-
; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
394-
; CHECK-AVX-O0-NEXT: retq
354+
; CHECK-LABEL: atomic_vec2_ptr270:
355+
; CHECK: # %bb.0:
356+
; CHECK-NEXT: movq (%rdi), %xmm0
357+
; CHECK-NEXT: retq
395358
%ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
396359
ret <2 x ptr addrspace(270)> %ret
397360
}
398361

399362
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
400-
; CHECK-O3-LABEL: atomic_vec2_i32_align:
401-
; CHECK-O3: # %bb.0:
402-
; CHECK-O3-NEXT: movq (%rdi), %rax
403-
; CHECK-O3-NEXT: movq %rax, %xmm0
404-
; CHECK-O3-NEXT: retq
405-
;
406-
; CHECK-SSE-O3-LABEL: atomic_vec2_i32_align:
407-
; CHECK-SSE-O3: # %bb.0:
408-
; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
409-
; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
410-
; CHECK-SSE-O3-NEXT: retq
411-
;
412-
; CHECK-AVX-O3-LABEL: atomic_vec2_i32_align:
413-
; CHECK-AVX-O3: # %bb.0:
414-
; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
415-
; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
416-
; CHECK-AVX-O3-NEXT: retq
417-
;
418-
; CHECK-O0-LABEL: atomic_vec2_i32_align:
419-
; CHECK-O0: # %bb.0:
420-
; CHECK-O0-NEXT: movq (%rdi), %rax
421-
; CHECK-O0-NEXT: movq %rax, %xmm0
422-
; CHECK-O0-NEXT: retq
423-
;
424-
; CHECK-SSE-O0-LABEL: atomic_vec2_i32_align:
425-
; CHECK-SSE-O0: # %bb.0:
426-
; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
427-
; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
428-
; CHECK-SSE-O0-NEXT: retq
429-
;
430-
; CHECK-AVX-O0-LABEL: atomic_vec2_i32_align:
431-
; CHECK-AVX-O0: # %bb.0:
432-
; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
433-
; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
434-
; CHECK-AVX-O0-NEXT: retq
363+
; CHECK-LABEL: atomic_vec2_i32_align:
364+
; CHECK: # %bb.0:
365+
; CHECK-NEXT: movq (%rdi), %xmm0
366+
; CHECK-NEXT: retq
435367
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
436368
ret <2 x i32> %ret
437369
}
438370

439371
define <2 x float> @atomic_vec2_float_align(ptr %x) {
440-
; CHECK-O3-LABEL: atomic_vec2_float_align:
441-
; CHECK-O3: # %bb.0:
442-
; CHECK-O3-NEXT: movq (%rdi), %rax
443-
; CHECK-O3-NEXT: movq %rax, %xmm0
444-
; CHECK-O3-NEXT: retq
445-
;
446-
; CHECK-SSE-O3-LABEL: atomic_vec2_float_align:
447-
; CHECK-SSE-O3: # %bb.0:
448-
; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
449-
; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
450-
; CHECK-SSE-O3-NEXT: retq
451-
;
452-
; CHECK-AVX-O3-LABEL: atomic_vec2_float_align:
453-
; CHECK-AVX-O3: # %bb.0:
454-
; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
455-
; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
456-
; CHECK-AVX-O3-NEXT: retq
457-
;
458-
; CHECK-O0-LABEL: atomic_vec2_float_align:
459-
; CHECK-O0: # %bb.0:
460-
; CHECK-O0-NEXT: movq (%rdi), %rax
461-
; CHECK-O0-NEXT: movq %rax, %xmm0
462-
; CHECK-O0-NEXT: retq
463-
;
464-
; CHECK-SSE-O0-LABEL: atomic_vec2_float_align:
465-
; CHECK-SSE-O0: # %bb.0:
466-
; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
467-
; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
468-
; CHECK-SSE-O0-NEXT: retq
469-
;
470-
; CHECK-AVX-O0-LABEL: atomic_vec2_float_align:
471-
; CHECK-AVX-O0: # %bb.0:
472-
; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
473-
; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
474-
; CHECK-AVX-O0-NEXT: retq
372+
; CHECK-LABEL: atomic_vec2_float_align:
373+
; CHECK: # %bb.0:
374+
; CHECK-NEXT: movq (%rdi), %xmm0
375+
; CHECK-NEXT: retq
475376
%ret = load atomic <2 x float>, ptr %x acquire, align 8
476377
ret <2 x float> %ret
477378
}
@@ -900,79 +801,42 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
900801
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
901802
; CHECK-O3-LABEL: atomic_vec4_i8:
902803
; CHECK-O3: # %bb.0:
903-
; CHECK-O3-NEXT: movl (%rdi), %eax
904-
; CHECK-O3-NEXT: movd %eax, %xmm0
804+
; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
905805
; CHECK-O3-NEXT: retq
906806
;
907807
; CHECK-SSE-O3-LABEL: atomic_vec4_i8:
908808
; CHECK-SSE-O3: # %bb.0:
909-
; CHECK-SSE-O3-NEXT: movl (%rdi), %eax
910-
; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
809+
; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
911810
; CHECK-SSE-O3-NEXT: retq
912811
;
913812
; CHECK-AVX-O3-LABEL: atomic_vec4_i8:
914813
; CHECK-AVX-O3: # %bb.0:
915-
; CHECK-AVX-O3-NEXT: movl (%rdi), %eax
916-
; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
814+
; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
917815
; CHECK-AVX-O3-NEXT: retq
918816
;
919817
; CHECK-O0-LABEL: atomic_vec4_i8:
920818
; CHECK-O0: # %bb.0:
921-
; CHECK-O0-NEXT: movl (%rdi), %eax
922-
; CHECK-O0-NEXT: movd %eax, %xmm0
819+
; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
923820
; CHECK-O0-NEXT: retq
924821
;
925822
; CHECK-SSE-O0-LABEL: atomic_vec4_i8:
926823
; CHECK-SSE-O0: # %bb.0:
927-
; CHECK-SSE-O0-NEXT: movl (%rdi), %eax
928-
; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
824+
; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
929825
; CHECK-SSE-O0-NEXT: retq
930826
;
931827
; CHECK-AVX-O0-LABEL: atomic_vec4_i8:
932828
; CHECK-AVX-O0: # %bb.0:
933-
; CHECK-AVX-O0-NEXT: movl (%rdi), %eax
934-
; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
829+
; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
935830
; CHECK-AVX-O0-NEXT: retq
936831
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
937832
ret <4 x i8> %ret
938833
}
939834

940835
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
941-
; CHECK-O3-LABEL: atomic_vec4_i16:
942-
; CHECK-O3: # %bb.0:
943-
; CHECK-O3-NEXT: movq (%rdi), %rax
944-
; CHECK-O3-NEXT: movq %rax, %xmm0
945-
; CHECK-O3-NEXT: retq
946-
;
947-
; CHECK-SSE-O3-LABEL: atomic_vec4_i16:
948-
; CHECK-SSE-O3: # %bb.0:
949-
; CHECK-SSE-O3-NEXT: movq (%rdi), %rax
950-
; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
951-
; CHECK-SSE-O3-NEXT: retq
952-
;
953-
; CHECK-AVX-O3-LABEL: atomic_vec4_i16:
954-
; CHECK-AVX-O3: # %bb.0:
955-
; CHECK-AVX-O3-NEXT: movq (%rdi), %rax
956-
; CHECK-AVX-O3-NEXT: vmovq %rax, %xmm0
957-
; CHECK-AVX-O3-NEXT: retq
958-
;
959-
; CHECK-O0-LABEL: atomic_vec4_i16:
960-
; CHECK-O0: # %bb.0:
961-
; CHECK-O0-NEXT: movq (%rdi), %rax
962-
; CHECK-O0-NEXT: movq %rax, %xmm0
963-
; CHECK-O0-NEXT: retq
964-
;
965-
; CHECK-SSE-O0-LABEL: atomic_vec4_i16:
966-
; CHECK-SSE-O0: # %bb.0:
967-
; CHECK-SSE-O0-NEXT: movq (%rdi), %rax
968-
; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
969-
; CHECK-SSE-O0-NEXT: retq
970-
;
971-
; CHECK-AVX-O0-LABEL: atomic_vec4_i16:
972-
; CHECK-AVX-O0: # %bb.0:
973-
; CHECK-AVX-O0-NEXT: movq (%rdi), %rax
974-
; CHECK-AVX-O0-NEXT: vmovq %rax, %xmm0
975-
; CHECK-AVX-O0-NEXT: retq
836+
; CHECK-LABEL: atomic_vec4_i16:
837+
; CHECK: # %bb.0:
838+
; CHECK-NEXT: movq (%rdi), %xmm0
839+
; CHECK-NEXT: retq
976840
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
977841
ret <4 x i16> %ret
978842
}

0 commit comments

Comments
 (0)