diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 79bdd25c18f1f..32ba5ebdec6d3 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -10956,8 +10956,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering ` and optional ``syncscope("")`` argument. The ``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads produce :ref:`defined ` results when they may see -multiple atomic stores. The type of the pointee must be an integer, pointer, or -floating-point type whose bit width is a power of two greater than or equal to +multiple atomic stores. The type of the pointee must be an integer, pointer, +floating-point, or vector type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ``align`` must be explicitly specified on atomic loads. Note: if the alignment is not greater or equal to the size of the `` type, the atomic operation is likely to @@ -11097,8 +11097,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering ` and optional ``syncscope("")`` argument. The ``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads produce :ref:`defined ` results when they may see -multiple atomic stores. The type of the pointee must be an integer, pointer, or -floating-point type whose bit width is a power of two greater than or equal to +multiple atomic stores. The type of the pointee must be an integer, pointer, +floating-point, or vector type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. ``align`` must be explicitly specified on atomic stores. Note: if the alignment is not greater or equal to the size of the `` type, the atomic operation is likely to diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1703149aca746..0086405825cd5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -860,6 +860,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_ExpOp(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 465128099f444..bdd71b251f394 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); + break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -451,6 +454,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + + SDValue Result = DAG.getAtomic( + ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 55de486e90e19..6f847e3b3fc70 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4255,9 +4255,10 @@ void Verifier::visitLoadInst(LoadInst &LI) { Check(LI.getOrdering() != AtomicOrdering::Release && LI.getOrdering() != AtomicOrdering::AcquireRelease, "Load cannot have Release ordering", &LI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic load operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic load operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &LI); checkAtomicMemAccessSize(ElTy, &LI); } else { @@ -4281,9 +4282,10 @@ void Verifier::visitStoreInst(StoreInst &SI) { Check(SI.getOrdering() != AtomicOrdering::Acquire && SI.getOrdering() != AtomicOrdering::AcquireRelease, "Store cannot have Acquire ordering", &SI); - Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(), - "atomic store operand must have integer, pointer, or floating point " - "type!", + Check(ElTy->getScalarType()->isIntOrPtrTy() || + ElTy->getScalarType()->isFloatingPointTy(), + "atomic store operand must have integer, pointer, floating point, " + "or vector type!", ElTy, &SI); checkAtomicMemAccessSize(ElTy, &SI); } else { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9048d1d83f187..7f4021f4beedc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2589,6 +2589,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll index a44dcccc16bef..f1027d5d3fbde 100644 --- a/llvm/test/Assembler/atomic.ll +++ b/llvm/test/Assembler/atomic.ll @@ -52,6 +52,15 @@ define void @f(ptr %x) { ; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic + ; CHECK : load atomic <1 x i32>, ptr %x unordered, align 4 + load atomic <1 x i32>, ptr %x unordered, align 4 + ; CHECK : store atomic <1 x i32> splat (i32 3), ptr %x release, align 4 + store atomic <1 x i32> , ptr %x release, align 4 + ; CHECK : load atomic <2 x i32>, ptr %x unordered, align 4 + load atomic <2 x i32>, ptr %x unordered, align 4 + ; CHECK : store atomic <2 x i32> , ptr %x release, align 4 + store atomic <2 x i32> , ptr %x release, align 4 + ; CHECK: fence syncscope("singlethread") release fence syncscope("singlethread") release ; CHECK: fence seq_cst diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..2bde0d2ffd06a 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,12 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefix=CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: ; CHECK: ## %bb.0: ; CHECK-NEXT: xchgl %esi, (%rdi) ; CHECK-NEXT: retq +; +; CHECK0-LABEL: test1: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: xchgl %esi, (%rdi) +; CHECK0-NEXT: retq store atomic i32 %val1, ptr %ptr seq_cst, align 4 ret void } @@ -16,6 +21,11 @@ define void @test2(ptr %ptr, i32 %val1) { ; CHECK: ## %bb.0: ; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: retq +; +; CHECK0-LABEL: test2: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movl %esi, (%rdi) +; CHECK0-NEXT: retq store atomic i32 %val1, ptr %ptr release, align 4 ret void } @@ -25,6 +35,78 @@ define i32 @test3(ptr %ptr) { ; CHECK: ## %bb.0: ; CHECK-NEXT: movl (%rdi), %eax ; CHECK-NEXT: retq +; +; CHECK0-LABEL: test3: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movl (%rdi), %eax +; CHECK0-NEXT: retq %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: retq +; +; CHECK0-LABEL: atomic_vec1_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movl (%rdi), %eax +; CHECK0-NEXT: retq + %ret = load atomic <1 x i32>, ptr %x acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK-LABEL: atomic_vec1_half: +; CHECK: ## %bb.0: +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-NEXT: retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movw (%rdi), %cx +; CHECK0-NEXT: ## implicit-def: $eax +; CHECK0-NEXT: movw %cx, %ax +; CHECK0-NEXT: ## implicit-def: $xmm0 +; CHECK0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT: retq + %ret = load atomic <1 x half>, ptr %x acquire, align 4 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq +; +; CHECK0-LABEL: atomic_vec1_float: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT: retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) { +; CHECK-LABEL: atomic_vec1_bfloat: +; CHECK: ## %bb.0: +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-NEXT: retq +; +; CHECK0-LABEL: atomic_vec1_bfloat: +; CHECK0: ## %bb.0: +; CHECK0-NEXT: movw (%rdi), %cx +; CHECK0-NEXT: ## implicit-def: $eax +; CHECK0-NEXT: movw %cx, %ax +; CHECK0-NEXT: ## implicit-def: $xmm0 +; CHECK0-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT: retq + %ret = load atomic <1 x bfloat>, ptr %x acquire, align 4 + ret <1 x bfloat> %ret +} + diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll index f835b98b24345..17bf5a0528d73 100644 --- a/llvm/test/Verifier/atomics.ll +++ b/llvm/test/Verifier/atomics.ll @@ -1,14 +1,15 @@ ; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s +; CHECK: atomic store operand must have integer, pointer, floating point, or vector type! +; CHECK: atomic load operand must have integer, pointer, floating point, or vector type! -; CHECK: atomic store operand must have integer, pointer, or floating point type! -; CHECK: atomic load operand must have integer, pointer, or floating point type! +%ty = type { i32 }; -define void @foo(ptr %P, <1 x i64> %v) { - store atomic <1 x i64> %v, ptr %P unordered, align 8 +define void @foo(ptr %P, %ty %v) { + store atomic %ty %v, ptr %P unordered, align 8 ret void } -define <1 x i64> @bar(ptr %P) { - %v = load atomic <1 x i64>, ptr %P unordered, align 8 - ret <1 x i64> %v +define %ty @bar(ptr %P) { + %v = load atomic %ty, ptr %P unordered, align 8 + ret %ty %v }