Skip to content

Commit b5dc647

Browse files
authored
Merge pull request #19 from schweitzpgi/release_50
Release 50 - fix for PowerPC cmpxchg (D41856)
2 parents 3efbc3c + 7dd23b3 commit b5dc647

File tree

3 files changed

+169
-0
lines changed

3 files changed

+169
-0
lines changed

lib/Target/PowerPC/PPCISelLowering.cpp

+74
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
140140
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
141141
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
142142

143+
// Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
144+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
145+
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
146+
143147
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
144148
for (MVT VT : MVT::integer_valuetypes()) {
145149
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -8495,6 +8499,73 @@ SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
84958499
return Op;
84968500
}
84978501

8502+
// Test if an SDValue is zero-extended from \p From bits to \p To bits.
8503+
static bool isZeroExtended(SDValue Op, unsigned From, unsigned To) {
8504+
if (To < From)
8505+
return false;
8506+
if (To == From)
8507+
return true;
8508+
unsigned OpWidth = Op.getValueType().getSizeInBits();
8509+
if (OpWidth != To)
8510+
return false;
8511+
8512+
// Explicitly zero-extended values.
8513+
if (Op.getOpcode() == ISD::ZERO_EXTEND)
8514+
return true;
8515+
if (Op.getOpcode() == ISD::AssertZext &&
8516+
cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits() == From)
8517+
return true;
8518+
8519+
// Masked values.
8520+
if (Op.getOpcode() == ISD::AND)
8521+
if (ConstantSDNode *Mask = isConstOrConstSplat(Op.getOperand(1)))
8522+
return Mask->getZExtValue() == ((1U << From) - 1);
8523+
8524+
// ZExt load.
8525+
if (Op.getOpcode() == ISD::LOAD)
8526+
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op))
8527+
return LD->getExtensionType() == ISD::ZEXTLOAD;
8528+
return false;
8529+
}
8530+
8531+
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
8532+
// compared to a value that is atomically loaded (atomic loads zero-extend).
8533+
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
8534+
SelectionDAG &DAG) const {
8535+
unsigned Opc = Op.getOpcode();
8536+
assert((Opc == ISD::ATOMIC_CMP_SWAP ||
8537+
Opc == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS) &&
8538+
"Expecting an atomic compare-and-swap here.");
8539+
SDLoc dl(Op);
8540+
auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
8541+
EVT MemVT = AtomicNode->getMemoryVT();
8542+
bool ToExpand = Opc == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
8543+
if (MemVT.getSizeInBits() >= 32)
8544+
return ToExpand ? SDValue() : Op;
8545+
8546+
SDValue CmpOp = Op.getOperand(2);
8547+
// If this is already correctly zero-extended, leave it alone.
8548+
if (isZeroExtended(CmpOp, MemVT.getSizeInBits(), 32))
8549+
return ToExpand ? SDValue() : Op;
8550+
8551+
// Clear the high bits of the compare operand.
8552+
unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
8553+
SDValue NewCmpOp =
8554+
DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
8555+
DAG.getConstant(MaskVal, dl, MVT::i32));
8556+
8557+
// Replace the existing compare operand with the properly zero-extended one.
8558+
SmallVector<SDValue, 4> Ops;
8559+
for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
8560+
Ops.push_back(AtomicNode->getOperand(i));
8561+
Ops[2] = NewCmpOp;
8562+
DAG.UpdateNodeOperands(AtomicNode, Ops);
8563+
8564+
// ATOMIC_CMP_SWAP is Legal and ATOMIC_CMP_SWAP_WITH_SUCCESS needs to be
8565+
// Expanded.
8566+
return ToExpand ? SDValue() : Op;
8567+
}
8568+
84988569
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
84998570
SelectionDAG &DAG) const {
85008571
SDLoc dl(Op);
@@ -8966,6 +9037,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
89669037
case ISD::SREM:
89679038
case ISD::UREM:
89689039
return LowerREM(Op, DAG);
9040+
case ISD::ATOMIC_CMP_SWAP:
9041+
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
9042+
return LowerATOMIC_CMP_SWAP(Op, DAG);
89699043
}
89709044
}
89719045

lib/Target/PowerPC/PPCISelLowering.h

+1
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,7 @@ namespace llvm {
944944
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
945945
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
946946
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
947+
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
947948
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
948949
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
949950
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; Make sure that a negative value for the compare-and-swap is zero extended
3+
; from i8/i16 to i32 since it will be compared for equality.
4+
; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s
5+
; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7
6+
7+
@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00"
8+
@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00"
9+
@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00"
10+
11+
; Self-checking driver for a 16-bit cmpxchg whose compare value is negative
; when viewed as a signed i16. Returns 0 (after printing @str.2) when the
; exchange succeeds and the stored value is 234; returns 1 (after printing
; @str or @str.1) otherwise.
define signext i32 @main() {
12+
; CHECK-LABEL: main:
13+
; CHECK: li 3, -32477
14+
; CHECK: lis 12, 0
15+
; CHECK: li 6, 234
16+
; CHECK: sth 3, 46(1)
17+
; CHECK: ori 4, 12, 33059
18+
; CHECK: sync
19+
; CHECK: .LBB0_1: # %L.entry
20+
; CHECK: lharx 3, 0, 5
21+
; CHECK: cmpw 4, 3
22+
; CHECK: bne 0, .LBB0_3
23+
; CHECK: sthcx. 6, 0, 5
24+
; CHECK: bne 0, .LBB0_1
25+
; CHECK: b .LBB0_4
26+
; CHECK: .LBB0_3: # %L.entry
27+
; CHECK: sthcx. 3, 0, 5
28+
; CHECK: .LBB0_4: # %L.entry
29+
; CHECK: cmplwi 3, 33059
30+
; CHECK: lwsync
31+
; CHECK: lhz 3, 46(1)
32+
; CHECK: cmplwi 3, 234
33+
;
34+
; CHECK-P7-LABEL: main:
35+
; CHECK-P7: lis 4, 0
36+
; CHECK-P7: li 7, 0
37+
; CHECK-P7: li 3, -32477
38+
; CHECK-P7: sth 3, 46(1)
39+
; CHECK-P7: li 5, 234
40+
; CHECK-P7: ori 4, 4, 33059
41+
; CHECK-P7: rlwinm 3, 6, 3, 27, 27
42+
; CHECK-P7: ori 7, 7, 65535
43+
; CHECK-P7: sync
44+
; CHECK-P7: slw 8, 5, 3
45+
; CHECK-P7: slw 5, 7, 3
46+
; CHECK-P7: slw 9, 4, 3
47+
; CHECK-P7: and 7, 8, 5
48+
; CHECK-P7: rldicr 4, 6, 0, 61
49+
; CHECK-P7: and 8, 9, 5
50+
; CHECK-P7: .LBB0_1: # %L.entry
51+
; CHECK-P7: lwarx 9, 0, 4
52+
; CHECK-P7: and 6, 9, 5
53+
; CHECK-P7: cmpw 0, 6, 8
54+
; CHECK-P7: bne 0, .LBB0_3
55+
; CHECK-P7: andc 9, 9, 5
56+
; CHECK-P7: or 9, 9, 7
57+
; CHECK-P7: stwcx. 9, 0, 4
58+
; CHECK-P7: bne 0, .LBB0_1
59+
; CHECK-P7: b .LBB0_4
60+
; CHECK-P7: .LBB0_3: # %L.entry
61+
; CHECK-P7: stwcx. 9, 0, 4
62+
; CHECK-P7: .LBB0_4: # %L.entry
63+
; CHECK-P7: srw 3, 6, 3
64+
; CHECK-P7: lwsync
65+
; CHECK-P7: cmplwi 3, 33059
66+
; CHECK-P7: lhz 3, 46(1)
67+
; CHECK-P7: cmplwi 3, 234
68+
L.entry:
69+
%value.addr = alloca i16, align 2
; Seed the slot with -32477, whose 16-bit pattern is 33059 (0x8123) when
; read as an unsigned value.
70+
store i16 -32477, i16* %value.addr, align 2
; Compare against the same negative constant and, on a match, store 234.
; The expected assembly above materializes the compare value as 33059, i.e.
; in zero-extended form.
71+
%0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst
; Field 1 of the cmpxchg result is the success flag.
72+
%1 = extractvalue { i16, i1 } %0, 1
73+
br i1 %1, label %L.B0000, label %L.B0003
74+
75+
; The exchange reported failure: print @str and return 1.
L.B0003: ; preds = %L.entry
76+
%puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0))
77+
ret i32 1
78+
79+
; The exchange reported success: reload the slot and verify it now holds 234.
L.B0000: ; preds = %L.entry
80+
%2 = load i16, i16* %value.addr, align 2
81+
%3 = icmp eq i16 %2, 234
82+
br i1 %3, label %L.B0001, label %L.B0005
83+
84+
; Success was reported but the stored value is wrong: print @str.1, return 1.
L.B0005: ; preds = %L.B0000
85+
%puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0))
86+
ret i32 1
87+
88+
; Everything matched: print @str.2 and return 0.
L.B0001: ; preds = %L.B0000
89+
%puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0))
90+
ret i32 0
91+
}
92+
93+
; Function Attrs: nounwind
94+
declare i32 @puts(i8* nocapture readonly) #0

0 commit comments

Comments
 (0)