Skip to content

Commit 4158932

Browse files
authored
Merge pull request #34 from schweitzpgi/release_50
[not flang] merge LLVM Release 50 changes
2 parents c492f2d + a984ae3 commit 4158932

7 files changed

+164
-134
lines changed

lib/Target/X86/X86AsmPrinter.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
344344
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
345345
char Mode, raw_ostream &O) {
346346
unsigned Reg = MO.getReg();
347+
bool EmitPercent = true;
348+
347349
switch (Mode) {
348350
default: return true; // Unknown mode.
349351
case 'b': // Print QImode register
@@ -358,14 +360,20 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
358360
case 'k': // Print SImode register
359361
Reg = getX86SubSuperRegister(Reg, 32);
360362
break;
363+
case 'V':
364+
EmitPercent = false;
365+
LLVM_FALLTHROUGH;
361366
case 'q':
362367
// Print 64-bit register names if 64-bit integer registers are available.
363368
// Otherwise, print 32-bit register names.
364369
Reg = getX86SubSuperRegister(Reg, P.getSubtarget().is64Bit() ? 64 : 32);
365370
break;
366371
}
367372

368-
O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
373+
if (EmitPercent)
374+
O << '%';
375+
376+
O << X86ATTInstPrinter::getRegisterName(Reg);
369377
return false;
370378
}
371379

@@ -438,6 +446,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
438446
case 'w': // Print HImode register
439447
case 'k': // Print SImode register
440448
case 'q': // Print DImode register
449+
case 'V': // Print native register without '%'
441450
if (MO.isReg())
442451
return printAsmMRegister(*this, MO, ExtraCode[0], O);
443452
printOperand(*this, MI, OpNo, O);

lib/Target/X86/X86ISelLowering.cpp

+58-43
Original file line numberDiff line numberDiff line change
@@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
2625026250

2625126251
static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2625226252
unsigned Reg) {
26253+
if (Subtarget.useRetpolineExternalThunk()) {
26254+
// When using an external thunk for retpolines, we pick names that match the
26255+
// names GCC happens to use as well. This helps simplify the implementation
26256+
// of the thunks for kernels where they have no easy ability to create
26257+
// aliases and are doing non-trivial configuration of the thunk's body. For
26258+
// example, the Linux kernel will do boot-time hot patching of the thunk
26259+
// bodies and cannot easily export aliases of these to loaded modules.
26260+
//
26261+
// Note that at any point in the future, we may need to change the semantics
26262+
// of how we implement retpolines and at that time will likely change the
26263+
// name of the called thunk. Essentially, there is no hard guarantee that
26264+
// LLVM will generate calls to specific thunks, we merely make a best-effort
26265+
// attempt to help out kernels and other systems where duplicating the
26266+
// thunks is costly.
26267+
switch (Reg) {
26268+
case X86::EAX:
26269+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26270+
return "__x86_indirect_thunk_eax";
26271+
case X86::ECX:
26272+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26273+
return "__x86_indirect_thunk_ecx";
26274+
case X86::EDX:
26275+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26276+
return "__x86_indirect_thunk_edx";
26277+
case X86::EDI:
26278+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26279+
return "__x86_indirect_thunk_edi";
26280+
case X86::R11:
26281+
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
26282+
return "__x86_indirect_thunk_r11";
26283+
}
26284+
llvm_unreachable("unexpected reg for retpoline");
26285+
}
26286+
26287+
// When targeting an internal COMDAT thunk use an LLVM-specific name.
2625326288
switch (Reg) {
26254-
case 0:
26255-
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26256-
return Subtarget.useRetpolineExternalThunk()
26257-
? "__llvm_external_retpoline_push"
26258-
: "__llvm_retpoline_push";
2625926289
case X86::EAX:
26260-
return Subtarget.useRetpolineExternalThunk()
26261-
? "__llvm_external_retpoline_eax"
26262-
: "__llvm_retpoline_eax";
26290+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26291+
return "__llvm_retpoline_eax";
2626326292
case X86::ECX:
26264-
return Subtarget.useRetpolineExternalThunk()
26265-
? "__llvm_external_retpoline_ecx"
26266-
: "__llvm_retpoline_ecx";
26293+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26294+
return "__llvm_retpoline_ecx";
2626726295
case X86::EDX:
26268-
return Subtarget.useRetpolineExternalThunk()
26269-
? "__llvm_external_retpoline_edx"
26270-
: "__llvm_retpoline_edx";
26296+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26297+
return "__llvm_retpoline_edx";
26298+
case X86::EDI:
26299+
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26300+
return "__llvm_retpoline_edi";
2627126301
case X86::R11:
26272-
return Subtarget.useRetpolineExternalThunk()
26273-
? "__llvm_external_retpoline_r11"
26274-
: "__llvm_retpoline_r11";
26302+
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
26303+
return "__llvm_retpoline_r11";
2627526304
}
2627626305
llvm_unreachable("unexpected reg for retpoline");
2627726306
}
@@ -26290,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2629026319
// just use R11, but we scan for uses anyway to ensure we don't generate
2629126320
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
2629226321
// already a register use operand to the call to hold the callee. If none
26293-
// are available, push the callee instead. This is less efficient, but is
26294-
// necessary for functions using 3 regparms. Such function calls are
26295-
// (currently) not eligible for tail call optimization, because there is no
26296-
// scratch register available to hold the address of the callee.
26322+
// are available, use EDI instead. EDI is chosen because EBX is the PIC base
26323+
// register and ESI is the base pointer to realigned stack frames with VLAs.
2629726324
SmallVector<unsigned, 3> AvailableRegs;
2629826325
if (Subtarget.is64Bit())
2629926326
AvailableRegs.push_back(X86::R11);
2630026327
else
26301-
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
26328+
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});
2630226329

2630326330
// Zero out any registers that are already used.
2630426331
for (const auto &MO : MI.operands()) {
@@ -26316,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2631626343
break;
2631726344
}
2631826345
}
26346+
if (!AvailableReg)
26347+
report_fatal_error("calling convention incompatible with retpoline, no "
26348+
"available registers");
2631926349

2632026350
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
2632126351

26322-
if (AvailableReg == 0) {
26323-
// No register available. Use PUSH. This must not be a tailcall, and this
26324-
// must not be x64.
26325-
if (Subtarget.is64Bit())
26326-
report_fatal_error(
26327-
"Cannot make an indirect call on x86-64 using both retpoline and a "
26328-
"calling convention that preservers r11");
26329-
if (Opc != X86::CALLpcrel32)
26330-
report_fatal_error("Cannot make an indirect tail call on x86 using "
26331-
"retpoline without a preserved register");
26332-
BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
26333-
MI.getOperand(0).ChangeToES(Symbol);
26334-
MI.setDesc(TII->get(Opc));
26335-
} else {
26336-
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26337-
.addReg(CalleeVReg);
26338-
MI.getOperand(0).ChangeToES(Symbol);
26339-
MI.setDesc(TII->get(Opc));
26340-
MachineInstrBuilder(*BB->getParent(), &MI)
26341-
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
26342-
}
26352+
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26353+
.addReg(CalleeVReg);
26354+
MI.getOperand(0).ChangeToES(Symbol);
26355+
MI.setDesc(TII->get(Opc));
26356+
MachineInstrBuilder(*BB->getParent(), &MI)
26357+
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
2634326358
return BB;
2634426359
}
2634526360

lib/Target/X86/X86RetpolineThunks.cpp

+11-57
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
4343
static const char EAXThunkName[] = "__llvm_retpoline_eax";
4444
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
4545
static const char EDXThunkName[] = "__llvm_retpoline_edx";
46-
static const char PushThunkName[] = "__llvm_retpoline_push";
46+
static const char EDIThunkName[] = "__llvm_retpoline_edi";
4747

4848
namespace {
4949
class X86RetpolineThunks : public MachineFunctionPass {
@@ -74,7 +74,6 @@ class X86RetpolineThunks : public MachineFunctionPass {
7474

7575
void createThunkFunction(Module &M, StringRef Name);
7676
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
77-
void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
7877
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
7978
};
8079

@@ -127,7 +126,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
127126
createThunkFunction(M, R11ThunkName);
128127
else
129128
for (StringRef Name :
130-
{EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
129+
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
131130
createThunkFunction(M, Name);
132131
InsertedThunks = true;
133132
return true;
@@ -151,9 +150,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
151150
populateThunk(MF, X86::R11);
152151
} else {
153152
// For 32-bit targets we need to emit a collection of thunks for various
154-
// possible scratch registers as well as a fallback that is used when
155-
// there are no scratch registers and assumes the retpoline target has
156-
// been pushed.
153+
// possible scratch registers as well as a fallback that uses EDI, which is
154+
// normally callee saved.
157155
// __llvm_retpoline_eax:
158156
// calll .Leax_call_target
159157
// .Leax_capture_spec:
@@ -174,32 +172,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
174172
// movl %edx, (%esp)
175173
// retl
176174
//
177-
// This last one is a bit more special and so needs a little extra
178-
// handling.
179-
// __llvm_retpoline_push:
180-
// calll .Lpush_call_target
181-
// .Lpush_capture_spec:
182-
// pause
183-
// lfence
184-
// jmp .Lpush_capture_spec
185-
// .align 16
186-
// .Lpush_call_target:
187-
// # Clear pause_loop return address.
188-
// addl $4, %esp
189-
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
190-
// pushl 4(%esp) # Push callee
191-
// pushl 4(%esp) # Push RA
192-
// popl 8(%esp) # Pop RA to final RA
193-
// popl (%esp) # Pop callee to next top of stack
194-
// retl # Ret to callee
175+
// __llvm_retpoline_edi:
176+
// ... # Same setup
177+
// movl %edi, (%esp)
178+
// retl
195179
if (MF.getName() == EAXThunkName)
196180
populateThunk(MF, X86::EAX);
197181
else if (MF.getName() == ECXThunkName)
198182
populateThunk(MF, X86::ECX);
199183
else if (MF.getName() == EDXThunkName)
200184
populateThunk(MF, X86::EDX);
201-
else if (MF.getName() == PushThunkName)
202-
populateThunk(MF);
185+
else if (MF.getName() == EDIThunkName)
186+
populateThunk(MF, X86::EDI);
203187
else
204188
llvm_unreachable("Invalid thunk name on x86-32!");
205189
}
@@ -240,31 +224,6 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
240224
.addReg(Reg);
241225
}
242226

243-
void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
244-
MachineBasicBlock &MBB) {
245-
// The instruction sequence we use to replace the return address without
246-
// a scratch register is somewhat complicated:
247-
// # Clear capture_spec from return address.
248-
// addl $4, %esp
249-
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
250-
// pushl 4(%esp) # Push callee
251-
// pushl 4(%esp) # Push RA
252-
// popl 8(%esp) # Pop RA to final RA
253-
// popl (%esp) # Pop callee to next top of stack
254-
// retl # Ret to callee
255-
BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP)
256-
.addReg(X86::ESP)
257-
.addImm(4);
258-
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
259-
false, 4);
260-
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
261-
false, 4);
262-
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
263-
false, 8);
264-
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
265-
false, 0);
266-
}
267-
268227
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
269228
Optional<unsigned> Reg) {
270229
// Set MF properties. We never use vregs...
@@ -301,11 +260,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
301260
CaptureSpec->addSuccessor(CaptureSpec);
302261

303262
CallTarget->setAlignment(4);
304-
if (Reg) {
305-
insertRegReturnAddrClobber(*CallTarget, *Reg);
306-
} else {
307-
assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
308-
insert32BitPushReturnAddrClobber(*CallTarget);
309-
}
263+
insertRegReturnAddrClobber(*CallTarget, *Reg);
310264
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
311265
}
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s
2+
; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s
3+
4+
; The 'V' modifier prints the register name without the leading '%'. If the
5+
; target does not have 64-bit integer registers, emit 32-bit register names.
6+
7+
; X86: call __x86_indirect_thunk_e{{[abcd]}}x
8+
; X64: call __x86_indirect_thunk_r
9+
10+
define void @q_modifier(i32* %p) {
11+
entry:
12+
tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
13+
ret void
14+
}

0 commit comments

Comments
 (0)