@@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
26250
26250
26251
26251
/// Returns the name of the retpoline thunk symbol to call for the register
/// \p Reg (the register EmitLoweredRetpoline copies the callee into).
///
/// Two naming schemes exist in the post-change ('+') code:
///  - If Subtarget.useRetpolineExternalThunk() is true, the result is one of
///    the "__x86_indirect_thunk_<reg>" names.
///  - Otherwise the result is one of the "__llvm_retpoline_<reg>" names.
///
/// Accepted registers are EAX, ECX, EDX and EDI (each asserts the subtarget is
/// not 64-bit) and R11 (asserts the subtarget is 64-bit). Any other value,
/// including 0, falls through to llvm_unreachable. The returned pointer is a
/// string literal.
///
/// In this diff view, lines marked '-' show the previous implementation, which
/// also accepted Reg == 0 (the "_push" thunks) and used
/// "__llvm_external_retpoline_<reg>" names for external thunks.
static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
26252
26252
unsigned Reg) {
26253
+ if (Subtarget.useRetpolineExternalThunk()) {
26254
+ // When using an external thunk for retpolines, we pick names that match the
26255
+ // names GCC happens to use as well. This helps simplify the implementation
26256
+ // of the thunks for kernels where they have no easy ability to create
26257
+ // aliases and are doing non-trivial configuration of the thunk's body. For
26258
+ // example, the Linux kernel will do boot-time hot patching of the thunk
26259
+ // bodies and cannot easily export aliases of these to loaded modules.
26260
+ //
26261
+ // Note that at any point in the future, we may need to change the semantics
26262
+ // of how we implement retpolines and at that time will likely change the
26263
+ // name of the called thunk. Essentially, there is no hard guarantee that
26264
+ // LLVM will generate calls to specific thunks, we merely make a best-effort
26265
+ // attempt to help out kernels and other systems where duplicating the
26266
+ // thunks is costly.
26267
+ switch (Reg) {
26268
+ case X86::EAX:
26269
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26270
+ return "__x86_indirect_thunk_eax";
26271
+ case X86::ECX:
26272
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26273
+ return "__x86_indirect_thunk_ecx";
26274
+ case X86::EDX:
26275
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26276
+ return "__x86_indirect_thunk_edx";
26277
+ case X86::EDI:
26278
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26279
+ return "__x86_indirect_thunk_edi";
26280
+ case X86::R11:
26281
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
26282
+ return "__x86_indirect_thunk_r11";
26283
+ }
26284
+ llvm_unreachable("unexpected reg for retpoline");
26285
+ }
26286
+
26287
+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
26253
26288
switch (Reg) {
26254
- case 0:
26255
- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26256
- return Subtarget.useRetpolineExternalThunk()
26257
- ? "__llvm_external_retpoline_push"
26258
- : "__llvm_retpoline_push";
26259
26289
case X86::EAX:
26260
- return Subtarget.useRetpolineExternalThunk()
26261
- ? "__llvm_external_retpoline_eax"
26262
- : "__llvm_retpoline_eax";
26290
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26291
+ return "__llvm_retpoline_eax";
26263
26292
case X86::ECX:
26264
- return Subtarget.useRetpolineExternalThunk()
26265
- ? "__llvm_external_retpoline_ecx"
26266
- : "__llvm_retpoline_ecx";
26293
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26294
+ return "__llvm_retpoline_ecx";
26267
26295
case X86::EDX:
26268
- return Subtarget.useRetpolineExternalThunk()
26269
- ? "__llvm_external_retpoline_edx"
26270
- : "__llvm_retpoline_edx";
26296
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26297
+ return "__llvm_retpoline_edx";
26298
+ case X86::EDI:
26299
+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26300
+ return "__llvm_retpoline_edi";
26271
26301
case X86::R11:
26272
- return Subtarget.useRetpolineExternalThunk()
26273
- ? "__llvm_external_retpoline_r11"
26274
- : "__llvm_retpoline_r11";
26302
+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
26303
+ return "__llvm_retpoline_r11";
26275
26304
}
26276
26305
// Reached only when Reg matched none of the cases in the switch above.
llvm_unreachable("unexpected reg for retpoline");
26277
26306
}
@@ -26290,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
26290
26319
// just use R11, but we scan for uses anyway to ensure we don't generate
26291
26320
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
26292
26321
// already a register use operand to the call to hold the callee. If none
26293
- // are available, push the callee instead. This is less efficient, but is
26294
- // necessary for functions using 3 regparms. Such function calls are
26295
- // (currently) not eligible for tail call optimization, because there is no
26296
- // scratch register available to hold the address of the callee.
26322
+ // are available, use EDI instead. EDI is chosen because EBX is the PIC base
26323
+ // register and ESI is the base pointer to realigned stack frames with VLAs.
26297
26324
SmallVector<unsigned, 3> AvailableRegs;
26298
26325
if (Subtarget.is64Bit())
26299
26326
AvailableRegs.push_back(X86::R11);
26300
26327
else
26301
- AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
26328
+ AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI });
26302
26329
26303
26330
// Zero out any registers that are already used.
26304
26331
for (const auto &MO : MI.operands()) {
@@ -26316,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
26316
26343
break;
26317
26344
}
26318
26345
}
26346
+ if (!AvailableReg)
26347
+ report_fatal_error("calling convention incompatible with retpoline, no "
26348
+ "available registers");
26319
26349
26320
26350
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
26321
26351
26322
- if (AvailableReg == 0) {
26323
- // No register available. Use PUSH. This must not be a tailcall, and this
26324
- // must not be x64.
26325
- if (Subtarget.is64Bit())
26326
- report_fatal_error(
26327
- "Cannot make an indirect call on x86-64 using both retpoline and a "
26328
- "calling convention that preservers r11");
26329
- if (Opc != X86::CALLpcrel32)
26330
- report_fatal_error("Cannot make an indirect tail call on x86 using "
26331
- "retpoline without a preserved register");
26332
- BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
26333
- MI.getOperand(0).ChangeToES(Symbol);
26334
- MI.setDesc(TII->get(Opc));
26335
- } else {
26336
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26337
- .addReg(CalleeVReg);
26338
- MI.getOperand(0).ChangeToES(Symbol);
26339
- MI.setDesc(TII->get(Opc));
26340
- MachineInstrBuilder(*BB->getParent(), &MI)
26341
- .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
26342
- }
26352
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26353
+ .addReg(CalleeVReg);
26354
+ MI.getOperand(0).ChangeToES(Symbol);
26355
+ MI.setDesc(TII->get(Opc));
26356
+ MachineInstrBuilder(*BB->getParent(), &MI)
26357
+ .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
26343
26358
return BB;
26344
26359
}
26345
26360
0 commit comments