From 4a0dc3ef36ceff20787ff277a1fb6a1b513c4934 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Mon, 14 Oct 2024 08:55:12 +0200 Subject: [PATCH] [AMDGPU][SplitModule] Handle !callees metadata (#108802) See #106528 to review the first commit. Handle the `!callees` metadata to further reduce the number of indirect call cases that end up conservatively assuming that any indirectly callable function is a potential target. --- llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 63 +++++++++++++---- .../kernels-dependency-indirect-callee-md.ll | 69 +++++++++++++++++++ 2 files changed, 117 insertions(+), 15 deletions(-) create mode 100644 llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp index a62c72d124825..7d86e0c72bd07 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp @@ -482,6 +482,29 @@ void SplitGraph::Node::visitAllDependencies( } } +/// Checks if \p I has MD_callees and, if it does, parses it and puts the functions +/// in \p Callees. +/// +/// \returns true if there was metadata and it was parsed correctly. false if +/// there was no MD or if it contained unknown entries and parsing failed. +/// If this returns false, \p Callees will contain incomplete information +/// and must not be used. 
+static bool handleCalleesMD(const Instruction &I, + SetVector &Callees) { + auto *MD = I.getMetadata(LLVMContext::MD_callees); + if (!MD) + return false; + + for (const auto &Op : MD->operands()) { + Function *Callee = mdconst::extract_or_null(Op); + if (!Callee) + return false; + Callees.insert(Callee); + } + + return true; +} + void SplitGraph::buildGraph(CallGraph &CG) { SplitModuleTimer SMT("buildGraph", "graph construction"); LLVM_DEBUG( @@ -519,28 +542,38 @@ void SplitGraph::buildGraph(CallGraph &CG) { Fn.printAsOperand(dbgs()); dbgs() << " - analyzing function\n"); - bool HasIndirectCall = false; + SetVector KnownCallees; + bool HasUnknownIndirectCall = false; for (const auto &Inst : instructions(Fn)) { // look at all calls without a direct callee. - if (const auto *CB = dyn_cast(&Inst); - CB && !CB->getCalledFunction()) { - // inline assembly can be ignored, unless InlineAsmIsIndirectCall is - // true. - if (CB->isInlineAsm()) { - LLVM_DEBUG(dbgs() << " found inline assembly\n"); - continue; - } - - // everything else is handled conservatively. - HasIndirectCall = true; - break; + const auto *CB = dyn_cast(&Inst); + if (!CB || CB->getCalledFunction()) + continue; + + // inline assembly can be ignored, unless InlineAsmIsIndirectCall is + // true. + if (CB->isInlineAsm()) { + LLVM_DEBUG(dbgs() << " found inline assembly\n"); + continue; } + + if (handleCalleesMD(Inst, KnownCallees)) + continue; + // If we failed to parse any !callees MD, or some was missing, + // the entire KnownCallees list is now unreliable. + KnownCallees.clear(); + + // Everything else is handled conservatively. If we fall into the + // conservative case don't bother analyzing further. 
+ HasUnknownIndirectCall = true; + break; } - if (HasIndirectCall) { + if (HasUnknownIndirectCall) { LLVM_DEBUG(dbgs() << " indirect call found\n"); FnsWithIndirectCalls.push_back(&Fn); - } + } else if (!KnownCallees.empty()) + DirectCallees.insert(KnownCallees.begin(), KnownCallees.end()); } Node &N = getNode(Cache, Fn); diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll new file mode 100644 index 0000000000000..f1ed02b2502a0 --- /dev/null +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect-callee-md.ll @@ -0,0 +1,69 @@ +; RUN: sed -s 's/_MD_/, !callees !{ptr @CallCandidate0}/' %s | llvm-split -o %t -j 3 -mtriple amdgcn-amd-amdhsa +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s + +; RUN: sed -s 's/_MD_//g' %s | llvm-split -o %t-nomd -j 3 -mtriple amdgcn-amd-amdhsa +; RUN: llvm-dis -o - %t-nomd0 | FileCheck --check-prefix=CHECK-NOMD0 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t-nomd1 | FileCheck --check-prefix=CHECK-NOMD1 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t-nomd2 | FileCheck --check-prefix=CHECK-NOMD2 --implicit-check-not=define %s + +; CHECK0: define internal void @HelperC +; CHECK0: define amdgpu_kernel void @C + +; CHECK1: define hidden void @CallCandidate1 +; CHECK1: define internal void @HelperB +; CHECK1: define amdgpu_kernel void @B + +; CHECK2: define internal void @HelperA +; CHECK2: define hidden void @CallCandidate0 +; CHECK2: define amdgpu_kernel void @A + +; CHECK-NOMD0: define internal void @HelperC +; CHECK-NOMD0: define amdgpu_kernel void @C + +; CHECK-NOMD1: define internal void @HelperB +; CHECK-NOMD1: define amdgpu_kernel void @B + +; CHECK-NOMD2: define internal void 
@HelperA +; CHECK-NOMD2: define hidden void @CallCandidate0 +; CHECK-NOMD2: define hidden void @CallCandidate1 +; CHECK-NOMD2: define amdgpu_kernel void @A + +@addrthief = global [2 x ptr] [ptr @CallCandidate0, ptr @CallCandidate1] + +define internal void @HelperA(ptr %call) { + call void %call() _MD_ + ret void +} + +define internal void @CallCandidate0() { + ret void +} + +define internal void @CallCandidate1() { + ret void +} + +define internal void @HelperB() { + ret void +} + +define internal void @HelperC() { + ret void +} + +define amdgpu_kernel void @A(ptr %call) { + call void @HelperA(ptr %call) + ret void +} + +define amdgpu_kernel void @B() { + call void @HelperB() + ret void +} + +define amdgpu_kernel void @C() { + call void @HelperC() + ret void +}