Skip to content

Commit 3f88803

Browse files
ekochetkigcbot
authored and committed
Restrict early retry for simple kernels
Codegen can significantly decrease register pressure for kernels with a single basic block. In that case the first compilation stage can turn out better than the second, even when IGC estimates high register pressure. This commit therefore forbids early retry for single-BB kernels.
1 parent 5695cb5 commit 3f88803

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,12 @@ bool EmitPass::shouldForceEarlyRecompile(MetaDataUtils *pMdUtils, llvm::Function
540540
if (m_currShader->IsRecompilationRequestForced()) {
541541
return true;
542542
}
543+
544+
// Codegen works efficiently for kernels with 1 BB,
545+
// so give it a chance to compile on the first stage
546+
if (F->size() == 1)
547+
return false;
548+
543549
auto Threshold = IGC_GET_FLAG_VALUE(EarlyRetryLargeGRFThreshold);
544550
auto GRFPerThread = m_pCtx->getNumGRFPerThread();
545551
// If we are not in large GRF mode and auto GRF is disabled we use
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: regkeys
10+
// RUN: ocloc compile -file %s -options "-igc_opts 'EarlyRetryDefaultGRFThreshold=5,EarlyRetryLargeGRFThreshold=5'" \
11+
// RUN: -device mtl | FileCheck %s --check-prefix=CHECK
12+
13+
// A kernel with a single basic block should not be recompiled, even when its register pressure
14+
// exceeds the existing early retry thresholds.
15+
16+
// CHECK-NOT: [RetryManager] Start recompilation of the kernel
17+
18+
// Test kernel with straight-line code only (no branches or loops): each work-item
// loads in[gid], derives a chain of twelve values (a..l) by alternating additions
// and multiplications, and stores the sum of all of them to out[gid].
__kernel void add_mul_pressure(__global const float* in, __global float* out) {
19+
int gid = get_global_id(0);
20+
float a = in[gid];
21+
float b = a + 1.0f;
22+
float c = b * 2.0f;
23+
float d = c + 3.0f;
24+
float e = d * 4.0f;
25+
float f = e + 5.0f;
26+
float g = f * 6.0f;
27+
float h = g + 7.0f;
28+
float i = h * 8.0f;
29+
float j = i + 9.0f;
30+
float k = j * 10.0f;
31+
float l = k + 11.0f;
32+
// Fold every intermediate value into the result so that none of them is unused;
// the intent stated by the test is to raise register pressure.
33+
float result = a + b + c + d + e + f + g + h + i + j + k + l;
34+
out[gid] = result;
35+
}

0 commit comments

Comments
 (0)