Skip to content

Commit d5e6a56

Browse files
authored
[XeVM] Add first integration tests (#425)
Enable XeVM integration tests (load/store/dpas) using wrappers and `gc-gpu-runner`. To achieve this, GPU components and tools are decoupled from IMEX, and the `gpu-to-gpuocl` pass (part of `gc-gpu-runner`) is extended to support upstream GPU code.
1 parent 1fa5c26 commit d5e6a56

File tree

70 files changed

+1322
-142
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+1322
-142
lines changed

CMakeLists.txt

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
################################################################################
2-
# Copyright (C) 2024 Intel Corporation
2+
# Copyright (C) 2025 Intel Corporation
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.
@@ -45,7 +45,8 @@ option(GC_ENABLE_TEST_DNNL_API "Build the dnnl tests" ${GC_ENABLE_DNNL_API})
4545
option(GC_ENABLE_TEST_MLIR "Build the mlir tests" ON)
4646
option(GC_ENABLE_TOOLS "Build the tools" ON)
4747
option(GC_ENABLE_OPT "Build gc-opt" ${GC_ENABLE_TOOLS})
48-
option(GC_ENABLE_IMEX "Enable Intel® Extension for MLIR" OFF)
48+
option(GC_ENABLE_IMEX "Enable Intel® Extension for MLIR (implicitly enables GPU compilation)" OFF)
49+
option(GC_ENABLE_GPU "Enable GPU runtime and tools components" OFF)
4950
option(GC_ENABLE_BINDINGS_PYTHON "Enable Graph Compiler Python Binding" ON)
5051
option(GC_DEV_LINK_LLVM_DYLIB "Link dynamic libraries of LLVM and MLIR. For developers only. Do not use it in packing the library." OFF)
5152
option(GC_ENABLE_RUNTIME_NAIVE_BRGEMM "Use naive BRGEMM as runtime backend for debug purpose." OFF)
@@ -55,6 +56,10 @@ if(GC_ENABLE_LEGACY)
5556
add_subdirectory(legacy/core)
5657
endif()
5758

59+
if (GC_ENABLE_GPU)
60+
set(GC_ENABLE_GPU ON)
61+
endif()
62+
5863
if (GC_ENABLE_IMEX)
5964
# normalize the value for lit config
6065
set(GC_ENABLE_IMEX ON)
@@ -70,6 +75,9 @@ endif()
7075
############################## Targets #########################################
7176
# All common options, includes etc. are added to this interface target.
7277
add_library(GcInterface INTERFACE)
78+
if (GC_ENABLE_GPU)
79+
target_compile_options(GcInterface INTERFACE -DGC_USE_GPU)
80+
endif()
7381
target_compile_features(GcInterface INTERFACE cxx_std_17)
7482
target_include_directories(GcInterface INTERFACE
7583
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>

README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,6 @@ Graph Compiler supports the following build-time options.
7676
| GC_ENABLE_TEST | **ON**, OFF | Controls building the tests |
7777
| GC_DEV_LINK_LLVM_DYLIB | ON, **OFF** | Controls dynamic link LLVM/MLIR libraries, mainly for developer |
7878
| GC_ENABLE_BINDINGS_PYTHON | **ON**, OFF | Controls building the Python API |
79-
| GC_ENABLE_IMEX | ON, **OFF** | Whether to enable the GPU components |
79+
| GC_ENABLE_IMEX | ON, **OFF** | Whether to enable the IMEX components |
80+
| GC_ENABLE_GPU | ON, **OFF** | Whether to enable the GPU tools and components |
8081

include/gc/Conversion/Passes.h

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define GC_CONVERSION_PASSES_H
1111

1212
#include "gc/Conversion/XeVMToLLVM/XeVMToLLVM.h"
13+
#include "mlir/Pass/Pass.h"
1314

1415
namespace mlir {
1516

include/gc/Dialect/LLVMIR/XeVMOps.td

+70
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,76 @@ def XeVM_BlockPrefetch2dOp : XeVM_Op<"blockprefetch2d">,
221221
let hasVerifier = 1;
222222
}
223223

224+
def XeVM_MatrixElemType : AnyTypeOf<[AnyI8, AnyI16, AnyI32, F32, F16, BF16]>;
225+
226+
/// Enum attribute of the different precision types.
227+
def XeVM_PrecisionTypeAttr : I32EnumAttr<"PrecisionType",
228+
"XeVM precision type",
229+
[
230+
I32EnumAttrCase<"UNUSED", 0, "unused">,
231+
I32EnumAttrCase<"U8", 1, "u8">,
232+
I32EnumAttrCase<"U4", 2, "u4">,
233+
I32EnumAttrCase<"U2", 3, "u2">,
234+
I32EnumAttrCase<"S8", 4, "i8">,
235+
I32EnumAttrCase<"S4", 5, "i4">,
236+
I32EnumAttrCase<"S2", 6, "i2">,
237+
I32EnumAttrCase<"BF8", 7, "bf8">,
238+
I32EnumAttrCase<"TF32", 8, "tf32">,
239+
I32EnumAttrCase<"BF16", 9, "bf16">,
240+
I32EnumAttrCase<"FP16", 10, "f16">
241+
]> {
242+
let cppNamespace = "::mlir::xevm";
243+
}
244+
245+
def XeVM_DPASOp : XeVM_Op<"dpas">,
246+
Results<(outs FixedVectorOf<[XeVM_MatrixElemType]>:$d)>,
247+
Arguments<(ins
248+
FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$c,
249+
FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$a,
250+
FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$b,
251+
XeVM_PrecisionTypeAttr:$pa,
252+
XeVM_PrecisionTypeAttr:$pb,
253+
I32Attr:$rc
254+
)> {
255+
256+
let summary = "Matrix multiply-add";
257+
258+
let description = [{
259+
The `xevm.dpas` operation is a matrix multiplication plus accumulation:
260+
261+
D = C + A x B
262+
263+
where the A, B, C input matrices and the result D have shapes:
264+
D : MxN
265+
C : MxN
266+
A : MxK
267+
B : KxN
268+
269+
Shape restrictions:
270+
M : must be 1, 2, 4, or 8
271+
N : fixed execution size, must be 16
272+
K : systolic_depth * OPS_PER_CHAN
273+
OPS_PER_CHAN
274+
1 : for TF32
275+
2 : for 16-bit precision(BF, HF)
276+
4 : for 8-bit precision (FP8, UB, B)
277+
8 : for less-than 8-bit precision (U4/S4, U2/S2).
278+
279+
If systolic_depth is 8, K would be 8, 16, 32, or 64 (based on OPS_PER_CHAN).
280+
$a, $b, $c, $d - matrix A, B, C, D, respectively
281+
$pa, $pb - precision of matrix A and B, respectively
282+
$rc - repeat count
283+
284+
Further restrictions as well as more details can be found here:
285+
https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_matrix_multiply_accumulate.html
286+
}];
287+
288+
let assemblyFormat = [{
289+
operands ` ` `{` `pa` `=` $pa `,` `pb` `=` $pb `,` `rc` `=` $rc `}` attr-dict `:` functional-type(operands, results)
290+
}];
291+
292+
// let hasVerifier = 1;
293+
}
224294

225295
def XeVM_TargetAttr : XeVM_Attr<"XeVMTarget", "target"> {
226296
let description = [{

include/gc/ExecutionEngine/Driver/Driver.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ namespace mlir {
1818
class DialectRegistry;
1919
namespace gc {
2020

21-
const DialectRegistry &initCompilerAndGetDialects();
21+
DialectRegistry &initCompilerAndGetDialects();
2222

2323
// the pointers to XXXMemRefType
2424
using GeneralMemrefPtr = void *;

include/gc/Transforms/CMakeLists.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
if(GC_ENABLE_DNNL_API)
22
list(APPEND TABLEGEN_MACROS -DGC_HAS_ONEDNN_DIALECT)
33
endif()
4+
if(GC_ENABLE_GPU)
5+
list(APPEND TABLEGEN_MACROS -DGC_USE_GPU)
6+
endif()
47
if(GC_ENABLE_IMEX)
5-
list(APPEND TABLEGEN_MACROS -DGC_USE_IMEX)
8+
list(APPEND TABLEGEN_MACROS -DGC_USE_IMEX -DGC_USE_GPU)
69
endif()
710

811
set(LLVM_TARGET_DEFINITIONS Passes.td)

include/gc/Transforms/Passes.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,6 @@ std::unique_ptr<Pass> createMergeAllocPass();
115115
void populateFrontendPasses(mlir::OpPassManager &);
116116
void populateCPUPipeline(mlir::OpPassManager &);
117117

118-
#ifdef GC_USE_IMEX
119118
struct GPUPipelineOptions : PassPipelineOptions<GPUPipelineOptions> {
120119
Option<bool> isUsmArgs{
121120
*this, "is-usm-args",
@@ -136,6 +135,8 @@ struct GPUPipelineOptions : PassPipelineOptions<GPUPipelineOptions> {
136135
llvm::cl::init(false)};
137136
};
138137
void populateGPUPipeline(mlir::OpPassManager &, const GPUPipelineOptions &);
138+
#ifdef GC_USE_IMEX
139+
void populateIMEXPipeline(mlir::OpPassManager &, const GPUPipelineOptions &);
139140
#endif
140141

141142
#define GEN_PASS_DECL

include/gc/Transforms/Passes.td

+15-12
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,21 @@ def LinalgToXeGPU : Pass<"linalg-to-xegpu", "func::FuncOp"> {
9393
"DPAS register block sizes MxNxK">,
9494
];
9595
}
96+
#endif
97+
98+
#ifdef GC_USE_GPU
99+
def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
100+
let summary = "Convert the GPU operations to GpuOclRuntime calls.";
101+
let description = [{
102+
Convert the gpu alloc, dealloc, memcpy and launch operations to GpuOclRuntime calls.
103+
}];
104+
let options = [
105+
Option<"callFinish", "call-finish", "bool",
106+
/*default=*/"false",
107+
"Call finish() after each kernel launch.">
108+
];
109+
}
110+
#endif // GC_USE_GPU
96111

97112
def AddContextArg : Pass<"add-ctx-arg", "func::FuncOp"> {
98113
let summary = "Add a context argument.";
@@ -109,17 +124,6 @@ def AllocsToSLM : Pass<"allocs-to-slm", "func::FuncOp"> {
109124
];
110125
}
111126

112-
def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
113-
let summary = "Convert the GPU operations to GpuOclRuntime calls.";
114-
let description = [{
115-
Convert the gpu alloc, dealloc, memcpy and launch operations to GpuOclRuntime calls.
116-
}];
117-
let options = [
118-
Option<"callFinish", "call-finish", "bool",
119-
/*default=*/"false",
120-
"Call finish() after each kernel launch.">
121-
];
122-
}
123127

124128
def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
125129
let summary = "GPU tiling and fusion path.";
@@ -185,7 +189,6 @@ def GpuXeVMAttachTarget: Pass<"xevm-attach-target", ""> {
185189
];
186190
}
187191

188-
#endif // GC_USE_IMEX
189192

190193
def IterativeTilingAndFusion : Pass<"iterative-tiling-and-fusion",
191194
"func::FuncOp"> {

lib/gc/CAPI/CMakeLists.txt

+1-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@ set(GC_ALL_LIBS
44
GcAnalysis
55
MLIRCPURuntimeTransforms)
66

7-
if(GC_ENABLE_IMEX)
8-
list(APPEND GC_ALL_LIBS GcGpuPasses)
9-
endif()
7+
list(APPEND GC_ALL_LIBS GcGpuPasses)
108

119
add_mlir_public_c_api_library(GcCAPI
1210
Dialects.cpp

0 commit comments

Comments
 (0)