replace sysDesc with target info
zhczhong committed Aug 6, 2024
1 parent efc2d86 commit 5765bc7
Showing 2 changed files with 23 additions and 75 deletions.
56 changes: 0 additions & 56 deletions include/gc/Analysis/MatmulConfigAnalysis.h
@@ -19,62 +19,6 @@ namespace gc {

 using namespace mlir;
 
-struct SystemDesc {
-  // get runtime OMP_NUM_THREADS
-  uint32_t getNumThreads() {
-    std::optional<Attribute> numThreads = layout.getDevicePropertyValue(
-        Builder(ctx).getStringAttr("CPU" /* device ID*/),
-        Builder(ctx).getStringAttr("num_threads"));
-    if (numThreads && isa<IntegerAttr>(*numThreads)) {
-      return dyn_cast<IntegerAttr>(*numThreads).getInt();
-    }
-    return 1;
-  }
-  // get cache size by cacheLevel
-  size_t getCacheSize(uint8_t cacheLevel) {
-    if (cacheLevel == 1) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L1_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    } else if (cacheLevel == 2) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L2_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    } else if (cacheLevel == 3) {
-      std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
-          Builder(ctx).getStringAttr("CPU" /* device ID*/),
-          Builder(ctx).getStringAttr("L3_cache_size_in_bytes"));
-      if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
-        return dyn_cast<IntegerAttr>(*cacheSize).getInt();
-      }
-    }
-    return 0;
-  }
-
-  // get the maximum vector length in bits
-  size_t getMaxVectorLength() {
-    std::optional<Attribute> maxVectorLength = layout.getDevicePropertyValue(
-        Builder(ctx).getStringAttr("CPU" /* device ID*/),
-        Builder(ctx).getStringAttr("max_vector_width"));
-    if (maxVectorLength && isa<IntegerAttr>(*maxVectorLength)) {
-      return dyn_cast<IntegerAttr>(*maxVectorLength).getInt();
-    }
-    return 512;
-  }
-
-  SystemDesc(ModuleOp m) : layout(m), ctx(m->getContext()) {}
-
-private:
-  DataLayout layout;
-  MLIRContext *ctx;
-};
-
 // The configuration for matmul tiling
 // TODO: support batch matmul
 struct MatmulConfig {
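Aside: the removed SystemDesc repeats the same lookup-with-fallback query once per cache level. Below is a condensed, self-contained sketch of that pattern; the std::map stands in for the DLTI device-property table that the real struct reads through DataLayout::getDevicePropertyValue, and the byte values are hypothetical placeholders, not numbers from this repository.

#include <cstdint>
#include <cstdio>
#include <map>
#include <optional>
#include <string>

// Stand-in for the DLTI device-property lookup: returns the value for a key,
// or std::nullopt when the target description does not define it.
static std::optional<int64_t> getDeviceProperty(const std::string &key) {
  static const std::map<std::string, int64_t> props = {
      {"L1_cache_size_in_bytes", 49152}, // hypothetical sizes
      {"L2_cache_size_in_bytes", 2097152},
      {"L3_cache_size_in_bytes", 104857600}};
  auto it = props.find(key);
  if (it == props.end())
    return std::nullopt;
  return it->second;
}

// Table-driven equivalent of the deleted getCacheSize: one query, three key
// strings, and the same return-0-when-unknown fallback as the removed code.
static size_t getCacheSize(uint8_t cacheLevel) {
  static const char *keys[] = {"L1_cache_size_in_bytes",
                               "L2_cache_size_in_bytes",
                               "L3_cache_size_in_bytes"};
  if (cacheLevel < 1 || cacheLevel > 3)
    return 0;
  if (std::optional<int64_t> size = getDeviceProperty(keys[cacheLevel - 1]))
    return static_cast<size_t>(*size);
  return 0;
}

int main() { std::printf("L2 = %zu bytes\n", getCacheSize(2)); }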
42 changes: 23 additions & 19 deletions lib/gc/Analysis/MatmulConfigAnalysis.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "gc/Analysis/MatmulConfigAnalysis.h"
+#include "gc/Analysis/TargetDescriptionAnalysis.h"
 #include <limits>
 #include <llvm/Support/Debug.h>
 
@@ -64,7 +65,8 @@ getCandidate(uint32_t num, uint32_t floor,
 }
 
 // check if the threads are valid
-bool validateThreads(ArrayRef<uint32_t> threads, SystemDesc &sysDesc) {
+bool validateThreads(ArrayRef<uint32_t> threads,
+                     CPUTargetDescriptionAnalysis &sysDesc) {
   uint32_t numThreads = sysDesc.getNumThreads();
   uint32_t actualThreads = 1U;
   for (uint32_t t : threads)
@@ -77,24 +79,25 @@ bool validateThreads(ArrayRef<uint32_t> threads, SystemDesc &sysDesc) {
 double vectorRegEfficiencyCost(linalg::LinalgOp &linalgOp,
                                ArrayRef<uint32_t> shape,
                                const MatmulConfig &config,
-                               SystemDesc &sysDesc) {
+                               CPUTargetDescriptionAnalysis &sysDesc) {
   size_t dtypeSize = DataLayout().getTypeSizeInBits(
       ShapeAdaptor(linalgOp.getDpsInputs()[1].getType()).getElementType());
-  size_t maxVectorLength = sysDesc.getMaxVectorLength() / dtypeSize;
+  size_t maxVectorWidth = sysDesc.getMaxVectorWidth() / dtypeSize;
   // TODO: take matrix register like amx into account
-  double cost = (maxVectorLength - config.innerMostMBlock % maxVectorLength) %
-                    maxVectorLength * 1.0 / config.innerMostMBlock +
-                (maxVectorLength - config.innerMostKBlock % maxVectorLength) %
-                    maxVectorLength * 1.0 / config.innerMostKBlock +
-                (maxVectorLength - config.innerMostNBlock % maxVectorLength) %
-                    maxVectorLength * 1.0 / config.innerMostNBlock;
+  double cost = (maxVectorWidth - config.innerMostMBlock % maxVectorWidth) %
+                    maxVectorWidth * 1.0 / config.innerMostMBlock +
+                (maxVectorWidth - config.innerMostKBlock % maxVectorWidth) %
+                    maxVectorWidth * 1.0 / config.innerMostKBlock +
+                (maxVectorWidth - config.innerMostNBlock % maxVectorWidth) %
+                    maxVectorWidth * 1.0 / config.innerMostNBlock;
   return cost;
 }
 
 // calculate the cost of the workload balance
 double workloadBalancedCost(linalg::LinalgOp &linalgOp,
                             ArrayRef<uint32_t> shape,
-                            const MatmulConfig &config, SystemDesc &sysDesc) {
+                            const MatmulConfig &config,
+                            CPUTargetDescriptionAnalysis &sysDesc) {
   if (shape.size() < 3) {
     // Has an invalid shape
     return 0;
@@ -118,7 +121,7 @@ double workloadBalancedCost(linalg::LinalgOp &linalgOp,
 double memoryConsumptionOnThreadCost(linalg::LinalgOp &linalgOp,
                                      ArrayRef<uint32_t> shape,
                                      const MatmulConfig &config,
-                                     SystemDesc &sysDesc) {
+                                     CPUTargetDescriptionAnalysis &sysDesc) {
   if (shape.size() < 3) {
     // Has an invalid shape
     return 0;
@@ -141,7 +144,7 @@ double memoryConsumptionOnThreadCost(linalg::LinalgOp &linalgOp,
 double computationIntensityOnL2Cache(linalg::LinalgOp &linalgOp,
                                      ArrayRef<uint32_t> shape,
                                      const MatmulConfig &config,
-                                     SystemDesc &sysDesc) {
+                                     CPUTargetDescriptionAnalysis &sysDesc) {
   double fullLoadRatio = 0.7;
   uint32_t L2Cache = sysDesc.getCacheSize(2);
   size_t dtypeSize = DataLayout().getTypeSize(
@@ -157,16 +160,17 @@ double computationIntensityOnL2Cache(linalg::LinalgOp &linalgOp,
   return 1 / computationIntensity;
 }
 
-using CostModelFn =
-    std::function<double(linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape,
-                         MatmulConfig cfg, SystemDesc &sysDesc)>;
+using CostModelFn = std::function<double(
+    linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape, MatmulConfig cfg,
+    CPUTargetDescriptionAnalysis &sysDesc)>;
 
 // filter the config by the cost model
 std::vector<MatmulConfig>
 filterConfigByCostModel(ArrayRef<MatmulConfig> configs,
                         linalg::LinalgOp &linalgOp, ArrayRef<uint32_t> shape,
-                        SystemDesc &sysDesc, const CostModelFn &costModel,
-                        float preserveRatio = 0.5, float threshold = -1) {
+                        CPUTargetDescriptionAnalysis &sysDesc,
+                        const CostModelFn &costModel, float preserveRatio = 0.5,
+                        float threshold = -1) {
   std::vector<MatmulConfig> result;
   std::vector<float> costs;
   std::vector<size_t> idx;
@@ -196,7 +200,7 @@ filterConfigByCostModel(ArrayRef<MatmulConfig> configs,

 // prepare the config candidates
 std::vector<MatmulConfig>
-prepareConfigCandidates(Operation *root, SystemDesc &sysDesc,
+prepareConfigCandidates(Operation *root, CPUTargetDescriptionAnalysis &sysDesc,
                         ArrayRef<uint32_t> shape,
                         ArrayRef<uint32_t> givenInnermostBlock) {
   if (shape.size() < 3) {
@@ -347,7 +351,7 @@ bool readConfigFromAttrs(MatmulConfig &config, ArrayRef<NamedAttribute> attrs) {
 // previous matmul
 MatmulConfigAnalysis::MatmulConfigAnalysis(Operation *root) {
   if (auto linalgOp = dyn_cast<linalg::LinalgOp>(root)) {
-    SystemDesc sysDesc(root->getParentOfType<ModuleOp>());
+    CPUTargetDescriptionAnalysis sysDesc(root);
     SmallVector<SmallVector<DimType>> oprandDimType =
         *getOprandDimType(linalgOp);
     // get the origin M,N,K size
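Note on the change above: the analysis is now built directly from the operation (CPUTargetDescriptionAnalysis sysDesc(root)) instead of from the enclosing ModuleOp, and the getMaxVectorLength query becomes getMaxVectorWidth. To make the register-efficiency term concrete, here is a minimal, self-contained restatement of the expression in the vectorRegEfficiencyCost hunk; only the arithmetic comes from the diff, while the block sizes and vector width are invented example inputs.

#include <cstdint>
#include <cstdio>

// Per-dimension padding waste from vectorRegEfficiencyCost: the lanes left
// idle when a block of `size` elements is tiled by vectors of `width` lanes,
// normalized by the block size.
static double paddingWaste(uint32_t size, uint32_t width) {
  return (width - size % width) % width * 1.0 / size;
}

static double vectorRegEfficiencyCost(uint32_t mBlock, uint32_t kBlock,
                                      uint32_t nBlock, uint32_t width) {
  return paddingWaste(mBlock, width) + paddingWaste(kBlock, width) +
         paddingWaste(nBlock, width);
}

int main() {
  // 512-bit vectors over f32 give 512 / 32 = 16 lanes, mirroring
  // sysDesc.getMaxVectorWidth() / dtypeSize in the code above.
  uint32_t width = 512 / 32;
  // Innermost blocks that are multiples of the lane count waste nothing...
  std::printf("%.3f\n", vectorRegEfficiencyCost(32, 64, 16, width)); // 0.000
  // ...while 20x20x20 leaves 12 idle lanes in each dimension's last vector.
  std::printf("%.3f\n", vectorRegEfficiencyCost(20, 20, 20, width)); // 1.800
}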
