Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@
#define HOST_PASSES_MACHINERY_H_INCLUDED

#include <base/base_module_pass_machinery.h>

#include <vecz/pass.h>
#include <optional>

namespace llvm {
class TargetMachine;
}

namespace vecz {
class VeczPassOptions;
}
namespace host {
/// @brief Optimization settings for the host target, as produced by
/// HostPassMachinery::processOptimizationOptions.
struct OptimizationOptions {
/// @brief Vecz pass options to use. processOptimizationOptions pushes one
/// entry, plus a second vector-predicated entry when the `VVP` flag is given
/// in `CA_HOST_VF`.
llvm::SmallVector<vecz::VeczPassOptions> vecz_pass_opts;
/// @brief Set when the `V` flag is given in `CA_HOST_VF` (vectorize only, with
/// no scalar tail).
bool force_no_tail = false;
/// @brief Whether builtins should be linked early (before the pre-vecz
/// passes). Set when the `S` (scalable) flag is given in `CA_HOST_VF`, and
/// overridable through the `<prefix>_EARLY_LINK_BUILTINS` environment
/// variable.
bool early_link_builtins = false;
};

class HostPassMachinery final : public compiler::BaseModulePassMachinery {
public:
Expand Down Expand Up @@ -63,6 +71,10 @@ class HostPassMachinery final : public compiler::BaseModulePassMachinery {
/// @brief Returns an optimization pass pipeline corresponding to
/// BaseModule::getLateTargetPasses.
llvm::ModulePassManager getLateTargetPasses();

static host::OptimizationOptions processOptimizationOptions(
std::optional<std::string> env_debug_prefix,
std::optional<compiler::VectorizationMode> vecz_mode);
};

} // namespace host
Expand Down
113 changes: 112 additions & 1 deletion modules/compiler/targets/host/source/HostPassMachinery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <compiler/utils/attributes.h>
#include <compiler/utils/compute_local_memory_usage_pass.h>
#include <compiler/utils/define_mux_builtins_pass.h>
#include <compiler/utils/link_builtins_pass.h>
#include <compiler/utils/make_function_name_unique_pass.h>
#include <compiler/utils/manual_type_legalization_pass.h>
#include <compiler/utils/metadata.h>
Expand Down Expand Up @@ -55,6 +56,99 @@

namespace host {

/// @brief Process various compiler options based on the requested
/// vectorization mode and common environment variables.
///
/// Reads the `CA_HOST_VF` and `CODEPLAY_VECZ_CHOICES` environment variables
/// and, when `env_debug_prefix` is provided, `<prefix>_EARLY_LINK_BUILTINS`.
///
/// @param env_debug_prefix Optional prefix used to build the name of the
/// `<prefix>_EARLY_LINK_BUILTINS` override variable. When empty, no override
/// is looked up.
/// @param vecz_mode Optional vectorization mode. Auto-vectorization is enabled
/// up front only when this equals `compiler::VectorizationMode::AUTO`; the `A`
/// flag in `CA_HOST_VF` can still enable it afterwards.
///
/// @return The collected options. `vecz_pass_opts` holds one entry, or two
/// when `VVP` was requested (the second has vector predication enabled).
host::OptimizationOptions
HostPassMachinery::processOptimizationOptions(
    std::optional<std::string> env_debug_prefix,
    std::optional<compiler::VectorizationMode> vecz_mode) {
  OptimizationOptions env_var_opts;
  vecz::VeczPassOptions vecz_opts;
  // The minimum number of elements to vectorize for. For a fixed-length VF,
  // this is the exact number of elements to vectorize by. For scalable VFs,
  // the actual number of elements is a multiple (vscale) of these, unknown at
  // compile time. Starts out as the scalar factor and may be overridden below
  // by CA_HOST_VF.
  vecz_opts.factor = compiler::utils::VectorizationFactor::getScalar();

  vecz_opts.choices.enable(vecz::VectorizationChoices::eDivisionExceptions);

  vecz_opts.vecz_auto = vecz_mode == compiler::VectorizationMode::AUTO;
  vecz_opts.vec_dim_idx = 0;

  // CA_HOST_VF is a comma separated set of fields:
  // S     - use scalable vectorization
  // V     - vectorize only, otherwise produce both scalar and vector kernels
  // A     - let vecz automatically choose the vectorization factor
  // 1-64  - vectorization factor multiplier: the fixed amount itself, or the
  //         value that multiplies the scalable amount
  // VP    - produce a vector-predicated kernel
  // VVP   - produce both a vectorized and a vector-predicated kernel
  bool add_vvp = false;
  if (const auto *vecz_vf_flags_env = std::getenv("CA_HOST_VF")) {
    // Set scalable to off and let users add it explicitly with 'S'.
    vecz_opts.factor.setIsScalable(false);
    llvm::SmallVector<llvm::StringRef, 4> flags;
    const llvm::StringRef vf_flags_ref(vecz_vf_flags_env);
    // Note: split() keeps empty fields (e.g. from "4," or an empty variable),
    // so every branch below must cope with an empty string.
    vf_flags_ref.split(flags, ',');
    for (const llvm::StringRef r : flags) {
      if (r == "A" || r == "a") {
        vecz_opts.vecz_auto = true;
      } else if (r == "V" || r == "v") {
        // Note: This is a legacy toggle for forcing vectorization with no
        // scalar tail. Ideally we'd be setting it on a per-function basis, and
        // we'd also be setting the vectorization options themselves on a
        // per-function basis. Until we've designed a new method, keep the
        // legacy behaviour of a global "v/V" toggle.
        env_var_opts.force_no_tail = true;
      } else if (r == "S" || r == "s") {
        vecz_opts.factor.setIsScalable(true);
        env_var_opts.early_link_builtins = true;
      } else if (!r.empty() && isdigit(static_cast<unsigned char>(r[0]))) {
        // Parse the leading decimal digits without exceptions; anything after
        // the digits is ignored. consumeInteger returns true on failure
        // (including overflow), which is treated like any other malformed
        // field.
        llvm::StringRef digits = r;
        unsigned known_min = 0;
        if (digits.consumeInteger(10, known_min)) {
          break;
        }
        vecz_opts.factor.setKnownMin(known_min);
      } else if (r == "VP" || r == "vp") {
        vecz_opts.choices.enable(
            vecz::VectorizationChoices::eVectorPredication);
      } else if (r == "VVP" || r == "vvp") {
        // Add the vectorized pass option now (controlled by other iterations
        // of this loop), and flag that we have to add a vector-predicated form
        // later.
        add_vvp = true;
      } else {
        // An error (unknown or empty field) - just stop processing the
        // environment variable now.
        break;
      }
    }
  }

  // Choices override the cost model
  if (const char *choices_env = std::getenv("CODEPLAY_VECZ_CHOICES")) {
    const bool success = vecz_opts.choices.parseChoicesString(choices_env);
    if (!success) {
      llvm::errs() << "failed to parse the CODEPLAY_VECZ_CHOICES variable\n";
    }
  }

  env_var_opts.vecz_pass_opts.push_back(vecz_opts);
  if (add_vvp) {
    // Second entry: same options, but with vector predication enabled.
    vecz_opts.choices.enable(vecz::VectorizationChoices::eVectorPredication);
    env_var_opts.vecz_pass_opts.push_back(vecz_opts);
  }

  // Allow any decisions made on early linking builtins to be overridden
  // with an env variable
  if (env_debug_prefix) {
    const std::string env_name = *env_debug_prefix + "_EARLY_LINK_BUILTINS";
    if (const char *early_link_builtins_env = std::getenv(env_name.c_str())) {
      env_var_opts.early_link_builtins =
          std::atoi(early_link_builtins_env) != 0;
    }
  }

  return env_var_opts;
}


static bool hostVeczPassOpts(
llvm::Function &F, llvm::ModuleAnalysisManager &MAM,
llvm::SmallVectorImpl<vecz::VeczPassOptions> &Opts) {
Expand Down Expand Up @@ -117,7 +211,16 @@ static bool hostVeczPassOpts(
vecz_options.factor =
compiler::utils::VectorizationFactor::getFixedWidth(SIMDWidth);

Opts.push_back(vecz_options);
if (getenv("CA_HOST_VF")) {
auto env_var_opts = HostPassMachinery::processOptimizationOptions(
/*env_debug_prefix*/ {}, vecz_mode);
if (env_var_opts.vecz_pass_opts.empty()) {
return false;
}
Opts.assign(env_var_opts.vecz_pass_opts);
} else {
Opts.push_back(vecz_options);
}
return true;
}

Expand Down Expand Up @@ -178,6 +281,7 @@ void HostPassMachinery::registerPassCallbacks() {

bool HostPassMachinery::handlePipelineElement(llvm::StringRef Name,
llvm::ModulePassManager &PM) {

if (Name.consume_front("host-late-passes")) {
PM.addPass(getLateTargetPasses());
return true;
Expand Down Expand Up @@ -238,6 +342,9 @@ llvm::ModulePassManager HostPassMachinery::getKernelFinalizationPasses(
llvm::ModulePassManager PM;
const compiler::BasePassPipelineTuner tuner(options);

auto env_var_opts =
processOptimizationOptions("CA_HOST", /* vecz_mode*/ {});

// Forcibly compute the BuiltinInfoAnalysis so that cached retrievals work.
PM.addPass(llvm::RequireAnalysisPass<compiler::utils::BuiltinInfoAnalysis,
llvm::Module>());
Expand All @@ -246,6 +353,10 @@ llvm::ModulePassManager HostPassMachinery::getKernelFinalizationPasses(
PM.addPass(llvm::createModuleToFunctionPassAdaptor(
compiler::utils::ReplaceAddressSpaceQualifierFunctionsPass()));

if (env_var_opts.early_link_builtins) {
PM.addPass(compiler::utils::LinkBuiltinsPass());
}

addPreVeczPasses(PM, tuner);

PM.addPass(vecz::RunVeczPass());
Expand Down
2 changes: 1 addition & 1 deletion modules/compiler/targets/host/source/info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ HostInfo::HostInfo(host::arch arch, host::os os,

vectorizable = true;
dma_optimizable = true;
scalable_vector_support = false;
scalable_vector_support = getenv("CA_HOST_VF") ? true : false;
kernel_debug = true;
#ifdef CA_ENABLE_DEBUG_SUPPORT
// Dummy values for testing. Enabled only on debug enabled builds with a
Expand Down
10 changes: 5 additions & 5 deletions modules/compiler/targets/host/source/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,11 +300,11 @@ HostKernel::lookupOrCreateOptimizedKernel(std::array<size_t, 3> local_size) {
}

// Host doesn't support scalable values.
if (fn_metadata.min_work_item_factor.isScalable() ||
fn_metadata.pref_work_item_factor.isScalable() ||
fn_metadata.sub_group_size.isScalable()) {
return cargo::make_unexpected(compiler::Result::FINALIZE_PROGRAM_FAILURE);
}
// if (fn_metadata.min_work_item_factor.isScalable() ||
// fn_metadata.pref_work_item_factor.isScalable() ||
// fn_metadata.sub_group_size.isScalable()) {
// return cargo::make_unexpected(compiler::Result::FINALIZE_PROGRAM_FAILURE);
// }

// Note that we grab a handle to the module here, which we use to reference
// the module going forward. This is despite us passing ownership of the
Expand Down
12 changes: 8 additions & 4 deletions modules/mux/targets/host/source/metadata_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,21 @@ cargo::optional<kernel_variant_map> readBinaryMetadata(loader::ElfFile *elf,
handler::VectorizeInfoMetadata md;
while (handler.read(md)) {
// We don't expect scalable vectorization widths on host.
bool isScalable = false;
if (md.min_work_item_factor.isScalable() ||
md.pref_work_item_factor.isScalable()) {
return cargo::nullopt;
printf("Warning: Scalable support is experimental on host target\n");
// return cargo::nullopt;
isScalable = true;
}
const host::binary_kernel_s kernel{
/*hook*/ 0,
std::move(md.kernel_name),
// TODO: Work out sensible values for scalable.
static_cast<uint32_t>(md.local_memory_usage),
md.min_work_item_factor.getFixedValue(),
md.pref_work_item_factor.getFixedValue(),
md.sub_group_size.getFixedValue()};
isScalable ? 1 : md.min_work_item_factor.getFixedValue(),
isScalable ? 1 : md.pref_work_item_factor.getFixedValue(),
isScalable ? 1 : md.sub_group_size.getFixedValue()};
auto it = kernels.find(md.source_name);
if (it != kernels.end()) {
it->second.push_back(kernel);
Expand Down
1 change: 1 addition & 0 deletions source/cl/test/UnitCL/cmake/CompileKernelToBin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ endif()
# ${CLC_EXECUTABLE} may have other things in it (like a qemu invocation). Turn
# it into a CMake list, so that execute_process() isn't confused.
string(REPLACE " " ";" CLC_EXECUTABLE "${CLC_EXECUTABLE}")
# message("__CSD__ ${CLC_EXECUTABLE} -d ${DEVICE_NAME} -cl-kernel-arg-info -cl-std=CL${CLC_CL_STD} ${CLC_OPTIONS_LIST} ${DEFS_LIST} -o ${OUTPUT_FILE} -- ${INPUT_FILE}")
execute_process(
COMMAND ${CLC_EXECUTABLE}
-d ${DEVICE_NAME}
Expand Down
Binary file not shown.
Loading