diff --git a/.gitignore b/.gitignore index 753a943c..8010d3e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1131,4 +1131,4 @@ secring.* *build* .vscode/ -lib/core/builtin_headers/okl_intrinsic_* +lib/core/intrinsics/okl_intrinsic_* diff --git a/include/oklt/core/transpiler_session/user_input.h b/include/oklt/core/transpiler_session/user_input.h index be44f90b..64960081 100644 --- a/include/oklt/core/transpiler_session/user_input.h +++ b/include/oklt/core/transpiler_session/user_input.h @@ -19,6 +19,7 @@ struct UserInput { std::vector includeDirectories; ///< The include directories. std::vector defines; ///< The defined macroses. std::string hash; ///< OKL hash + std::vector userIntrinsics; ///< OKL user external intrincis folder }; } // namespace oklt diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 182f1e03..e5509f7a 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -182,8 +182,10 @@ set (OCCA_TRANSPILER_SOURCES core/rewriter/rewriter_proxy.cpp core/rewriter/rewriter_fabric.cpp - core/builtin_headers/intrinsic_impl.cpp - core/builtin_headers/intrinsic_impl.h + core/intrinsics/builtin_intrinsics.h + core/intrinsics/builtin_intrinsics.cpp + core/intrinsics/external_intrinsics.h + core/intrinsics/external_intrinsics.cpp core/handler_manager/handler_manager.cpp core/handler_manager/handler_map.cpp @@ -277,22 +279,22 @@ target_link_libraries(occa-transpiler ) embed_resource_txt(${ROOT_DIR}/lib/resources/okl_intrinsic_cuda.h - ${ROOT_DIR}/lib/core/builtin_headers/okl_intrinsic_cuda.h + ${ROOT_DIR}/lib/core/intrinsics/okl_intrinsic_cuda.h INTRINSIC_CUDA ) embed_resource_txt(${ROOT_DIR}/lib/resources/okl_intrinsic_dpcpp.h - ${ROOT_DIR}/lib/core/builtin_headers/okl_intrinsic_dpcpp.h + ${ROOT_DIR}/lib/core/intrinsics/okl_intrinsic_dpcpp.h INTRINSIC_DPCPP ) embed_resource_txt(${ROOT_DIR}/lib/resources/okl_intrinsic_hip.h - ${ROOT_DIR}/lib/core/builtin_headers/okl_intrinsic_hip.h + ${ROOT_DIR}/lib/core/intrinsics/okl_intrinsic_hip.h INTRINSIC_HIP ) embed_resource_txt(${ROOT_DIR}/lib/resources/okl_intrinsic_host.h - ${ROOT_DIR}/lib/core/builtin_headers/okl_intrinsic_host.h + ${ROOT_DIR}/lib/core/intrinsics/okl_intrinsic_host.h INTRINSIC_HOST ) diff --git a/lib/attributes/backend/launcher.cpp b/lib/attributes/backend/launcher.cpp index 2ae8acbb..3d02f1ce 100644 --- a/lib/attributes/backend/launcher.cpp +++ b/lib/attributes/backend/launcher.cpp @@ -201,7 +201,8 @@ void collectLoops(OklLoopInfo& loopInfo, std::list& out) { } #endif -std::pair splitTileAttr(OklLoopInfo& loopInfo, const oklt::Rewriter& r) { +std::pair splitTileAttr(OklLoopInfo& loopInfo, + const oklt::Rewriter& r) { auto sz = util::parseStrTo(loopInfo.tileSize); // Prepare first loop @@ -358,7 +359,6 @@ HandleResult handleLauncherTranslationUnit(SessionStage& s, const TranslationUni SPDLOG_DEBUG("Handle translation unit"); - // s.getRewriter().InsertTextBefore(loc, "#include " + includeOCCA + "\n\n"); auto& backendDeps = s.tryEmplaceUserCtx().backendHeaders; backendDeps.clear(); backendDeps.emplace_back("#include " + std::string(includeOCCA) + "\n\n"); @@ -374,8 +374,8 @@ HandleResult handleLauncherKernelAttribute(SessionStage& s, auto& rewriter = s.getRewriter(); if (!sema.getParsingKernelInfo()) { - return tl::make_unexpected(Error{OkltPipelineErrorCode::INTERNAL_ERROR_KERNEL_INFO_NULL, - "handleKernelAttribute"}); + return tl::make_unexpected( + Error{OkltPipelineErrorCode::INTERNAL_ERROR_KERNEL_INFO_NULL, "handleKernelAttribute"}); } auto kernelInfo = *sema.getParsingKernelInfo(); diff --git a/lib/attributes/utils/replace_attribute.cpp b/lib/attributes/utils/replace_attribute.cpp index 96b31fe9..b7d433dd 100644 --- a/lib/attributes/utils/replace_attribute.cpp +++ b/lib/attributes/utils/replace_attribute.cpp @@ -1,6 +1,5 @@ #include "attributes/utils/replace_attribute.h" #include "attributes/attribute_names.h" -#include "core/builtin_headers/intrinsic_impl.h" #include "core/transpiler_session/header_info.h" #include "core/transpiler_session/session_stage.h" #include "core/utils/var_decl.h" diff --git a/lib/core/diag/diag_consumer.cpp b/lib/core/diag/diag_consumer.cpp index aa57b1c1..208ea9db 100644 --- a/lib/core/diag/diag_consumer.cpp +++ b/lib/core/diag/diag_consumer.cpp @@ -26,7 +26,8 @@ DiagConsumer::DiagConsumer(SessionStage& session) clang::DiagnosticConsumer(){}; void DiagConsumer::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, const Diagnostic& Info) { - if (!_includeDiag.test_and_set()) { + if (!_includeDiag) { + _includeDiag = 1; return; } @@ -44,13 +45,13 @@ bool DiagConsumer::IncludeInDiagnosticCounts() const { // Accept only Warning, Error and Fatal if (diagLevel < DiagnosticsEngine::Level::Warning) { - const_cast(_includeDiag).clear(); + _includeDiag = 0; return false; } for (auto& ptr : getDiagDiagHandleInstances()) { if (ptr->_id == info.getID() && ptr->HandleDiagnostic(_session, diagLevel, info)) { - const_cast(_includeDiag).clear(); + _includeDiag = 0; return false; } } diff --git a/lib/core/diag/diag_consumer.h b/lib/core/diag/diag_consumer.h index ee0c94ff..5d2b089e 100644 --- a/lib/core/diag/diag_consumer.h +++ b/lib/core/diag/diag_consumer.h @@ -22,7 +22,7 @@ class DiagConsumer : public clang::DiagnosticConsumer { protected: SessionStage& _session; - std::atomic_flag _includeDiag = true; + mutable int _includeDiag = 0; }; } // namespace oklt diff --git a/lib/core/builtin_headers/intrinsic_impl.cpp b/lib/core/intrinsics/builtin_intrinsics.cpp similarity index 89% rename from lib/core/builtin_headers/intrinsic_impl.cpp rename to lib/core/intrinsics/builtin_intrinsics.cpp index 8a34f458..32cef9f0 100644 --- a/lib/core/builtin_headers/intrinsic_impl.cpp +++ b/lib/core/intrinsics/builtin_intrinsics.cpp @@ -1,8 +1,8 @@ -#include "core/builtin_headers/intrinsic_impl.h" -#include "core/builtin_headers/okl_intrinsic_cuda.h" -#include "core/builtin_headers/okl_intrinsic_dpcpp.h" -#include "core/builtin_headers/okl_intrinsic_hip.h" -#include "core/builtin_headers/okl_intrinsic_host.h" +#include "core/intrinsics/builtin_intrinsics.h" +#include "core/intrinsics/okl_intrinsic_cuda.h" +#include "core/intrinsics/okl_intrinsic_dpcpp.h" +#include "core/intrinsics/okl_intrinsic_hip.h" +#include "core/intrinsics/okl_intrinsic_host.h" #include #include "core/transpiler_session/transpiler_session.h" diff --git a/lib/core/builtin_headers/intrinsic_impl.h b/lib/core/intrinsics/builtin_intrinsics.h similarity index 99% rename from lib/core/builtin_headers/intrinsic_impl.h rename to lib/core/intrinsics/builtin_intrinsics.h index 41fa8cbf..463a82f0 100644 --- a/lib/core/builtin_headers/intrinsic_impl.h +++ b/lib/core/intrinsics/builtin_intrinsics.h @@ -14,6 +14,7 @@ constexpr const char INTRINSIC_INCLUDE_FILENAME[] = "okl_intrinsic.h"; void addInstrinsicStub(TranspilerSession &session, clang::CompilerInstance &compiler); + std::vector embedInstrinsic(std::string &input, TargetBackend backend); diff --git a/lib/core/intrinsics/external_intrinsics.cpp b/lib/core/intrinsics/external_intrinsics.cpp new file mode 100644 index 00000000..46d2770a --- /dev/null +++ b/lib/core/intrinsics/external_intrinsics.cpp @@ -0,0 +1,177 @@ +#include "core/intrinsics/external_intrinsics.h" + +#include +#include "core/transpiler_session/session_stage.h" +#include "core/transpiler_session/transpiler_session.h" +#include "oklt/util/io_helper.h" +#include "util/string_utils.hpp" + +#include +#include + +namespace oklt { + +using namespace llvm; +namespace fs = std::filesystem; + +tl::expected getIntrincisImplSourcePath(TargetBackend backend, + const fs::path& intrincisPath) { + switch (backend) { + case TargetBackend::CUDA: + return intrincisPath / "cuda"; + case TargetBackend::HIP: + return intrincisPath / "hip"; + case TargetBackend::DPCPP: + return intrincisPath / "dpcpp"; + case TargetBackend::OPENMP: + return intrincisPath / "openmp"; + case TargetBackend::SERIAL: + return intrincisPath / "serial"; + case TargetBackend::_LAUNCHER: + return intrincisPath / "launcher"; + default: + return tl::make_unexpected("User intrinsic does not implement target backend"); + } +} + +std::string normalizedFileName(const std::string& fileName) { + auto normalizedName = fileName; + if (util::startsWith(normalizedName, "./")) { + normalizedName = normalizedName.substr(2); + } + return normalizedName; +} + +bool isExternalIntrinsicInclude(TranspilerSession& session, const std::string& fileName) { + const auto& userIntrinsic = session.getInput().userIntrinsics; + if (userIntrinsic.empty()) { + return false; + } + auto normalizedName = normalizedFileName(fileName); + for (const auto& intrinsic : userIntrinsic) { + auto folderPrefix = intrinsic.filename().string(); + if (util::startsWith(normalizedName, folderPrefix)) { + return true; + } + } + return false; +} + +std::optional getExternalInstrincisInclude(TranspilerSession& session, + const std::string& fileName) { + const auto& userIntrinsic = session.getInput().userIntrinsics; + if (userIntrinsic.empty()) { + return std::nullopt; + } + + auto normalizedName = normalizedFileName(fileName); + for (const auto& intrinsic : userIntrinsic) { + auto folderPrefix = intrinsic.filename().string(); + if (util::startsWith(normalizedName, folderPrefix)) { + return intrinsic; + } + } + return std::nullopt; +} + +tl::expected getExternalIntrinsicSource(TargetBackend backend, + const fs::path& intrinsicPath, + clang::SourceManager& sm) { + auto implPathResult = getIntrincisImplSourcePath(backend, intrinsicPath); + if (!implPathResult) { + return tl::make_unexpected(implPathResult.error()); + } + + auto sourceFolder = implPathResult.value(); + if (!std::filesystem::exists(sourceFolder)) { + return tl::make_unexpected("Intrinsic implementation folder does not exist"); + } + + std::vector files(fs::directory_iterator(sourceFolder), {}); + if (files.empty()) { + return tl::make_unexpected("Intrinsic implementation files is missing"); + } + + auto it = std::find_if(files.cbegin(), files.cend(), [](const fs::path& p) -> bool { + return p.extension().string() == std::string(".cpp"); + }); + + if (it == files.cend()) { + std::string error = "Can't' find implementation file with path: " + sourceFolder.string(); + return tl::make_unexpected(error); + } + + auto contentResult = util::readFileAsStr(*it); + if (!contentResult) { + std::string error = "Can't get memory buffer for: " + it->string(); + return tl::make_unexpected(error); + } + return contentResult.value(); +} + +bool overrideExternalIntrinsic(SessionStage& stage, + HeaderDepsInfo& deps, + const std::string& includedFileName, + clang::OptionalFileEntryRef includedFile) { + auto& session = stage.getSession(); + auto& sourceManager = stage.getCompiler().getSourceManager(); + const auto& userIntrinsics = session.getInput().userIntrinsics; + if (!userIntrinsics.empty()) { + auto maybeIntrinsicPath = getExternalInstrincisInclude(session, includedFileName); + if (!maybeIntrinsicPath) { + return false; + } + auto intrinsicPath = maybeIntrinsicPath.value(); + auto intrinsicResult = + getExternalIntrinsicSource(stage.getBackend(), intrinsicPath, sourceManager); + if (!intrinsicResult) { + session.pushError(std::error_code(), intrinsicResult.error()); + return false; + } + deps.externalIntrinsicsSources[includedFileName] = std::move(intrinsicResult.value()); + + auto emptyExternalIntrinsic = MemoryBuffer::getMemBuffer(""); + if (includedFile) { + auto fileRef = includedFile; + const auto& fileEntry = fileRef->getFileEntry(); + sourceManager.overrideFileContents(&fileEntry, std::move(emptyExternalIntrinsic)); + } else { + // INFO: case when the file can be found by relative path + // it happens when the include path is relative to WORKING DIR path + auto& fm = sourceManager.getFileManager(); + auto maybeFileRef = fm.getFileRef(includedFileName); + if (maybeFileRef) { + auto foundFileRef = maybeFileRef.get(); + sourceManager.overrideFileContents(foundFileRef, std::move(emptyExternalIntrinsic)); + } + } + return true; + } + return false; +} + +void updateExternalIntrinsicMap(SessionStage& stage, HeaderDepsInfo& deps) { + if (deps.externalIntrinsicsSources.empty()) { + return; + } + + auto backend = stage.getBackend(); + auto& session = stage.getSession(); + auto& sm = stage.getCompiler().getSourceManager(); + for (auto& mappedIntrinsic : deps.externalIntrinsicsSources) { + auto maybeIntrinsicPath = getExternalInstrincisInclude(session, mappedIntrinsic.first); + if (!maybeIntrinsicPath) { + std::string error = "Count not find implementation for " + mappedIntrinsic.first; + session.pushError(std::error_code(), error); + return; + } + auto intrinsicPath = maybeIntrinsicPath.value(); + auto intrinsicResult = getExternalIntrinsicSource(backend, intrinsicPath, sm); + if (!intrinsicResult) { + session.pushError(std::error_code(), intrinsicResult.error()); + return; + } + mappedIntrinsic.second = std::move(intrinsicResult.value()); + } +} +} // namespace oklt diff --git a/lib/core/intrinsics/external_intrinsics.h b/lib/core/intrinsics/external_intrinsics.h new file mode 100644 index 00000000..4ad4a8bd --- /dev/null +++ b/lib/core/intrinsics/external_intrinsics.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include + +namespace oklt { + +class SessionStage; +class HeaderDepsInfo; + +bool overrideExternalIntrinsic(SessionStage& stage, + HeaderDepsInfo& deps, + const std::string& includedFileName, + clang::OptionalFileEntryRef includedFile); + +void updateExternalIntrinsicMap(SessionStage& stage, HeaderDepsInfo& deps); +} // namespace oklt diff --git a/lib/core/transpiler_session/code_generator.cpp b/lib/core/transpiler_session/code_generator.cpp index 59dc2a3c..2b905ed4 100644 --- a/lib/core/transpiler_session/code_generator.cpp +++ b/lib/core/transpiler_session/code_generator.cpp @@ -6,11 +6,12 @@ #include "core/transpiler_session/transpilation_node.h" #include "core/transpiler_session/transpiler_session.h" -#include "core/builtin_headers/intrinsic_impl.h" +#include "core/intrinsics/builtin_intrinsics.h" +// #include "core/intrinsics/external_intrinsics.h" #include "core/handler_manager/handler_manager.h" -#include "core/utils/attributes.h" +// #include "core/utils/attributes.h" #include "core/vfs/overlay_fs.h" #include @@ -84,6 +85,10 @@ void removeSystemHeaders(SessionStage& stage, const HeaderDepsInfo& deps) { SPDLOG_TRACE("remove system include {} {}", dep.relativePath, dep.fileName); rewriter.RemoveText({dep.hashLoc, dep.filenameRange.getEnd()}); } + + for (const auto& intrinsic : deps.externalIntrinsicHeaders) { + rewriter.RemoveText({intrinsic.hashLoc, intrinsic.filenameRange.getEnd()}); + } } // gather all transpiled files: main input and affected header and also header with removed system @@ -94,14 +99,12 @@ TransformedFiles gatherTransformedFiles(SessionStage& stage) { // to preserve them for possible laucher generator auto clone = stage.getSession().getStagedHeaders(); inputs.fileMap.merge(clone); - inputs.fileMap["okl_kernel.cpp"] = stage.getRewriterResultForMainFile();std::ostringstream oss; -oss << std::this_thread::get_id() << std::endl; -printf("%s\n", oss.str().c_str()); + inputs.fileMap["okl_kernel.cpp"] = stage.getRewriterResultForMainFile(); return inputs; } tl::expected preprocesseInputs(SessionStage& stage, - const TransformedFiles& inputs) { + const TransformedFiles& inputs) { auto invocation = std::make_shared(); auto& ppOutOpt = invocation->getPreprocessorOutputOpts(); @@ -147,17 +150,16 @@ tl::expected preprocesseInputs(SessionStage& stage, return preprocessedAndFused.value(); } -std::string restoreSystemAndBackendHeaders( - TargetBackend backend, - std::string& input, - const HeaderDepsInfo& deps) -{ +std::string restoreSystemAndBackendHeaders(SessionStage& stage, + std::string& input, + const HeaderDepsInfo& deps) { + auto backend = stage.getBackend(); // insert backend specific headers and namespaces for (auto it = deps.backendNss.rbegin(); it < deps.backendNss.rend(); ++it) { input.insert(0, *it); } - if(deps.useOklIntrinsic) { + if (deps.useOklIntrinsic) { auto intrinsicHeaders = embedInstrinsic(input, backend); for (auto it = intrinsicHeaders.rbegin(); it < intrinsicHeaders.rend(); ++it) { @@ -165,6 +167,10 @@ std::string restoreSystemAndBackendHeaders( } } + for (const auto& externalIntrinsicSource : deps.externalIntrinsicsSources) { + input.insert(0, externalIntrinsicSource.second); + } + for (auto it = deps.backendHeaders.rbegin(); it < deps.backendHeaders.rend(); ++it) { input.insert(0, *it); } @@ -190,9 +196,8 @@ tl::expected fuseIncludeDeps(SessionStage& stage, const Head return preprocessedResult; } - auto finalTranspiledKernel = restoreSystemAndBackendHeaders(stage.getBackend(), - preprocessedResult.value(), - deps); + auto finalTranspiledKernel = + restoreSystemAndBackendHeaders(stage, preprocessedResult.value(), deps); return finalTranspiledKernel; } } // namespace diff --git a/lib/core/transpiler_session/header_info.cpp b/lib/core/transpiler_session/header_info.cpp index 462c5fe1..a1d95d90 100644 --- a/lib/core/transpiler_session/header_info.cpp +++ b/lib/core/transpiler_session/header_info.cpp @@ -1,12 +1,19 @@ #include "core/transpiler_session/header_info.h" -#include "core/builtin_headers/intrinsic_impl.h" +#include "core/intrinsics/builtin_intrinsics.h" +#include "core/intrinsics/external_intrinsics.h" +#include "core/transpiler_session/session_stage.h" namespace {} namespace oklt { -InclusionDirectiveCallback::InclusionDirectiveCallback(HeaderDepsInfo& deps_, - const clang::SourceManager& sm_) + +using namespace llvm; + +InclusionDirectiveCallback::InclusionDirectiveCallback(SessionStage& stage, + HeaderDepsInfo& deps_, + clang::SourceManager& sm_) : deps(deps_), - sm(sm_) {} + sm(sm_), + _stage(stage) {} void InclusionDirectiveCallback::InclusionDirective(clang::SourceLocation hashLoc, const clang::Token& includeTok, @@ -18,7 +25,7 @@ void InclusionDirectiveCallback::InclusionDirective(clang::SourceLocation hashLo clang::StringRef relativePath, const clang::Module* imported, clang::SrcMgr::CharacteristicKind fileType) { - if(!deps.useOklIntrinsic) { + if (!deps.useOklIntrinsic) { deps.useOklIntrinsic = fileName == INTRINSIC_INCLUDE_FILENAME; } @@ -27,10 +34,13 @@ void InclusionDirectiveCallback::InclusionDirective(clang::SourceLocation hashLo return; } - deps.topLevelDeps.push_back(HeaderDep{ + auto fileNameStr = fileName.str(); + bool isIntrinsic = overrideExternalIntrinsic(_stage, deps, fileNameStr, file); + + auto dep = HeaderDep{ .hashLoc = hashLoc, .includeTok = includeTok, - .fileName = fileName.str(), + .fileName = fileNameStr, .isAngled = isAngled, .filenameRange = filenameRange, .file = file, @@ -38,8 +48,12 @@ void InclusionDirectiveCallback::InclusionDirective(clang::SourceLocation hashLo .relativePath = relativePath.str(), .imported = imported, .fileType = fileType, + }; - }); + if (isIntrinsic) { + deps.externalIntrinsicHeaders.push_back(std::move(dep)); + } else { + deps.topLevelDeps.push_back(std::move(dep)); + } } - } // namespace oklt diff --git a/lib/core/transpiler_session/header_info.h b/lib/core/transpiler_session/header_info.h index 68ecc1dc..815334fd 100644 --- a/lib/core/transpiler_session/header_info.h +++ b/lib/core/transpiler_session/header_info.h @@ -11,7 +11,6 @@ struct TransformedFiles { // name to file content map std::map fileMap; }; - // Stub to collect data from InclusionDirective callbacks. struct HeaderDep { clang::SourceLocation hashLoc; @@ -31,12 +30,18 @@ struct HeaderDepsInfo { std::vector topLevelDeps; std::vector backendHeaders; std::vector backendNss; + std::vector externalIntrinsicHeaders; + std::map externalIntrinsicsSources; bool useOklIntrinsic = false; }; +class SessionStage; + class InclusionDirectiveCallback : public clang::PPCallbacks { public: - InclusionDirectiveCallback(HeaderDepsInfo& depsInfo, const clang::SourceManager& sm); + InclusionDirectiveCallback(SessionStage& session, + HeaderDepsInfo& depsInfo, + clang::SourceManager& sm); void InclusionDirective(clang::SourceLocation HashLoc, const clang::Token& IncludeTok, clang::StringRef fileName, @@ -47,8 +52,11 @@ class InclusionDirectiveCallback : public clang::PPCallbacks { clang::StringRef RelativePath, const clang::Module* Imported, clang::SrcMgr::CharacteristicKind FileType) override; + + private: HeaderDepsInfo& deps; - const clang::SourceManager& sm; + clang::SourceManager& sm; + SessionStage& _stage; }; } // namespace oklt diff --git a/lib/core/transpiler_session/session_stage.cpp b/lib/core/transpiler_session/session_stage.cpp index 1d264266..89244531 100644 --- a/lib/core/transpiler_session/session_stage.cpp +++ b/lib/core/transpiler_session/session_stage.cpp @@ -1,9 +1,8 @@ #include "core/transpiler_session/session_stage.h" #include "core/diag/diag_consumer.h" #include "core/handler_manager/handler_manager.h" +#include "core/intrinsics/external_intrinsics.h" #include "core/transpiler_session/transpiler_session.h" -#include "core/builtin_headers/intrinsic_impl.h" -#include "core/vfs/overlay_fs.h" #include #include @@ -35,6 +34,8 @@ void SessionStage::setLauncherMode() { _rewriter = std::make_unique(_compiler.getSourceManager(), _compiler.getLangOpts()); _backend = TargetBackend::_LAUNCHER; + auto& deps = tryEmplaceUserCtx(); + updateExternalIntrinsicMap(*this, deps); } std::string SessionStage::getRewriterResultForMainFile() { diff --git a/lib/core/utils/for_stmt_parser.cpp b/lib/core/utils/for_stmt_parser.cpp index 0580b566..045d3515 100644 --- a/lib/core/utils/for_stmt_parser.cpp +++ b/lib/core/utils/for_stmt_parser.cpp @@ -66,7 +66,7 @@ tl::expected parseForStmt(SessionStage& stage, const clang::Attr* a) { auto& ctx = stage.getCompiler().getASTContext(); OklLoopInfo ret{.attr = a, .stmt = s}; - const Expr *start, *end = nullptr; + const Expr *start = nullptr, *end = nullptr; if (isa(s.getInit())) { auto d = dyn_cast(s.getInit()); @@ -90,8 +90,6 @@ tl::expected parseForStmt(SessionStage& stage, start = rsh->getSubExpr(); } ret.range.start = start; - - auto child_count = std::distance(start->children().begin(), start->children().end()); } // Condition diff --git a/lib/core/vfs/overlay_fs.cpp b/lib/core/vfs/overlay_fs.cpp index 587966b2..fc271901 100644 --- a/lib/core/vfs/overlay_fs.cpp +++ b/lib/core/vfs/overlay_fs.cpp @@ -1,5 +1,4 @@ #include "core/vfs/overlay_fs.h" -#include "core/transpiler_session/header_info.h" namespace oklt { using namespace llvm; diff --git a/lib/pipeline/core/stage_action.cpp b/lib/pipeline/core/stage_action.cpp index ec2fc526..87a77ef8 100644 --- a/lib/pipeline/core/stage_action.cpp +++ b/lib/pipeline/core/stage_action.cpp @@ -1,4 +1,5 @@ -#include "core/builtin_headers/intrinsic_impl.h" +#include "core/intrinsics/builtin_intrinsics.h" +#include "core/intrinsics/external_intrinsics.h" #include "core/transpiler_session/session_stage.h" #include "core/vfs/overlay_fs.h" @@ -23,7 +24,6 @@ bool StageAction::PrepareToExecuteAction(clang::CompilerInstance& compiler) { return false; } addInstrinsicStub(*_session.get(), compiler); - return true; } diff --git a/lib/pipeline/core/stage_action_runner.cpp b/lib/pipeline/core/stage_action_runner.cpp index 20764f4e..5c681f24 100644 --- a/lib/pipeline/core/stage_action_runner.cpp +++ b/lib/pipeline/core/stage_action_runner.cpp @@ -32,14 +32,18 @@ SharedTranspilerSessionResult runStageAction(StringRef stageName, SharedTranspil SPDLOG_INFO("start: {}", stageName); SPDLOG_TRACE("input source:\n{}\n", source); - Twine toolName = "clang";//stageName; + Twine toolName = "clang"; // stageName; auto cppFileNamePath = input.sourcePath; auto cppFileName = std::string(cppFileNamePath.replace_extension(".cpp")); // TODO get this info from user input aka json prop file - std::vector args = { - "-std=c++17", "-Wno-extra-tokens", "-Wno-invalid-pp-token", "-fparse-all-comments", "-I.", getISystemOpt()}; + std::vector args = {"-std=c++17", + "-Wno-extra-tokens", + "-Wno-invalid-pp-token", + "-fparse-all-comments", + "-I.", + getISystemOpt()}; for (const auto& define : input.defines) { std::string def = "-D" + define; @@ -51,6 +55,14 @@ SharedTranspilerSessionResult runStageAction(StringRef stageName, SharedTranspil args.push_back(std::move(incPath)); } + for (const auto& intrinsicPath : input.userIntrinsics) { + if (std::filesystem::exists(intrinsicPath)) { + auto parent = intrinsicPath.parent_path(); + std::string includeInctrincisPath = "-I" + parent.string(); + args.push_back(includeInctrincisPath); + } + } + auto stageAction = instantiateStageAction(stageName); if (!stageAction) { Error err{std::error_code(), fmt::format("no stage action: {} in registry", stageName)}; @@ -77,7 +89,7 @@ SharedTranspilerSessionResult runStageAction(StringRef stageName, SharedTranspil if (!warnings.empty()) { SPDLOG_INFO("{} warnings: ", stageName); for (const auto& w : warnings) { - SPDLOG_WARN(w.desc ); + SPDLOG_WARN(w.desc); } } if (!ret || !session->getErrors().empty()) { diff --git a/lib/pipeline/stages/transpiler/transpilation.cpp b/lib/pipeline/stages/transpiler/transpilation.cpp index 870dea7c..744ecaff 100644 --- a/lib/pipeline/stages/transpiler/transpilation.cpp +++ b/lib/pipeline/stages/transpiler/transpilation.cpp @@ -1,8 +1,8 @@ #include -#include "core/builtin_headers/intrinsic_impl.h" #include "core/diag/diag_consumer.h" #include "core/handler_manager/handler_manager.h" +#include "core/intrinsics/builtin_intrinsics.h" #include "core/transpiler_session/attributed_type_map.h" #include "core/transpiler_session/code_generator.h" #include "core/transpiler_session/session_stage.h" @@ -217,18 +217,19 @@ HandleResult runFromLeavesToRoot(TraversalType& traversal, } return result; } - transpilationAccumulator.push_back(TranspilationNode{.ki = ki, .li = cl, .attr = attr, .node = node}); + transpilationAccumulator.push_back( + TranspilationNode{.ki = ki, .li = cl, .attr = attr, .node = node}); } if (stage.getAttrManager().hasImplicitHandler(stage.getBackend(), node.getNodeKind())) { - transpilationAccumulator.push_back(TranspilationNode{ - .ki = ki, .li = cl, .attr = nullptr, .node = node}); + transpilationAccumulator.push_back( + TranspilationNode{.ki = ki, .li = cl, .attr = nullptr, .node = node}); } return {}; } -bool isIntrinsicHeader(const clang::SourceManager& sm, const clang::SourceLocation &loc) { +bool isIntrinsicHeader(const clang::SourceManager& sm, const clang::SourceLocation& loc) { auto fid = sm.getFileID(loc); const auto* fileEntry = sm.getFileEntryForID(fid); if (fileEntry) { @@ -250,7 +251,7 @@ bool skipNode(SessionStage& s, const NodeType& n) { return true; } - if(isIntrinsicHeader(sm, loc)) { + if (isIntrinsicHeader(sm, loc)) { return true; } @@ -416,7 +417,8 @@ class TranspilationConsumer : public clang::ASTConsumer { // no errors and empty output could mean that the source is already transpiled // so use input as output and lets the next stage try to figure out if (result->first.empty()) { - result->first = _stage.getSession().getStagedSource();; + result->first = _stage.getSession().getStagedSource(); + ; } output.launcher.source = oklt::format(std::move(result->first)); output.launcher.metadata = std::move(result->second); @@ -438,8 +440,8 @@ class Transpilation : public StageAction { } auto& deps = _stage->tryEmplaceUserCtx(); - std::unique_ptr callback = - std::make_unique(deps, compiler.getSourceManager()); + std::unique_ptr callback = std::make_unique( + *_stage, deps, compiler.getSourceManager()); // setup preprocessor hook to gather all user/system includes compiler.getPreprocessor().addPPCallbacks(std::move(callback)); diff --git a/lib/util/string_utils.cpp b/lib/util/string_utils.cpp index f044f183..0eee4e98 100644 --- a/lib/util/string_utils.cpp +++ b/lib/util/string_utils.cpp @@ -13,6 +13,10 @@ std::string toLower(const std::string& str) { return result; } +bool startsWith(const std::string& value, const std::string& prefix) { + return value.rfind(prefix, 0) == 0; +} + std::string pointerToStr(const void* ptr) { return std::to_string(reinterpret_cast(ptr)); } diff --git a/lib/util/string_utils.hpp b/lib/util/string_utils.hpp index 02447614..58c07ea7 100644 --- a/lib/util/string_utils.hpp +++ b/lib/util/string_utils.hpp @@ -18,6 +18,15 @@ namespace oklt::util { */ std::string toLower(const std::string& str); +/** + * @brief check a string starts with prefix value + * + * @param str Input string + * @param str Prefix string + * @return bool Is started with provided prefix + */ +bool startsWith(const std::string& value, const std::string& prefix); + /** * @brief Convert a pointer to a string * diff --git a/tests/functional/configs/test_suite_transpiler/backends/cuda/suite.json b/tests/functional/configs/test_suite_transpiler/backends/cuda/suite.json index ecbecb99..c9378c76 100644 --- a/tests/functional/configs/test_suite_transpiler/backends/cuda/suite.json +++ b/tests/functional/configs/test_suite_transpiler/backends/cuda/suite.json @@ -12,5 +12,6 @@ "macro.json", "implicit.json", "includes.json", - "intrinsics.json" + "intrinsics.json", + "user_intrisics.json" ] diff --git a/tests/functional/configs/test_suite_transpiler/backends/cuda/user_intrisics.json b/tests/functional/configs/test_suite_transpiler/backends/cuda/user_intrisics.json new file mode 100644 index 00000000..5de89324 --- /dev/null +++ b/tests/functional/configs/test_suite_transpiler/backends/cuda/user_intrisics.json @@ -0,0 +1,14 @@ +[ + { + "action": "normalize_and_transpile", + "action_config": { + "backend": "cuda", + "source": "transpiler/backends/cuda/user_intrinsics/user_intrinsics.cpp", + "includes": [], + "defs": [], + "launcher": "", + "intrinsic": "misc/custom_intrinsics" + }, + "reference": "transpiler/backends/cuda/user_intrinsics/user_intrinsics_ref.cpp" + } +] diff --git a/tests/functional/configs/test_suite_transpiler/backends/dpcpp/max_inner_dims.json b/tests/functional/configs/test_suite_transpiler/backends/dpcpp/max_inner_dims.json deleted file mode 100644 index fc376830..00000000 --- a/tests/functional/configs/test_suite_transpiler/backends/dpcpp/max_inner_dims.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - { - "action": "normalize_and_transpile", - "action_config": { - "backend": "dpcpp", - "source": "transpiler/backends/dpcpp/max_inner_loops/outer_inner_split_max.cpp", - "includes": [], - "defs": [], - "launcher": "" - }, - "reference": "transpiler/backends/dpcpp/max_inner_loops/outer_inner_split_max_ref.cpp" - } -] diff --git a/tests/functional/configs/test_suite_transpiler/backends/dpcpp/suite.json b/tests/functional/configs/test_suite_transpiler/backends/dpcpp/suite.json index d5c5216b..d409422d 100644 --- a/tests/functional/configs/test_suite_transpiler/backends/dpcpp/suite.json +++ b/tests/functional/configs/test_suite_transpiler/backends/dpcpp/suite.json @@ -10,5 +10,6 @@ "barrier.json", "nobarrier.json", "exclusive.json", - "macro.json" + "macro.json", + "user_intrisics.json" ] diff --git a/tests/functional/configs/test_suite_transpiler/backends/dpcpp/user_intrisics.json b/tests/functional/configs/test_suite_transpiler/backends/dpcpp/user_intrisics.json new file mode 100644 index 00000000..b5b98017 --- /dev/null +++ b/tests/functional/configs/test_suite_transpiler/backends/dpcpp/user_intrisics.json @@ -0,0 +1,14 @@ +[ + { + "action": "normalize_and_transpile", + "action_config": { + "backend": "dpcpp", + "source": "transpiler/backends/dpcpp/user_intrinsics/user_intrinsics.cpp", + "includes": [], + "defs": [], + "launcher": "", + "intrinsic": "misc/custom_intrinsics" + }, + "reference": "transpiler/backends/dpcpp/user_intrinsics/user_intrinsics_ref.cpp" + } +] diff --git a/tests/functional/configs/test_suite_transpiler/backends/hip/suite.json b/tests/functional/configs/test_suite_transpiler/backends/hip/suite.json index 55f38b32..8e35c5a6 100644 --- a/tests/functional/configs/test_suite_transpiler/backends/hip/suite.json +++ b/tests/functional/configs/test_suite_transpiler/backends/hip/suite.json @@ -10,5 +10,6 @@ "macro.json", "restrict.json", "exclusive.json", - "shared.json" + "shared.json", + "user_intrisics.json" ] diff --git a/tests/functional/configs/test_suite_transpiler/backends/hip/user_intrisics.json b/tests/functional/configs/test_suite_transpiler/backends/hip/user_intrisics.json new file mode 100644 index 00000000..96d1d254 --- /dev/null +++ b/tests/functional/configs/test_suite_transpiler/backends/hip/user_intrisics.json @@ -0,0 +1,14 @@ +[ + { + "action": "normalize_and_transpile", + "action_config": { + "backend": "hip", + "source": "transpiler/backends/hip/user_intrinsics/user_intrinsics.cpp", + "includes": [], + "defs": [], + "launcher": "", + "intrinsic": "misc/custom_intrinsics" + }, + "reference": "transpiler/backends/hip/user_intrinsics/user_intrinsics_ref.cpp" + } +] diff --git a/tests/functional/configs/test_suite_transpiler/backends/openmp/max_inner_dims.json b/tests/functional/configs/test_suite_transpiler/backends/openmp/max_inner_dims.json deleted file mode 100644 index 6800bca9..00000000 --- a/tests/functional/configs/test_suite_transpiler/backends/openmp/max_inner_dims.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - { - "action": "normalize_and_transpile", - "action_config": { - "backend": "openmp", - "source": "transpiler/backends/openmp/max_inner_loops/outer_inner_split_max.cpp", - "includes": [], - "defs": [], - "launcher": "" - }, - "reference": "transpiler/backends/openmp/max_inner_loops/outer_inner_split_max_ref.cpp" - } -] diff --git a/tests/functional/configs/test_suite_transpiler/backends/openmp/suite.json b/tests/functional/configs/test_suite_transpiler/backends/openmp/suite.json index 85e39330..ca47a645 100644 --- a/tests/functional/configs/test_suite_transpiler/backends/openmp/suite.json +++ b/tests/functional/configs/test_suite_transpiler/backends/openmp/suite.json @@ -8,5 +8,6 @@ "barrier.json", "nobarrier.json", "exclusive.json", - "macro.json" + "macro.json", + "user_intrisics.json" ] diff --git a/tests/functional/configs/test_suite_transpiler/backends/openmp/user_intrisics.json b/tests/functional/configs/test_suite_transpiler/backends/openmp/user_intrisics.json new file mode 100644 index 00000000..b7a7bf8f --- /dev/null +++ b/tests/functional/configs/test_suite_transpiler/backends/openmp/user_intrisics.json @@ -0,0 +1,14 @@ +[ + { + "action": "normalize_and_transpile", + "action_config": { + "backend": "openmp", + "source": "transpiler/backends/openmp/user_intrinsics/user_intrinsics.cpp", + "includes": [], + "defs": [], + "launcher": "", + "intrinsic": "misc/custom_intrinsics" + }, + "reference": "transpiler/backends/openmp/user_intrinsics/user_intrinsics_ref.cpp" + } +] diff --git a/tests/functional/configs/test_suite_transpiler/backends/serial/suite.json b/tests/functional/configs/test_suite_transpiler/backends/serial/suite.json index 85e39330..ca47a645 100644 --- a/tests/functional/configs/test_suite_transpiler/backends/serial/suite.json +++ b/tests/functional/configs/test_suite_transpiler/backends/serial/suite.json @@ -8,5 +8,6 @@ "barrier.json", "nobarrier.json", "exclusive.json", - "macro.json" + "macro.json", + "user_intrisics.json" ] diff --git a/tests/functional/configs/test_suite_transpiler/backends/serial/user_intrisics.json b/tests/functional/configs/test_suite_transpiler/backends/serial/user_intrisics.json new file mode 100644 index 00000000..4e105053 --- /dev/null +++ b/tests/functional/configs/test_suite_transpiler/backends/serial/user_intrisics.json @@ -0,0 +1,14 @@ +[ + { + "action": "normalize_and_transpile", + "action_config": { + "backend": "serial", + "source": "transpiler/backends/serial/user_intrinsics/user_intrinsics.cpp", + "includes": [], + "defs": [], + "launcher": "", + "intrinsic": "misc/custom_intrinsics" + }, + "reference": "transpiler/backends/serial/user_intrinsics/user_intrinsics_ref.cpp" + } +] diff --git a/tests/functional/data/misc/custom_intrinsics/api.h b/tests/functional/data/misc/custom_intrinsics/api.h new file mode 100644 index 00000000..043542e1 --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/api.h @@ -0,0 +1,4 @@ +#pragma once + +bool okl_is_nan(float value); + diff --git a/tests/functional/data/misc/custom_intrinsics/cuda/impl.cpp b/tests/functional/data/misc/custom_intrinsics/cuda/impl.cpp new file mode 100644 index 00000000..3e13fdb8 --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/cuda/impl.cpp @@ -0,0 +1,5 @@ +//INFO: transpiling to cuda backend already include necessary header + +__device__ bool okl_is_nan(float value) { + return isnan(value) != 0; +} diff --git a/tests/functional/data/misc/custom_intrinsics/dpcpp/impl.cpp b/tests/functional/data/misc/custom_intrinsics/dpcpp/impl.cpp new file mode 100644 index 00000000..ae44d84c --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/dpcpp/impl.cpp @@ -0,0 +1,8 @@ +// INFO: from documentation +// isNaN +// Description:The function returns 1, if and only if its argument is a NaN. +// Calling interface: +// int __binary32_isNaN(float x); +SYCL_EXTERNAL bool okl_is_nan(float value) { + return __binary32_isNaN(value) == 1; +} diff --git a/tests/functional/data/misc/custom_intrinsics/hip/impl.cpp b/tests/functional/data/misc/custom_intrinsics/hip/impl.cpp new file mode 100644 index 00000000..19491ff8 --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/hip/impl.cpp @@ -0,0 +1,7 @@ +// INFO: relate to the documentation is must be supported natively +// https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html#single-precision-mathematical-functions + +__device__ bool okl_is_nan(float value) { + return isnan(value); +} + diff --git a/tests/functional/data/misc/custom_intrinsics/launcher/impl.cpp b/tests/functional/data/misc/custom_intrinsics/launcher/impl.cpp new file mode 100644 index 00000000..42a0f9d0 --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/launcher/impl.cpp @@ -0,0 +1,6 @@ +//INFO: needs to keep the old OCCA compatibility +// some types must be knonw for launcher + +bool okl_is_nan(float) { + return true; +} diff --git a/tests/functional/data/misc/custom_intrinsics/openmp/impl.cpp b/tests/functional/data/misc/custom_intrinsics/openmp/impl.cpp new file mode 100644 index 00000000..5c88a0a1 --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/openmp/impl.cpp @@ -0,0 +1,6 @@ +// INFO: needed for std::isnan +#include + +bool okl_is_nan(float value) { + return std::isnan(value); +} diff --git a/tests/functional/data/misc/custom_intrinsics/serial/impl.cpp b/tests/functional/data/misc/custom_intrinsics/serial/impl.cpp new file mode 100644 index 00000000..aba55cd2 --- /dev/null +++ b/tests/functional/data/misc/custom_intrinsics/serial/impl.cpp @@ -0,0 +1,6 @@ +//INFO: needed for std::isnan +#include + +bool okl_is_nan(float value) { + return std::isnan(value); +} diff --git a/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp b/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp index b51e22f3..078bbb45 100644 --- a/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp +++ b/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp @@ -29,7 +29,7 @@ inline __device__ T okl_shfl_down_sync(unsigned mask, T var, unsigned int delta, template inline __device__ T okl_shfl_xor_sync(unsigned mask, T var, int laneMask, int width = warpSize) { - return __shfl_xor_sync(mask, laneMask, width); + return __shfl_xor_sync(mask, var, laneMask, width); } // Pipeline Primitives Interface diff --git a/tests/functional/data/transpiler/backends/cuda/user_intrinsics/user_intrinsics.cpp b/tests/functional/data/transpiler/backends/cuda/user_intrinsics/user_intrinsics.cpp new file mode 100644 index 00000000..d24f44f8 --- /dev/null +++ b/tests/functional/data/transpiler/backends/cuda/user_intrinsics/user_intrinsics.cpp @@ -0,0 +1,14 @@ +#include "custom_intrinsics/api.h" + +@kernel void zero_nans(float* vec) { + @outer for (int i = 0; i < 32; ++i) { + @inner for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if(okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/cuda/user_intrinsics/user_intrinsics_ref.cpp b/tests/functional/data/transpiler/backends/cuda/user_intrinsics/user_intrinsics_ref.cpp new file mode 100644 index 00000000..e12a2832 --- /dev/null +++ b/tests/functional/data/transpiler/backends/cuda/user_intrinsics/user_intrinsics_ref.cpp @@ -0,0 +1,20 @@ +#include + +// INFO: transpiling to cuda backend already include necessary header + +__device__ bool okl_is_nan(float value) { return isnan(value) != 0; } + +extern "C" __global__ __launch_bounds__(32) void _occa_zero_nans_0(float *vec) { + { + int i = (0) + blockIdx.x; + { + int j = (0) + threadIdx.x; + int idx = i * 32 + j; + float value = vec[idx]; + if (okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/dpcpp/user_intrinsics/user_intrinsics.cpp b/tests/functional/data/transpiler/backends/dpcpp/user_intrinsics/user_intrinsics.cpp new file mode 100644 index 00000000..d24f44f8 --- /dev/null +++ b/tests/functional/data/transpiler/backends/dpcpp/user_intrinsics/user_intrinsics.cpp @@ -0,0 +1,14 @@ +#include "custom_intrinsics/api.h" + +@kernel void zero_nans(float* vec) { + @outer for (int i = 0; i < 32; ++i) { + @inner for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if(okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/dpcpp/user_intrinsics/user_intrinsics_ref.cpp b/tests/functional/data/transpiler/backends/dpcpp/user_intrinsics/user_intrinsics_ref.cpp new file mode 100644 index 00000000..4d6a1cf7 --- /dev/null +++ b/tests/functional/data/transpiler/backends/dpcpp/user_intrinsics/user_intrinsics_ref.cpp @@ -0,0 +1,32 @@ +#include + +// INFO: from documentation +// isNaN +// Description:The function returns 1, if and only if its argument is a NaN. +// Calling interface: +// int __binary32_isNaN(float x); +SYCL_EXTERNAL bool okl_is_nan(float value) { + return __binary32_isNaN(value) == 1; +} + +using namespace sycl; + +extern "C" [[sycl::reqd_work_group_size(1, 1, 32)]] void +_occa_zero_nans_0(sycl::queue *queue_, sycl::nd_range<3> *range_, float *vec) { + queue_->submit([&](sycl::handler &handler_) { + handler_.parallel_for(*range_, [=](sycl::nd_item<3> item_) { + { + int i = (0) + item_.get_group(2); + { + int j = (0) + item.get_local_id(2); + int idx = i * 32 + j; + float value = vec[idx]; + if (okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } + }); + }); +} + diff --git a/tests/functional/data/transpiler/backends/hip/user_intrinsics/user_intrinsics.cpp b/tests/functional/data/transpiler/backends/hip/user_intrinsics/user_intrinsics.cpp new file mode 100644 index 00000000..d24f44f8 --- /dev/null +++ b/tests/functional/data/transpiler/backends/hip/user_intrinsics/user_intrinsics.cpp @@ -0,0 +1,14 @@ +#include "custom_intrinsics/api.h" + +@kernel void zero_nans(float* vec) { + @outer for (int i = 0; i < 32; ++i) { + @inner for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if(okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/hip/user_intrinsics/user_intrinsics_ref.cpp b/tests/functional/data/transpiler/backends/hip/user_intrinsics/user_intrinsics_ref.cpp new file mode 100644 index 00000000..fb0989ac --- /dev/null +++ b/tests/functional/data/transpiler/backends/hip/user_intrinsics/user_intrinsics_ref.cpp @@ -0,0 +1,20 @@ +#include + +// INFO: relate to the documentation is must be supported natively +// https://rocm.docs.amd.com/projects/HIP/en/latest/reference/kernel_language.html#single-precision-mathematical-functions + +__device__ bool okl_is_nan(float value) { return isnan(value); } + +extern "C" __global__ __launch_bounds__(32) void _occa_zero_nans_0(float *vec) { + { + int i = (0) + blockIdx.x; + { + int j = (0) + threadIdx.x; + int idx = i * 32 + j; + float value = vec[idx]; + if (okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} diff --git a/tests/functional/data/transpiler/backends/openmp/user_intrinsics/user_intrinsics.cpp b/tests/functional/data/transpiler/backends/openmp/user_intrinsics/user_intrinsics.cpp new file mode 100644 index 00000000..d24f44f8 --- /dev/null +++ b/tests/functional/data/transpiler/backends/openmp/user_intrinsics/user_intrinsics.cpp @@ -0,0 +1,14 @@ +#include "custom_intrinsics/api.h" + +@kernel void zero_nans(float* vec) { + @outer for (int i = 0; i < 32; ++i) { + @inner for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if(okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/openmp/user_intrinsics/user_intrinsics_ref.cpp b/tests/functional/data/transpiler/backends/openmp/user_intrinsics/user_intrinsics_ref.cpp new file mode 100644 index 00000000..6922b06a --- /dev/null +++ b/tests/functional/data/transpiler/backends/openmp/user_intrinsics/user_intrinsics_ref.cpp @@ -0,0 +1,20 @@ +// INFO: needed for std::isnan +#include + +bool okl_is_nan(float value) { + return std::isnan(value); +} + +extern "C" void zero_nans(float *vec) { +#pragma omp parallel for + for (int i = 0; i < 32; ++i) { + for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if (okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/serial/user_intrinsics/user_intrinsics.cpp b/tests/functional/data/transpiler/backends/serial/user_intrinsics/user_intrinsics.cpp new file mode 100644 index 00000000..d24f44f8 --- /dev/null +++ b/tests/functional/data/transpiler/backends/serial/user_intrinsics/user_intrinsics.cpp @@ -0,0 +1,14 @@ +#include "custom_intrinsics/api.h" + +@kernel void zero_nans(float* vec) { + @outer for (int i = 0; i < 32; ++i) { + @inner for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if(okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/data/transpiler/backends/serial/user_intrinsics/user_intrinsics_ref.cpp b/tests/functional/data/transpiler/backends/serial/user_intrinsics/user_intrinsics_ref.cpp new file mode 100644 index 00000000..2c10dc0e --- /dev/null +++ b/tests/functional/data/transpiler/backends/serial/user_intrinsics/user_intrinsics_ref.cpp @@ -0,0 +1,19 @@ +//INFO: needed for std::isnan +#include + +bool okl_is_nan(float value) { + return std::isnan(value); +} + +extern "C" void zero_nans(float *vec) { + for (int i = 0; i < 32; ++i) { + for (int j = 0; j < 32; ++j) { + int idx = i * 32 + j; + float value = vec[idx]; + if (okl_is_nan(value)) { + vec[idx] = 0.0f; + } + } + } +} + diff --git a/tests/functional/generic_configurable_tests.cpp b/tests/functional/generic_configurable_tests.cpp index 2e219daf..3394fd60 100644 --- a/tests/functional/generic_configurable_tests.cpp +++ b/tests/functional/generic_configurable_tests.cpp @@ -93,10 +93,23 @@ struct TranspileActionConfig { std::vector mutable includes; std::vector mutable defs; std::filesystem::path launcher; - NLOHMANN_DEFINE_TYPE_INTRUSIVE(TranspileActionConfig, backend, source, includes, defs, launcher) + std::optional intrinsic = std::nullopt; oklt::UserInput build(const fs::path& dataDir) const; }; +void from_json(const json& j, TranspileActionConfig& conf) { + + j.at("backend").get_to(conf.backend); + j.at("source").get_to(conf.source); + j.at("includes").get_to(conf.includes); + j.at("defs").get_to(conf.defs); + j.at("launcher").get_to(conf.launcher); + auto it = j.find("intrinsic"); + if(it != j.end()) { + conf.intrinsic = it->get(); + } +} + oklt::UserInput TranspileActionConfig::build(const fs::path& dataDir) const { auto expectedBackend = oklt::backendFromString(backend); if (!expectedBackend) { @@ -110,11 +123,18 @@ oklt::UserInput TranspileActionConfig::build(const fs::path& dataDir) const { std::ifstream sourceFile{sourceFullPath}; std::string sourceCode{std::istreambuf_iterator(sourceFile), {}}; + std::vector intrinsics; + if(intrinsic) { + fs::path fullIntrinsicPath = dataDir / std::filesystem::path(intrinsic.value()); + intrinsics.push_back(std::move(fullIntrinsicPath)); + } return oklt::UserInput{.backend = expectedBackend.value(), .source = std::move(sourceCode), .sourcePath = std::move(sourceFullPath), .includeDirectories = includes, - .defines = defs}; + .defines = defs, + .userIntrinsics = std::move(intrinsics) + }; } namespace { diff --git a/tool/main.cpp b/tool/main.cpp index e84b5550..91bfc374 100644 --- a/tool/main.cpp +++ b/tool/main.cpp @@ -79,13 +79,13 @@ int main(int argc, char* argv[]) { transpile_command.add_argument("-l", "--launcher") .default_value("") .help("optional launcher output file"); + transpile_command.add_argument("-e", "--external-intrinsic") + .default_value>({}) + .append() + .help("Specify external intrinsics pathes"); transpile_command.add_argument("-n", "--normalizer-output") .default_value("") .help("optional normalization output file"); - transpile_command.add_argument("-s", "--sema") - .help("sema: {no-sema, with-sema}") - .required() - .default_value("with-sema"); program.add_subparser(normalize_command); program.add_subparser(transpile_command); @@ -164,6 +164,16 @@ int main(int argc, char* argv[]) { includes.push_back(includeStr); } + std::vector intrinsicsStrs; + if(transpile_command.is_used("-e")) { + intrinsicsStrs = transpile_command.get>("-e"); + } + + std::vector intrinsics; + for (const auto &intrinsic: intrinsicsStrs) { + intrinsics.push_back(intrinsic); + } + auto normalization_output = std::filesystem::path(transpile_command.get("-n")); if (normalization_output.empty()) { normalization_output = build_normalization_output_filename(sourcePath); @@ -175,7 +185,9 @@ int main(int argc, char* argv[]) { .source = sourceCode, .sourcePath = sourcePath, .includeDirectories = std::move(includes), - .defines = std::move(defines)}; + .defines = std::move(defines), + .userIntrinsics = std::move(intrinsics) + }; oklt::UserResult result = [](auto&& input, auto need_normalize) { if (need_normalize) {