From 835c34d50f5405754eaace0f81d9b274a72e511a Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Fri, 11 Jul 2025 21:20:55 -0700 Subject: [PATCH 01/13] Refactor --offload-arch for SYCL offloading. --- clang/include/clang/Basic/OffloadArch.h | 80 +++++++++++- clang/lib/Basic/OffloadArch.cpp | 53 ++++++++ clang/lib/Driver/Driver.cpp | 113 ++++++++++------- clang/lib/Driver/ToolChains/SYCL.cpp | 119 ------------------ clang/lib/Driver/ToolChains/SYCL.h | 108 ---------------- .../Driver/sycl-offload-arch-intel-gpu.cpp | 111 +--------------- 6 files changed, 198 insertions(+), 386 deletions(-) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index 4dda3ec2216fa..5c91128eb47db 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -103,9 +103,62 @@ enum class OffloadArch { Generic, // A processor model named 'generic' if the target backend defines a // public one. // Intel CPUs + SKYLAKEAVX512, + COREAVX2, + COREI7AVX, + COREI7, + WESTMERE, + SANDYBRIDGE, + IVYBRIDGE, + BROADWELL, + COFFEELAKE, + ALDERLAKE, + SKYLAKE, + SKX, + CASCADELAKE, + ICELAKECLIENT, + ICELAKESERVER, + SAPPHIRERAPIDS, GRANITERAPIDS, // Intel GPUs + BDW, + SKL, + KBL, + CFL, + APL, + BXT, + GLK, + WHL, + AML, + CML, + ICLLP, + ICL, + EHL, + JSL, + TGLLP, + TGL, + RKL, + ADL_S, + RPL_S, + ADL_P, + ADL_N, + DG1, + ACM_G10, + DG2_G10, + ACM_G11, + DG2_G11, + ACM_G12, + DG2_G12, + PVC, + PVC_VG, + MTL_U, + MTL_S, + ARL_U, + ARL_S, + MTL_H, + ARL_H, BMG_G21, + LNL_M, LAST, CudaDefault = OffloadArch::SM_52, @@ -122,17 +175,40 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { } static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::GRANITERAPIDS && Arch < OffloadArch::BMG_G21; + return Arch >= OffloadArch::SKYLAKEAVX512 && Arch < OffloadArch::BMG_G21; } static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::BMG_G21 && Arch < 
OffloadArch::LAST; + return Arch >= OffloadArch::BDW && Arch < OffloadArch::LAST; } static inline bool IsIntelOffloadArch(OffloadArch Arch) { return IsIntelCPUOffloadArch(Arch) || IsIntelGPUOffloadArch(Arch); } +// Check if the given Arch value is a Generic AMD GPU. +// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. +// This list is used to filter out GFX*_GENERIC AMD GPUs in +// `IsSYCLSupportedAMDGPUArch`. +static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { + return Arch == OffloadArch::GFX9_GENERIC || + Arch == OffloadArch::GFX10_1_GENERIC || + Arch == OffloadArch::GFX10_3_GENERIC || + Arch == OffloadArch::GFX11_GENERIC || + Arch == OffloadArch::GFX12_GENERIC; +} + +// Check if the given Arch value is a valid SYCL supported AMD GPU. +static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && + !IsAMDGenericGPUArch(Arch); +} + +// Check if the given Arch value is a valid SYCL supported NVidia GPU. 
+static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { + return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; +} + const char *OffloadArchToString(OffloadArch A); const char *OffloadArchToVirtualArchString(OffloadArch A); diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index dce9ffaedb905..ccba7e8a29e46 100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -89,9 +89,62 @@ static const OffloadArchToStringMap ArchNames[] = { GFX(1250), // gfx1250 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, // Intel CPUs + {OffloadArch::SKYLAKEAVX512, "skylakeavx512", ""}, + {OffloadArch::COREAVX2, "coreavx2", ""}, + {OffloadArch::COREI7AVX, "corei7avx", ""}, + {OffloadArch::COREI7, "corei7", ""}, + {OffloadArch::WESTMERE, "westmere", ""}, + {OffloadArch::SANDYBRIDGE, "sandybridge", ""}, + {OffloadArch::IVYBRIDGE, "ivybridge", ""}, + {OffloadArch::BROADWELL, "broadwell", ""}, + {OffloadArch::COFFEELAKE, "coffeelake", ""}, + {OffloadArch::ALDERLAKE, "alderlake", ""}, + {OffloadArch::SKYLAKE, "skylake", ""}, + {OffloadArch::SKX, "skx", ""}, + {OffloadArch::CASCADELAKE, "cascadelake", ""}, + {OffloadArch::ICELAKECLIENT, "icelakeclient", ""}, + {OffloadArch::ICELAKESERVER, "icelakeserver", ""}, + {OffloadArch::SAPPHIRERAPIDS, "sapphirerapids", ""}, {OffloadArch::GRANITERAPIDS, "graniterapids", ""}, // Intel GPUS + {OffloadArch::BDW, "bdw", ""}, + {OffloadArch::SKL, "skl", ""}, + {OffloadArch::KBL, "kbl", ""}, + {OffloadArch::CFL, "cfl", ""}, + {OffloadArch::APL, "apl", ""}, + {OffloadArch::BXT, "bxt", ""}, + {OffloadArch::GLK, "glk", ""}, + {OffloadArch::WHL, "whl", ""}, + {OffloadArch::AML, "aml", ""}, + {OffloadArch::CML, "cml", ""}, + {OffloadArch::ICLLP, "icllp", ""}, + {OffloadArch::ICL, "icl", ""}, + {OffloadArch::EHL, "ehl", ""}, + {OffloadArch::JSL, "jsl", ""}, + {OffloadArch::TGLLP, "tgllp", ""}, + {OffloadArch::TGL, "tgl", ""}, + {OffloadArch::RKL, "rkl", ""}, + 
{OffloadArch::ADL_S, "adl_s", ""}, + {OffloadArch::RPL_S, "rpl_s", ""}, + {OffloadArch::ADL_P, "adl_p", ""}, + {OffloadArch::ADL_N, "adl_n", ""}, + {OffloadArch::DG1, "dg1", ""}, + {OffloadArch::ACM_G10, "acm_g10", ""}, + {OffloadArch::DG2_G10, "dg2_g10", ""}, + {OffloadArch::ACM_G11, "acm_g11", ""}, + {OffloadArch::DG2_G11, "dg2_g11", ""}, + {OffloadArch::ACM_G12, "acm_g12", ""}, + {OffloadArch::DG2_G12, "dg2_g12", ""}, + {OffloadArch::PVC, "pvc", ""}, + {OffloadArch::PVC_VG, "pvc_vg", ""}, + {OffloadArch::MTL_U, "mtl_u", ""}, + {OffloadArch::MTL_S, "mtl_s", ""}, + {OffloadArch::ARL_U, "arl_u", ""}, + {OffloadArch::ARL_S, "arl_s", ""}, + {OffloadArch::MTL_H, "mtl_h", ""}, + {OffloadArch::ARL_H, "arl_h", ""}, {OffloadArch::BMG_G21, "bmg_g21", ""}, + {OffloadArch::LNL_M, "lnl_m", ""}, {OffloadArch::Generic, "generic", ""}, // clang-format on }; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index b664e91abfffa..39d9cd6e7bad0 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1479,61 +1479,78 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, Diag(clang::diag::err_drv_sycl_offload_arch_new_driver); return; } - const ToolChain *HostTC = C.getSingleOffloadToolChain(); - auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); - auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), - HostTC->getTriple()); + llvm::Triple AMDTriple("amdgcn-amd-amdhsa"); + llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda"); + llvm::Triple IntelGPUTriple("spir64_gen-unknown-unknown"); + llvm::Triple IntelCPUTriple("spir64_x86_64-unknown-unknown"); // Attempt to deduce the offloading triple from the set of architectures. // We need to temporarily create these toolchains so that we can access // tools for inferring architectures. 
llvm::DenseSet Archs; - if (NVPTXTriple) { - auto TempTC = std::make_unique( - *this, *NVPTXTriple, *HostTC, C.getInputArgs(), Action::OFK_None); - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true)) - Archs.insert(Arch); - } - if (AMDTriple) { - auto TempTC = std::make_unique( - *this, *AMDTriple, *HostTC, C.getInputArgs()); - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &*TempTC, true)) - Archs.insert(Arch); - } - if (!AMDTriple && !NVPTXTriple) { - for (StringRef Arch : - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, nullptr, true)) - Archs.insert(Arch); - } - for (StringRef Arch : Archs) { - if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( - getProcessorFromTargetID(*NVPTXTriple, Arch)))) { - DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); - } else if (AMDTriple && - IsSYCLSupportedAMDGPUArch(StringToOffloadArch( - getProcessorFromTargetID(*AMDTriple, Arch)))) { - DerivedArchs[AMDTriple->getTriple()].insert(Arch); - } else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) { - DerivedArchs[getSYCLDeviceTriple("spir64_x86_64").getTriple()].insert( - Arch); - } else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) { - StringRef IntelGPUArch; - // For Intel Graphics AOT target, valid values for '--offload-arch' - // are mapped to valid device names accepted by OCLOC (the Intel GPU AOT - // compiler) via the '-device' option. The mapIntelGPUArchName - // function maps the accepted values for '--offload-arch' to enable SYCL - // offloading to Intel GPUs and the corresponding '-device' value passed - // to OCLOC. 
- IntelGPUArch = mapIntelGPUArchName(Arch).data(); - DerivedArchs[getSYCLDeviceTriple("spir64_gen").getTriple()].insert( - IntelGPUArch); - } else { - Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + + for (StringRef Arch : + C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { + bool IsNVPTX = IsSYCLSupportedNVidiaGPUArch( + StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); + bool IsAMDGPU = IsSYCLSupportedAMDGPUArch( + StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); + bool IsIntelGPU = IsIntelGPUOffloadArch( + StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); + bool IsIntelCPU = IsIntelCPUOffloadArch( + StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); + if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && + !IsIntelCPU && !Arch.equals_insensitive("native")) { + Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; return; } } + + for (const llvm::Triple &TT : + {AMDTriple, NVPTXTriple, IntelGPUTriple, IntelCPUTriple}) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, + C.getDefaultToolChain().getTriple()); + + llvm::SmallVector Archs = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/false); + if (!Archs.empty()) { + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = Archs; + } + } + + /* for (StringRef Arch : Archs) { + if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( + getProcessorFromTargetID(*NVPTXTriple, Arch)))) + { DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); } else if + (AMDTriple && IsSYCLSupportedAMDGPUArch(StringToOffloadArch( + getProcessorFromTargetID(*AMDTriple, Arch)))) { + DerivedArchs[AMDTriple->getTriple()].insert(Arch); + } else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) + { DerivedArchs[getSYCLDeviceTriple("spir64_x86_64").getTriple()].insert( + Arch); + } else if 
(IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) + { StringRef IntelGPUArch; + // For Intel Graphics AOT target, valid values for '--offload-arch' + // are mapped to valid device names accepted by OCLOC (the Intel GPU + AOT + // compiler) via the '-device' option. The mapIntelGPUArchName + // function maps the accepted values for '--offload-arch' to enable + SYCL + // offloading to Intel GPUs and the corresponding '-device' value + passed + // to OCLOC. + IntelGPUArch = mapIntelGPUArchName(Arch).data(); + DerivedArchs[getSYCLDeviceTriple("spir64_gen").getTriple()].insert( + IntelGPUArch); + } else { + Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; + return; + } + } + */ + // Emit an error if architecture value is not provided // to --offload-arch. if (Archs.empty()) { @@ -7540,6 +7557,8 @@ static StringRef getCanonicalArchString(Compilation &C, bool SpecificToolchain) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. + // Arch = Intels arch bdw + // Triple = nvidia triple OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (Triple.isNVPTX() && diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 280b388c5bd8f..bd8a3df933164 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -21,125 +21,6 @@ using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; -// Struct that relates an AOT target value with -// Intel CPUs and Intel GPUs. -struct StringToOffloadArchSYCLMap { - const char *ArchName; - SYCLSupportedIntelArchs IntelArch; -}; - -// Mapping of supported SYCL offloading architectures. -static const StringToOffloadArchSYCLMap StringToArchNamesMap[] = { - // Intel CPU mapping. 
- {"skylake-avx512", SYCLSupportedIntelArchs::SKYLAKEAVX512}, - {"core-avx2", SYCLSupportedIntelArchs::COREAVX2}, - {"corei7-avx", SYCLSupportedIntelArchs::COREI7AVX}, - {"corei7", SYCLSupportedIntelArchs::COREI7}, - {"westmere", SYCLSupportedIntelArchs::WESTMERE}, - {"sandybridge", SYCLSupportedIntelArchs::SANDYBRIDGE}, - {"ivybridge", SYCLSupportedIntelArchs::IVYBRIDGE}, - {"broadwell", SYCLSupportedIntelArchs::BROADWELL}, - {"coffeelake", SYCLSupportedIntelArchs::COFFEELAKE}, - {"alderlake", SYCLSupportedIntelArchs::ALDERLAKE}, - {"skylake", SYCLSupportedIntelArchs::SKYLAKE}, - {"skx", SYCLSupportedIntelArchs::SKX}, - {"cascadelake", SYCLSupportedIntelArchs::CASCADELAKE}, - {"icelake-client", SYCLSupportedIntelArchs::ICELAKECLIENT}, - {"icelake-server", SYCLSupportedIntelArchs::ICELAKESERVER}, - {"sapphirerapids", SYCLSupportedIntelArchs::SAPPHIRERAPIDS}, - {"graniterapids", SYCLSupportedIntelArchs::GRANITERAPIDS}, - // Intel GPU mapping. - {"bdw", SYCLSupportedIntelArchs::BDW}, - {"skl", SYCLSupportedIntelArchs::SKL}, - {"kbl", SYCLSupportedIntelArchs::KBL}, - {"cfl", SYCLSupportedIntelArchs::CFL}, - {"apl", SYCLSupportedIntelArchs::APL}, - {"bxt", SYCLSupportedIntelArchs::BXT}, - {"glk", SYCLSupportedIntelArchs::GLK}, - {"whl", SYCLSupportedIntelArchs::WHL}, - {"aml", SYCLSupportedIntelArchs::AML}, - {"cml", SYCLSupportedIntelArchs::CML}, - {"icllp", SYCLSupportedIntelArchs::ICLLP}, - {"icl", SYCLSupportedIntelArchs::ICL}, - {"ehl", SYCLSupportedIntelArchs::EHL}, - {"jsl", SYCLSupportedIntelArchs::JSL}, - {"tgllp", SYCLSupportedIntelArchs::TGLLP}, - {"tgl", SYCLSupportedIntelArchs::TGL}, - {"rkl", SYCLSupportedIntelArchs::RKL}, - {"adl_s", SYCLSupportedIntelArchs::ADL_S}, - {"rpl_s", SYCLSupportedIntelArchs::RPL_S}, - {"adl_p", SYCLSupportedIntelArchs::ADL_P}, - {"adl_n", SYCLSupportedIntelArchs::ADL_N}, - {"dg1", SYCLSupportedIntelArchs::DG1}, - {"acm_g10", SYCLSupportedIntelArchs::ACM_G10}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"acm_g11", 
SYCLSupportedIntelArchs::ACM_G11}, - {"dg2_g10", SYCLSupportedIntelArchs::DG2_G10}, - {"dg2_g11", SYCLSupportedIntelArchs::DG2_G11}, - {"acm_g12", SYCLSupportedIntelArchs::ACM_G12}, - {"dg2_g12", SYCLSupportedIntelArchs::DG2_G12}, - {"pvc", SYCLSupportedIntelArchs::PVC}, - {"pvc_vg", SYCLSupportedIntelArchs::PVC_VG}, - {"mtl_u", SYCLSupportedIntelArchs::MTL_U}, - {"mtl_s", SYCLSupportedIntelArchs::MTL_S}, - {"arl_u", SYCLSupportedIntelArchs::ARL_U}, - {"arl_s", SYCLSupportedIntelArchs::ARL_S}, - {"mtl_h", SYCLSupportedIntelArchs::MTL_H}, - {"arl_h", SYCLSupportedIntelArchs::ARL_H}, - {"bmg_g21", SYCLSupportedIntelArchs::BMG_G21}, - {"lnl_m", SYCLSupportedIntelArchs::LNL_M}}; - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs -clang::driver::StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString) { - auto result = std::find_if( - std::begin(StringToArchNamesMap), std::end(StringToArchNamesMap), - [ArchNameAsString](const StringToOffloadArchSYCLMap &map) { - return ArchNameAsString == map.ArchName; - }); - if (result == std::end(StringToArchNamesMap)) - return SYCLSupportedIntelArchs::UNKNOWN; - return result->IntelArch; -} - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). 
-StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) { - StringRef Arch; - Arch = llvm::StringSwitch(ArchName) - .Case("bdw", "bdw") - .Case("skl", "skl") - .Case("kbl", "kbl") - .Case("cfl", "cfl") - .Cases("apl", "bxt", "apl") - .Case("glk", "glk") - .Case("whl", "whl") - .Case("aml", "aml") - .Case("cml", "cml") - .Cases("icllp", "icl", "icllp") - .Cases("ehl", "jsl", "ehl") - .Cases("tgllp", "tgl", "tgllp") - .Case("rkl", "rkl") - .Cases("adl_s", "rpl_s", "adl_s") - .Case("adl_p", "adl_p") - .Case("adl_n", "adl_n") - .Case("dg1", "dg1") - .Cases("acm_g10", "dg2_g10", "acm_g10") - .Cases("acm_g11", "dg2_g11", "acm_g11") - .Cases("acm_g12", "dg2_g12", "acm_g12") - .Case("pvc", "pvc") - .Case("pvc_vg", "pvc_vg") - .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") - .Case("mtl_h", "mtl_h") - .Case("arl_h", "arl_h") - .Case("bmg_g21", "bmg_g21") - .Case("lnl_m", "lnl_m") - .Default(""); - return Arch; -} - SYCLInstallationDetector::SYCLInstallationDetector(const Driver &D) : D(D), InstallationCandidates() { InstallationCandidates.emplace_back(D.Dir + "/.."); diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index ad12d1a444729..280c0bfd6b70d 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -17,114 +17,6 @@ namespace clang { namespace driver { -// List of architectures (Intel CPUs and Intel GPUs) -// that support SYCL offloading. 
-enum class SYCLSupportedIntelArchs { - // Intel CPUs - UNKNOWN, - SKYLAKEAVX512, - COREAVX2, - COREI7AVX, - COREI7, - WESTMERE, - SANDYBRIDGE, - IVYBRIDGE, - BROADWELL, - COFFEELAKE, - ALDERLAKE, - SKYLAKE, - SKX, - CASCADELAKE, - ICELAKECLIENT, - ICELAKESERVER, - SAPPHIRERAPIDS, - GRANITERAPIDS, - // Intel GPUs - BDW, - SKL, - KBL, - CFL, - APL, - BXT, - GLK, - WHL, - AML, - CML, - ICLLP, - ICL, - EHL, - JSL, - TGLLP, - TGL, - RKL, - ADL_S, - RPL_S, - ADL_P, - ADL_N, - DG1, - ACM_G10, - DG2_G10, - ACM_G11, - DG2_G11, - ACM_G12, - DG2_G12, - PVC, - PVC_VG, - MTL_U, - MTL_S, - ARL_U, - ARL_S, - MTL_H, - ARL_H, - BMG_G21, - LNL_M, -}; - -// Check if the given Arch value is a Generic AMD GPU. -// Currently GFX*_GENERIC AMD GPUs do not support SYCL offloading. -// This list is used to filter out GFX*_GENERIC AMD GPUs in -// `IsSYCLSupportedAMDGPUArch`. -static inline bool IsAMDGenericGPUArch(OffloadArch Arch) { - return Arch == OffloadArch::GFX9_GENERIC || - Arch == OffloadArch::GFX10_1_GENERIC || - Arch == OffloadArch::GFX10_3_GENERIC || - Arch == OffloadArch::GFX11_GENERIC || - Arch == OffloadArch::GFX12_GENERIC; -} - -// Check if the given Arch value is a valid SYCL supported AMD GPU. -static inline bool IsSYCLSupportedAMDGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::GFX700 && Arch < OffloadArch::AMDGCNSPIRV && - !IsAMDGenericGPUArch(Arch); -} - -// Check if the given Arch value is a valid SYCL supported NVidia GPU. -static inline bool IsSYCLSupportedNVidiaGPUArch(OffloadArch Arch) { - return Arch >= OffloadArch::SM_50 && Arch <= OffloadArch::SM_90a; -} - -// Check if the given Arch value is a valid SYCL supported Intel CPU. -static inline bool IsSYCLSupportedIntelCPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::SKYLAKEAVX512 && - Arch <= SYCLSupportedIntelArchs::GRANITERAPIDS; -} - -// Check if the given Arch value is a valid SYCL supported Intel GPU. 
-static inline bool IsSYCLSupportedIntelGPUArch(SYCLSupportedIntelArchs Arch) { - return Arch >= SYCLSupportedIntelArchs::BDW && - Arch <= SYCLSupportedIntelArchs::LNL_M; -} - -// Check if the user provided value for --offload-arch is a valid -// SYCL supported Intel AOT target. -SYCLSupportedIntelArchs -StringToOffloadArchSYCL(llvm::StringRef ArchNameAsString); - -// This is a mapping between the user provided --offload-arch value for Intel -// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU -// AOT compiler). -StringRef mapIntelGPUArchName(StringRef ArchName); - class SYCLInstallationDetector { public: SYCLInstallationDetector(const Driver &D); diff --git a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp index df7873fade58f..5c12a4c94b728 100644 --- a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp +++ b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp @@ -3,119 +3,10 @@ // SYCL AOT compilation to Intel GPUs using --offload-arch -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bdw %s 2>&1 | \ +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bmg_g21 %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=bdw -DMAC_STR=BDW -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=skl -DMAC_STR=SKL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=kbl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=kbl -DMAC_STR=KBL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cfl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=cfl -DMAC_STR=CFL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=apl %s 2>&1 | \ -// RUN: FileCheck 
%s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=apl -DMAC_STR=APL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bxt %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=apl -DMAC_STR=APL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=glk %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=glk -DMAC_STR=GLK - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=whl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=whl -DMAC_STR=WHL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=aml %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=aml -DMAC_STR=AML - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cml %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=cml -DMAC_STR=CML - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icllp %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=icllp -DMAC_STR=ICLLP - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=icllp -DMAC_STR=ICLLP - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=ehl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=ehl -DMAC_STR=EHL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=jsl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=ehl -DMAC_STR=EHL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=tgllp %s 2>&1 | \ -// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=tgllp -DMAC_STR=TGLLP - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=tgl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=tgllp -DMAC_STR=TGLLP - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=rkl %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=rkl -DMAC_STR=RKL - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_s %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_s -DMAC_STR=ADL_S - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=rpl_s %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_s -DMAC_STR=ADL_S - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_p %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_p -DMAC_STR=ADL_P - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_n %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_n -DMAC_STR=ADL_N - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg1 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=dg1 -DMAC_STR=DG1 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g10 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g10 -DMAC_STR=ACM_G10 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g10 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g10 -DMAC_STR=ACM_G10 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g11 %s 2>&1 | \ 
-// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g11 -DMAC_STR=ACM_G11 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g11 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g11 -DMAC_STR=ACM_G11 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g12 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g12 -DMAC_STR=ACM_G12 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g12 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g12 -DMAC_STR=ACM_G12 - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=pvc %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=pvc -DMAC_STR=PVC - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=pvc_vg %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_u %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_s %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_u %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_s %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U - -// RUN: 
%clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_h %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_h -DMAC_STR=MTL_H - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_h %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=arl_h -DMAC_STR=ARL_H - -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bmg_g21 %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=bmg_g21 -DMAC_STR=BMG_G21 -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=lnl_m %s 2>&1 | \ -// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=lnl_m -DMAC_STR=LNL_M // TARGET-TRIPLE-GPU: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" // TARGET-TRIPLE-GPU: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" From 82dd6d3c60a9cf9796aab7c0e1184feb5ffda234 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 14 Jul 2025 14:19:26 -0700 Subject: [PATCH 02/13] Add Intel target specific code. --- clang/lib/Driver/Driver.cpp | 82 ++++++++----- .../Driver/sycl-offload-arch-intel-gpu.cpp | 112 +++++++++++++++++- 2 files changed, 163 insertions(+), 31 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 39d9cd6e7bad0..5fed9a64cc2da 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1487,7 +1487,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Attempt to deduce the offloading triple from the set of architectures. // We need to temporarily create these toolchains so that we can access // tools for inferring architectures. 
- llvm::DenseSet Archs; + // llvm::DenseSet Archs; for (StringRef Arch : C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { @@ -1499,16 +1499,25 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); bool IsIntelCPU = IsIntelCPUOffloadArch( StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); + if (IsNVPTX) + UniqueSYCLTriplesVec.push_back(NVPTXTriple); + else if (IsAMDGPU) + UniqueSYCLTriplesVec.push_back(AMDTriple); + else if (IsIntelGPU) + UniqueSYCLTriplesVec.push_back(IntelGPUTriple); + else if (IsIntelCPU) + UniqueSYCLTriplesVec.push_back(IntelCPUTriple); + if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && !IsIntelCPU && !Arch.equals_insensitive("native")) { - Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; + Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; return; } } for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple, IntelGPUTriple, IntelCPUTriple}) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT, + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, C.getDefaultToolChain().getTriple()); llvm::SmallVector Archs = @@ -1516,6 +1525,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, /*SpecificToolchain=*/false); if (!Archs.empty()) { C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + // if(IsIntelGPU) OffloadArchs[&TC] = Archs; } } @@ -1551,34 +1561,28 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, } */ - // Emit an error if architecture value is not provided - // to --offload-arch. 
- if (Archs.empty()) { - Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); - return; - } - - for (const auto &TripleAndArchs : DerivedArchs) - SYCLTriples.insert(TripleAndArchs.first()); + /* + for (const auto &TripleAndArchs : DerivedArchs) + SYCLTriples.insert(TripleAndArchs.first()); - for (const auto &Val : SYCLTriples) { - llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val.getKey())); - std::string NormalizedName = SYCLTargetTriple.normalize(); - - // Make sure we don't have a duplicate triple. - auto Duplicate = FoundNormalizedTriples.find(NormalizedName); - if (Duplicate != FoundNormalizedTriples.end()) { - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Val.getKey() << Duplicate->second; - continue; - } + for (const auto &Val : SYCLTriples) { + llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val.getKey())); + std::string NormalizedName = SYCLTargetTriple.normalize(); - // Store the current triple so that we can check for duplicates in the - // following iterations. - FoundNormalizedTriples[NormalizedName] = Val.getKey(); - UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); - } + // Make sure we don't have a duplicate triple. + auto Duplicate = FoundNormalizedTriples.find(NormalizedName); + if (Duplicate != FoundNormalizedTriples.end()) { + Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) + << Val.getKey() << Duplicate->second; + continue; + } + // Store the current triple so that we can check for duplicates in the + // following iterations. 
+ FoundNormalizedTriples[NormalizedName] = Val.getKey(); + UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); + } + */ addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); } else { @@ -7573,8 +7577,21 @@ static StringRef getCanonicalArchString(Compilation &C, C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "HIP" << ArchStr; return StringRef(); + } else if (Triple.isSPIRAOT() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && + (Arch == OffloadArch::UNKNOWN || !IsIntelGPUOffloadArch(Arch))) { + if (SpecificToolchain) + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_gen" << ArchStr; + return StringRef(); + } else if (Triple.isSPIRAOT() && + Triple.getSubArch() == llvm::Triple::SPIRSubArch_x86_64 && + (Arch == OffloadArch::UNKNOWN || !IsIntelCPUOffloadArch(Arch))) { + if (SpecificToolchain) + C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) + << "spir64_x86_64" << ArchStr; + return StringRef(); } - if (IsNVIDIAOffloadArch(Arch)) return Args.MakeArgStringRef(OffloadArchToString(Arch)); @@ -7587,6 +7604,13 @@ static StringRef getCanonicalArchString(Compilation &C, } return Args.MakeArgStringRef(getCanonicalTargetID(*Arch, Features)); } + if (IsIntelGPUOffloadArch(Arch)) { + return Args.MakeArgStringRef(ArchStr); + } + + if (IsIntelCPUOffloadArch(Arch)) { + return Args.MakeArgStringRef(ArchStr); + } // If the input isn't CUDA or HIP just return the architecture. 
return ArchStr; diff --git a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp index 5c12a4c94b728..f04e82a4ffb54 100644 --- a/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp +++ b/clang/test/Driver/sycl-offload-arch-intel-gpu.cpp @@ -3,13 +3,121 @@ // SYCL AOT compilation to Intel GPUs using --offload-arch -// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bmg_g21 %s 2>&1 | \ +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bdw %s 2>&1 | \ // RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=bdw -DMAC_STR=BDW +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=skl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=skl -DMAC_STR=SKL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=kbl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=kbl -DMAC_STR=KBL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cfl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=cfl -DMAC_STR=CFL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=apl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=apl -DMAC_STR=APL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bxt %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=apl -DMAC_STR=APL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=glk %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=glk -DMAC_STR=GLK + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=whl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=whl 
-DMAC_STR=WHL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=aml %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=aml -DMAC_STR=AML + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=cml %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=cml -DMAC_STR=CML + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icllp %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=icllp -DMAC_STR=ICLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=icl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=icllp -DMAC_STR=ICLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=ehl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=ehl -DMAC_STR=EHL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=jsl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=ehl -DMAC_STR=EHL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=tgllp %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=tgllp -DMAC_STR=TGLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=tgl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=tgllp -DMAC_STR=TGLLP + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=rkl %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=rkl -DMAC_STR=RKL + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_s -DMAC_STR=ADL_S 
+ +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=rpl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_s -DMAC_STR=ADL_S + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_p %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_p -DMAC_STR=ADL_P +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=adl_n %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=adl_n -DMAC_STR=ADL_N + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg1 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=dg1 -DMAC_STR=DG1 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g10 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g10 -DMAC_STR=ACM_G10 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g10 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g10 -DMAC_STR=ACM_G10 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g11 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g11 -DMAC_STR=ACM_G11 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g11 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g11 -DMAC_STR=ACM_G11 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=acm_g12 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g12 -DMAC_STR=ACM_G12 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=dg2_g12 %s 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU -DDEV_STR=acm_g12 -DMAC_STR=ACM_G12 + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=pvc %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=pvc -DMAC_STR=PVC + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=pvc_vg %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=pvc_vg -DMAC_STR=PVC_VG + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_u %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_u %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_s %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_u -DMAC_STR=MTL_U + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=mtl_h %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=mtl_h -DMAC_STR=MTL_H + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=arl_h %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=arl_h -DMAC_STR=ARL_H + +// RUN: %clangxx -### --offload-new-driver -fsycl --offload-arch=bmg_g21 %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=bmg_g21 -DMAC_STR=BMG_G21 + +// RUN: %clangxx -### 
--offload-new-driver -fsycl --offload-arch=lnl_m %s 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=TARGET-TRIPLE-GPU,CLANG-OFFLOAD-PACKAGER-GPU-OPTS -DDEV_STR=lnl_m -DMAC_STR=LNL_M // TARGET-TRIPLE-GPU: clang{{.*}} "-triple" "spir64_gen-unknown-unknown" // TARGET-TRIPLE-GPU: "-D__SYCL_TARGET_INTEL_GPU_[[MAC_STR]]__" // CLANG-OFFLOAD-PACKAGER-GPU: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl" // CLANG-OFFLOAD-PACKAGER-GPU-OPTS: clang-offload-packager{{.*}} "--image={{.*}}triple=spir64_gen-unknown-unknown,arch=[[DEV_STR]],kind=sycl{{.*}}" - From 87a9fcc1beee077d6698bb1ccbb0e963fe8ba4df Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 15 Jul 2025 18:47:02 -0700 Subject: [PATCH 03/13] Fix target macro generation. --- clang/include/clang/Basic/OffloadArch.h | 3 +- clang/lib/Driver/Driver.cpp | 73 ++-------- clang/lib/Driver/ToolChains/SYCL.cpp | 179 ++++++++++++------------ 3 files changed, 102 insertions(+), 153 deletions(-) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index 5c91128eb47db..de766898378ad 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -175,7 +175,8 @@ static inline bool IsAMDOffloadArch(OffloadArch A) { } static inline bool IsIntelCPUOffloadArch(OffloadArch Arch) { - return Arch >= OffloadArch::SKYLAKEAVX512 && Arch < OffloadArch::BMG_G21; + return Arch >= OffloadArch::SKYLAKEAVX512 && + Arch <= OffloadArch::GRANITERAPIDS; } static inline bool IsIntelGPUOffloadArch(OffloadArch Arch) { diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5fed9a64cc2da..5416f3b1520c9 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1206,6 +1206,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); bool IsAMDGPU = IsAMDOffloadArch( 
StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); + if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !Arch.equals_insensitive("native")) { Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; @@ -1222,6 +1223,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, llvm::SmallVector Archs = getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, /*SpecificToolchain=*/false); + if (!Archs.empty()) { C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); OffloadArchs[&TC] = Archs; @@ -1487,7 +1489,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // Attempt to deduce the offloading triple from the set of architectures. // We need to temporarily create these toolchains so that we can access // tools for inferring architectures. - // llvm::DenseSet Archs; for (StringRef Arch : C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) { @@ -1499,14 +1500,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringToOffloadArch(getProcessorFromTargetID(IntelGPUTriple, Arch))); bool IsIntelCPU = IsIntelCPUOffloadArch( StringToOffloadArch(getProcessorFromTargetID(IntelCPUTriple, Arch))); - if (IsNVPTX) - UniqueSYCLTriplesVec.push_back(NVPTXTriple); - else if (IsAMDGPU) - UniqueSYCLTriplesVec.push_back(AMDTriple); - else if (IsIntelGPU) - UniqueSYCLTriplesVec.push_back(IntelGPUTriple); - else if (IsIntelCPU) - UniqueSYCLTriplesVec.push_back(IntelCPUTriple); if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !IsIntelGPU && !IsIntelCPU && !Arch.equals_insensitive("native")) { @@ -1525,64 +1518,20 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, /*SpecificToolchain=*/false); if (!Archs.empty()) { C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - // if(IsIntelGPU) OffloadArchs[&TC] = Archs; } } - /* for (StringRef Arch : Archs) { - if (NVPTXTriple && IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( - getProcessorFromTargetID(*NVPTXTriple, Arch)))) - { 
DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); } else if - (AMDTriple && IsSYCLSupportedAMDGPUArch(StringToOffloadArch( - getProcessorFromTargetID(*AMDTriple, Arch)))) { - DerivedArchs[AMDTriple->getTriple()].insert(Arch); - } else if (IsSYCLSupportedIntelCPUArch(StringToOffloadArchSYCL(Arch))) - { DerivedArchs[getSYCLDeviceTriple("spir64_x86_64").getTriple()].insert( - Arch); - } else if (IsSYCLSupportedIntelGPUArch(StringToOffloadArchSYCL(Arch))) - { StringRef IntelGPUArch; - // For Intel Graphics AOT target, valid values for '--offload-arch' - // are mapped to valid device names accepted by OCLOC (the Intel GPU - AOT - // compiler) via the '-device' option. The mapIntelGPUArchName - // function maps the accepted values for '--offload-arch' to enable - SYCL - // offloading to Intel GPUs and the corresponding '-device' value - passed - // to OCLOC. - IntelGPUArch = mapIntelGPUArchName(Arch).data(); - DerivedArchs[getSYCLDeviceTriple("spir64_gen").getTriple()].insert( - IntelGPUArch); - } else { - Diag(clang::diag::err_drv_invalid_sycl_target) << Arch; - return; - } - } - */ - - /* - for (const auto &TripleAndArchs : DerivedArchs) - SYCLTriples.insert(TripleAndArchs.first()); - - for (const auto &Val : SYCLTriples) { - llvm::Triple SYCLTargetTriple(getSYCLDeviceTriple(Val.getKey())); - std::string NormalizedName = SYCLTargetTriple.normalize(); + auto TCRange = C.getOffloadToolChains(Action::OFK_SYCL); + if (TCRange.first == TCRange.second) { + Diag(clang::diag::err_drv_sycl_offload_arch_missing_value); + return; + } - // Make sure we don't have a duplicate triple. 
- auto Duplicate = FoundNormalizedTriples.find(NormalizedName); - if (Duplicate != FoundNormalizedTriples.end()) { - Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Val.getKey() << Duplicate->second; - continue; - } + for (const auto &ToolChainAndArchs : OffloadArchs) { + UniqueSYCLTriplesVec.push_back(ToolChainAndArchs.first->getTriple()); + } - // Store the current triple so that we can check for duplicates in the - // following iterations. - FoundNormalizedTriples[NormalizedName] = Val.getKey(); - UniqueSYCLTriplesVec.push_back(SYCLTargetTriple); - } - */ addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); } else { @@ -7561,8 +7510,6 @@ static StringRef getCanonicalArchString(Compilation &C, bool SpecificToolchain) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. - // Arch = Intels arch bdw - // Triple = nvidia triple OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (Triple.isNVPTX() && diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index bd8a3df933164..071c0944ed237 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1275,95 +1275,96 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) { SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { SmallString<64> Macro; - StringRef Ext = llvm::StringSwitch(DeviceName) - .Case("bdw", "INTEL_GPU_BDW") - .Case("skl", "INTEL_GPU_SKL") - .Case("kbl", "INTEL_GPU_KBL") - .Case("cfl", "INTEL_GPU_CFL") - .Case("apl", "INTEL_GPU_APL") - .Case("glk", "INTEL_GPU_GLK") - .Case("whl", "INTEL_GPU_WHL") - .Case("aml", "INTEL_GPU_AML") - .Case("cml", "INTEL_GPU_CML") - .Case("icllp", "INTEL_GPU_ICLLP") - .Case("ehl", "INTEL_GPU_EHL") - .Case("tgllp", "INTEL_GPU_TGLLP") - .Case("rkl", "INTEL_GPU_RKL") - .Case("adl_s", "INTEL_GPU_ADL_S") - .Case("adl_p", "INTEL_GPU_ADL_P") - .Case("adl_n", 
"INTEL_GPU_ADL_N") - .Case("dg1", "INTEL_GPU_DG1") - .Case("acm_g10", "INTEL_GPU_ACM_G10") - .Case("acm_g11", "INTEL_GPU_ACM_G11") - .Case("acm_g12", "INTEL_GPU_ACM_G12") - .Case("pvc", "INTEL_GPU_PVC") - .Case("pvc_vg", "INTEL_GPU_PVC_VG") - .Case("mtl_u", "INTEL_GPU_MTL_U") - .Case("mtl_h", "INTEL_GPU_MTL_H") - .Case("arl_h", "INTEL_GPU_ARL_H") - .Case("bmg_g21", "INTEL_GPU_BMG_G21") - .Case("bmg_g31", "INTEL_GPU_BMG_G31") - .Case("lnl_m", "INTEL_GPU_LNL_M") - .Case("ptl_h", "INTEL_GPU_PTL_H") - .Case("ptl_u", "INTEL_GPU_PTL_U") - .Case("wcl", "INTEL_GPU_WCL") - .Case("sm_50", "NVIDIA_GPU_SM_50") - .Case("sm_52", "NVIDIA_GPU_SM_52") - .Case("sm_53", "NVIDIA_GPU_SM_53") - .Case("sm_60", "NVIDIA_GPU_SM_60") - .Case("sm_61", "NVIDIA_GPU_SM_61") - .Case("sm_62", "NVIDIA_GPU_SM_62") - .Case("sm_70", "NVIDIA_GPU_SM_70") - .Case("sm_72", "NVIDIA_GPU_SM_72") - .Case("sm_75", "NVIDIA_GPU_SM_75") - .Case("sm_80", "NVIDIA_GPU_SM_80") - .Case("sm_86", "NVIDIA_GPU_SM_86") - .Case("sm_87", "NVIDIA_GPU_SM_87") - .Case("sm_89", "NVIDIA_GPU_SM_89") - .Case("sm_90", "NVIDIA_GPU_SM_90") - .Case("sm_90a", "NVIDIA_GPU_SM_90A") - .Case("gfx700", "AMD_GPU_GFX700") - .Case("gfx701", "AMD_GPU_GFX701") - .Case("gfx702", "AMD_GPU_GFX702") - .Case("gfx703", "AMD_GPU_GFX703") - .Case("gfx704", "AMD_GPU_GFX704") - .Case("gfx705", "AMD_GPU_GFX705") - .Case("gfx801", "AMD_GPU_GFX801") - .Case("gfx802", "AMD_GPU_GFX802") - .Case("gfx803", "AMD_GPU_GFX803") - .Case("gfx805", "AMD_GPU_GFX805") - .Case("gfx810", "AMD_GPU_GFX810") - .Case("gfx900", "AMD_GPU_GFX900") - .Case("gfx902", "AMD_GPU_GFX902") - .Case("gfx904", "AMD_GPU_GFX904") - .Case("gfx906", "AMD_GPU_GFX906") - .Case("gfx908", "AMD_GPU_GFX908") - .Case("gfx909", "AMD_GPU_GFX909") - .Case("gfx90a", "AMD_GPU_GFX90A") - .Case("gfx90c", "AMD_GPU_GFX90C") - .Case("gfx940", "AMD_GPU_GFX940") - .Case("gfx941", "AMD_GPU_GFX941") - .Case("gfx942", "AMD_GPU_GFX942") - .Case("gfx1010", "AMD_GPU_GFX1010") - .Case("gfx1011", "AMD_GPU_GFX1011") - 
.Case("gfx1012", "AMD_GPU_GFX1012") - .Case("gfx1013", "AMD_GPU_GFX1013") - .Case("gfx1030", "AMD_GPU_GFX1030") - .Case("gfx1031", "AMD_GPU_GFX1031") - .Case("gfx1032", "AMD_GPU_GFX1032") - .Case("gfx1033", "AMD_GPU_GFX1033") - .Case("gfx1034", "AMD_GPU_GFX1034") - .Case("gfx1035", "AMD_GPU_GFX1035") - .Case("gfx1036", "AMD_GPU_GFX1036") - .Case("gfx1100", "AMD_GPU_GFX1100") - .Case("gfx1101", "AMD_GPU_GFX1101") - .Case("gfx1102", "AMD_GPU_GFX1102") - .Case("gfx1103", "AMD_GPU_GFX1103") - .Case("gfx1150", "AMD_GPU_GFX1150") - .Case("gfx1151", "AMD_GPU_GFX1151") - .Case("gfx1200", "AMD_GPU_GFX1200") - .Case("gfx1201", "AMD_GPU_GFX1201") - .Default(""); + StringRef Ext = + llvm::StringSwitch(DeviceName) + .Case("bdw", "INTEL_GPU_BDW") + .Case("skl", "INTEL_GPU_SKL") + .Case("kbl", "INTEL_GPU_KBL") + .Case("cfl", "INTEL_GPU_CFL") + .Cases("apl", "bxt", "INTEL_GPU_APL") + .Case("glk", "INTEL_GPU_GLK") + .Case("whl", "INTEL_GPU_WHL") + .Case("aml", "INTEL_GPU_AML") + .Case("cml", "INTEL_GPU_CML") + .Cases("icllp", "icl", "INTEL_GPU_ICLLP") + .Cases("ehl", "jsl", "INTEL_GPU_EHL") + .Cases("tgllp", "tgl", "INTEL_GPU_TGLLP") + .Case("rkl", "INTEL_GPU_RKL") + .Cases("adl_s", "rpl_s", "INTEL_GPU_ADL_S") + .Case("adl_p", "INTEL_GPU_ADL_P") + .Case("adl_n", "INTEL_GPU_ADL_N") + .Case("dg1", "INTEL_GPU_DG1") + .Cases("acm_g10", "dg2_g10", "INTEL_GPU_ACM_G10") + .Cases("acm_g11", "dg2_g11", "INTEL_GPU_ACM_G11") + .Cases("acm_g12", "dg2_g12", "INTEL_GPU_ACM_G12") + .Case("pvc", "INTEL_GPU_PVC") + .Case("pvc_vg", "INTEL_GPU_PVC_VG") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "INTEL_GPU_MTL_U") + .Case("mtl_h", "INTEL_GPU_MTL_H") + .Case("arl_h", "INTEL_GPU_ARL_H") + .Case("bmg_g21", "INTEL_GPU_BMG_G21") + .Case("bmg_g31", "INTEL_GPU_BMG_G31") + .Case("lnl_m", "INTEL_GPU_LNL_M") + .Case("ptl_h", "INTEL_GPU_PTL_H") + .Case("ptl_u", "INTEL_GPU_PTL_U") + .Case("wcl", "INTEL_GPU_WCL") + .Case("sm_50", "NVIDIA_GPU_SM_50") + .Case("sm_52", "NVIDIA_GPU_SM_52") + .Case("sm_53", 
"NVIDIA_GPU_SM_53") + .Case("sm_60", "NVIDIA_GPU_SM_60") + .Case("sm_61", "NVIDIA_GPU_SM_61") + .Case("sm_62", "NVIDIA_GPU_SM_62") + .Case("sm_70", "NVIDIA_GPU_SM_70") + .Case("sm_72", "NVIDIA_GPU_SM_72") + .Case("sm_75", "NVIDIA_GPU_SM_75") + .Case("sm_80", "NVIDIA_GPU_SM_80") + .Case("sm_86", "NVIDIA_GPU_SM_86") + .Case("sm_87", "NVIDIA_GPU_SM_87") + .Case("sm_89", "NVIDIA_GPU_SM_89") + .Case("sm_90", "NVIDIA_GPU_SM_90") + .Case("sm_90a", "NVIDIA_GPU_SM_90A") + .Case("gfx700", "AMD_GPU_GFX700") + .Case("gfx701", "AMD_GPU_GFX701") + .Case("gfx702", "AMD_GPU_GFX702") + .Case("gfx703", "AMD_GPU_GFX703") + .Case("gfx704", "AMD_GPU_GFX704") + .Case("gfx705", "AMD_GPU_GFX705") + .Case("gfx801", "AMD_GPU_GFX801") + .Case("gfx802", "AMD_GPU_GFX802") + .Case("gfx803", "AMD_GPU_GFX803") + .Case("gfx805", "AMD_GPU_GFX805") + .Case("gfx810", "AMD_GPU_GFX810") + .Case("gfx900", "AMD_GPU_GFX900") + .Case("gfx902", "AMD_GPU_GFX902") + .Case("gfx904", "AMD_GPU_GFX904") + .Case("gfx906", "AMD_GPU_GFX906") + .Case("gfx908", "AMD_GPU_GFX908") + .Case("gfx909", "AMD_GPU_GFX909") + .Case("gfx90a", "AMD_GPU_GFX90A") + .Case("gfx90c", "AMD_GPU_GFX90C") + .Case("gfx940", "AMD_GPU_GFX940") + .Case("gfx941", "AMD_GPU_GFX941") + .Case("gfx942", "AMD_GPU_GFX942") + .Case("gfx1010", "AMD_GPU_GFX1010") + .Case("gfx1011", "AMD_GPU_GFX1011") + .Case("gfx1012", "AMD_GPU_GFX1012") + .Case("gfx1013", "AMD_GPU_GFX1013") + .Case("gfx1030", "AMD_GPU_GFX1030") + .Case("gfx1031", "AMD_GPU_GFX1031") + .Case("gfx1032", "AMD_GPU_GFX1032") + .Case("gfx1033", "AMD_GPU_GFX1033") + .Case("gfx1034", "AMD_GPU_GFX1034") + .Case("gfx1035", "AMD_GPU_GFX1035") + .Case("gfx1036", "AMD_GPU_GFX1036") + .Case("gfx1100", "AMD_GPU_GFX1100") + .Case("gfx1101", "AMD_GPU_GFX1101") + .Case("gfx1102", "AMD_GPU_GFX1102") + .Case("gfx1103", "AMD_GPU_GFX1103") + .Case("gfx1150", "AMD_GPU_GFX1150") + .Case("gfx1151", "AMD_GPU_GFX1151") + .Case("gfx1200", "AMD_GPU_GFX1200") + .Case("gfx1201", "AMD_GPU_GFX1201") + .Default(""); if 
(!Ext.empty()) { Macro = "__SYCL_TARGET_"; Macro += Ext; From c2056bb8d87b93e7c1e7f8f0d0bc8826d812e5d5 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 21 Jul 2025 17:44:36 -0700 Subject: [PATCH 04/13] Add logic for fsycl-targets flow. --- clang/include/clang/Basic/OffloadArch.h | 1 + clang/lib/Basic/OffloadArch.cpp | 10 +- clang/lib/Driver/Driver.cpp | 191 ++++++++------- clang/lib/Driver/ToolChains/Clang.cpp | 18 +- clang/lib/Driver/ToolChains/SYCL.cpp | 37 +++ clang/lib/Driver/ToolChains/SYCL.h | 5 + clang/test/Driver/openmp-offload-gpu.c | 308 ------------------------ clang/test/Driver/sycl-offload.c | 174 +------------ 8 files changed, 181 insertions(+), 563 deletions(-) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index de766898378ad..c8d7b6a633186 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -11,6 +11,7 @@ namespace llvm { class StringRef; +template class StringSwitch; } // namespace llvm namespace clang { diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index ccba7e8a29e46..edfff61f21f5e 100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -89,9 +89,9 @@ static const OffloadArchToStringMap ArchNames[] = { GFX(1250), // gfx1250 {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, // Intel CPUs - {OffloadArch::SKYLAKEAVX512, "skylakeavx512", ""}, - {OffloadArch::COREAVX2, "coreavx2", ""}, - {OffloadArch::COREI7AVX, "corei7avx", ""}, + {OffloadArch::SKYLAKEAVX512, "skylake-avx512", ""}, + {OffloadArch::COREAVX2, "core-avx2", ""}, + {OffloadArch::COREI7AVX, "corei7-avx", ""}, {OffloadArch::COREI7, "corei7", ""}, {OffloadArch::WESTMERE, "westmere", ""}, {OffloadArch::SANDYBRIDGE, "sandybridge", ""}, @@ -102,8 +102,8 @@ static const OffloadArchToStringMap ArchNames[] = { {OffloadArch::SKYLAKE, "skylake", ""}, {OffloadArch::SKX, "skx", ""}, {OffloadArch::CASCADELAKE, 
"cascadelake", ""}, - {OffloadArch::ICELAKECLIENT, "icelakeclient", ""}, - {OffloadArch::ICELAKESERVER, "icelakeserver", ""}, + {OffloadArch::ICELAKECLIENT, "icelake-client", ""}, + {OffloadArch::ICELAKESERVER, "icelake-server", ""}, {OffloadArch::SAPPHIRERAPIDS, "sapphirerapids", ""}, {OffloadArch::GRANITERAPIDS, "graniterapids", ""}, // Intel GPUS diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 5416f3b1520c9..36417fd54a5af 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1381,85 +1381,47 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, << SYCLTargetsValues->getAsString(C.getInputArgs()) << SYCLForceTarget->getAsString(C.getInputArgs()); - for (StringRef Val : SYCLTargetsValues->getValues()) { - StringRef Arch; - StringRef UserTargetName(Val); - if (auto Device = gen::isGPUTarget(Val)) { - if (Device->empty()) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } - Arch = Device->data(); - UserTargetName = "spir64_gen"; - } else if (auto Device = gen::isGPUTarget(Val)) { - if (Device->empty()) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } - Arch = Device->data(); - UserTargetName = "nvptx64-nvidia-cuda"; - } else if (auto Device = gen::isGPUTarget(Val)) { - if (Device->empty()) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } - Arch = Device->data(); - UserTargetName = "amdgcn-amd-amdhsa"; - } + std::multiset SYCLTriples; + for (StringRef SYCLTargetTriple : SYCLTargetsValues->getValues()) + SYCLTriples.insert(SYCLTargetTriple); + + llvm::StringMap FoundNormalizedTriples; + llvm::Triple TT; + for (StringRef Triple : SYCLTriples) { + + if (Triple.starts_with("intel_gpu_")) { + TT = getSYCLDeviceTriple("spir64_gen"); + } else if (Triple.starts_with("nvidia_gpu_")) { + TT = getSYCLDeviceTriple("nvptx64-nvidia-cuda"); + } else if (Triple.starts_with("amd_gpu_")) { + TT = getSYCLDeviceTriple("amdgcn-amd-amdhsa"); + } else 
+ TT = getSYCLDeviceTriple(Triple); - llvm::Triple DeviceTriple(getSYCLDeviceTriple(UserTargetName)); - if (!isValidSYCLTriple(DeviceTriple)) { - Diag(clang::diag::err_drv_invalid_sycl_target) << Val; - continue; - } + std::string NormalizedName = TT.normalize(); - // For any -fsycl-targets=spir64_gen additions, we will scan the - // additional -X* options for potential -device settings. These - // need to be added as a known Arch to the packager. - if (DeviceTriple.isSPIRAOT() && Arch.empty() && - DeviceTriple.getSubArch() == llvm::Triple::SPIRSubArch_gen) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, DeviceTriple, *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known arch to - // be associated with the current spir64_gen entry. Grab the - // right most entry. - for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - break; - } - } - } + auto [TripleIt, Inserted] = + FoundNormalizedTriples.try_emplace(NormalizedName, Triple); - // Make sure we don't have a duplicate triple. - std::string NormalizedName = getSYCLDeviceTriple(Val).normalize(); - auto Duplicate = FoundNormalizedTriples.find(NormalizedName); - if (Duplicate != FoundNormalizedTriples.end()) { + if (!Inserted) { Diag(clang::diag::warn_drv_sycl_offload_target_duplicate) - << Val << Duplicate->second; + << Triple << TripleIt->second; continue; } - // Store the current triple so that we can check for duplicates in - // the following iterations. 
- FoundNormalizedTriples[NormalizedName] = Val; - SYCLTriples.insert(DeviceTriple.normalize()); - if (!Arch.empty()) - DerivedArchs[DeviceTriple.getTriple()].insert(Arch); - } - if (!SYCLTriples.empty()) { - for (const auto &SYCLTriple : SYCLTriples) { - llvm::Triple Triple(SYCLTriple.getKey()); - UniqueSYCLTriplesVec.push_back(Triple); + // If the specified target is invalid, emit a diagnostic. + if (!isValidSYCLTriple(TT)) { + Diag(clang::diag::err_drv_invalid_sycl_target) << Triple; + continue; } + + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); } - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); } else Diag(clang::diag::warn_drv_empty_joined_argument) << SYCLTargetsValues->getAsString(C.getInputArgs()); @@ -1528,11 +1490,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, return; } - for (const auto &ToolChainAndArchs : OffloadArchs) { - UniqueSYCLTriplesVec.push_back(ToolChainAndArchs.first->getTriple()); - } - - addSYCLDefaultTriple(C, UniqueSYCLTriplesVec); + /* for (const auto &ToolChainAndArchs : OffloadArchs) { + UniqueSYCLTriplesVec.push_back(ToolChainAndArchs.first->getTriple()); + } + */ } else { // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V. @@ -1561,13 +1522,17 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, // We'll need to use the SYCL and host triples as the key into // getOffloadingDeviceToolChain, because the device toolchains we're // going to create will depend on both. 
- const ToolChain *HostTC = C.getSingleOffloadToolChain(); - for (const auto &TT : UniqueSYCLTriplesVec) { - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, - HostTC->getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); + if ((IsSYCL && !C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)) && + !HasSYCLTargetsOption) { + const ToolChain *HostTC = C.getSingleOffloadToolChain(); + for (const auto &TT : UniqueSYCLTriplesVec) { + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT, + HostTC->getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + + OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); + } } // @@ -7628,6 +7593,66 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } } + if (Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + for (StringRef SYCLTargetValue : Arg->getValues()) { + StringRef Arch; + if (auto Device = + tools::SYCL::gen::isGPUTarget( + SYCLTargetValue)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; + continue; + } + Arch = Device->data(); + } else if (auto Device = tools::SYCL::gen::isGPUTarget< + tools::SYCL::gen::NvidiaGPU>(SYCLTargetValue)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; + continue; + } + Arch = Device->data(); + } else if (auto Device = tools::SYCL::gen::isGPUTarget< + clang::driver::tools::SYCL::gen::AmdGPU>( + SYCLTargetValue)) { + if (Device->empty()) { + Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; + continue; + } + Arch = Device->data(); + } else { + Arch = StringRef(); + } + + /* + if (getSYCLDeviceTriple(SYCLTargetValue).isSPIRAOT() && + Arch.empty() && TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_gen) { const 
ToolChain *HostTC = + C.getSingleOffloadToolChain(); + auto DeviceTC = + std::make_unique( *this, TC->getTriple(), + *HostTC, C.getInputArgs()); assert(DeviceTC && "Device toolchain not + defined."); ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), + TargetArgs); + // Look for -device and use that as the known + arch to + // be associated with the current spir64_gen entry. Grab + the + // right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + Arch = TargetArgs[i + 1]; + break; + } + } + } + */ + if (!Arch.empty()) + Archs.insert(Arch); + } + } + // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { @@ -7658,7 +7683,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, C, Args, Arch, TC->getTriple(), SpecificToolchain); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (SpecificToolchain) + else if (CanonicalStr.empty() && SpecificToolchain) return llvm::SmallVector(); } } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a9fa102cbf90c..44f8bad7c29a1 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10329,9 +10329,21 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, if (Input.getType() == types::TY_Tempfilelist) File = C.getArgs().MakeArgString("@" + File); - StringRef Arch = OffloadAction->getOffloadingArch() - ? OffloadAction->getOffloadingArch() - : TCArgs.getLastArgValue(options::OPT_march_EQ); + // StringRef Arch = OffloadAction->getOffloadingArch() + // ? 
OffloadAction->getOffloadingArch() + // : TCArgs.getLastArgValue(options::OPT_march_EQ); + + StringRef Arch; + if (OffloadAction->getOffloadingArch()) { + if (TC->getTripleString() == "spir64_gen-unknown-unknown") { + Arch = mapIntelGPUArchName(OffloadAction->getOffloadingArch()); + } else { + Arch = OffloadAction->getOffloadingArch(); + } + } else { + Arch = TCArgs.getLastArgValue(options::OPT_march_EQ); + } + StringRef Kind = Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind()); diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 071c0944ed237..539e5c35495bf 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1273,6 +1273,43 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) { return Device; } +// Maps a user-provided --offload-arch value for an Intel GPU target to the +// spir64_gen device name accepted by OCLOC (the Intel GPU AOT compiler). +// Aliases share one canonical name; unknown values yield an empty string. +StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) { + StringRef Arch; + Arch = llvm::StringSwitch<StringRef>(ArchName) + .Case("bdw", "bdw") + .Case("skl", "skl") + .Case("kbl", "kbl") + .Case("cfl", "cfl") + .Cases("apl", "bxt", "apl") + .Case("glk", "glk") + .Case("whl", "whl") + .Case("aml", "aml") + .Case("cml", "cml") + .Cases("icllp", "icl", "icllp") + .Cases("ehl", "jsl", "ehl") + .Cases("tgllp", "tgl", "tgllp") + .Case("rkl", "rkl") + .Cases("adl_s", "rpl_s", "adl_s") + .Case("adl_p", "adl_p") + .Case("adl_n", "adl_n") + .Case("dg1", "dg1") + .Cases("acm_g10", "dg2_g10", "acm_g10") + .Cases("acm_g11", "dg2_g11", "acm_g11") + .Cases("acm_g12", "dg2_g12", "acm_g12") + .Case("pvc", "pvc") + .Case("pvc_vg", "pvc_vg") + .Cases("mtl_u", "mtl_s", "arl_u", "arl_s", "mtl_u") + .Case("mtl_h", "mtl_h") + .Case("arl_h", "arl_h") + .Case("bmg_g21", "bmg_g21") + .Case("lnl_m", "lnl_m") + .Default(""); + return Arch; +} + SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) { 
SmallString<64> Macro; StringRef Ext = diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 280c0bfd6b70d..468eb88c38399 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -17,6 +17,11 @@ namespace clang { namespace driver { +// This is a mapping between the user provided --offload-arch value for Intel +// GPU targets and the spir64_gen device name accepted by OCLOC (the Intel GPU +// AOT compiler). +StringRef mapIntelGPUArchName(StringRef ArchName); + class SYCLInstallationDetector { public: SYCLInstallationDetector(const Driver &D); diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index b6a9bf306f0e4..64404d7beca4e 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -1,311 +1,3 @@ -/// -/// Perform several driver tests for OpenMP offloading -/// - -/// ########################################################################### - -/// Check -Xopenmp-target uses one of the archs provided when several archs are used. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \ -// RUN: -Xopenmp-target -march=sm_52 -Xopenmp-target -march=sm_60 %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s - -// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60" - -/// ########################################################################### - -/// Check -Xopenmp-target -march=sm_52 works as expected when two triples are present. 
-// RUN: %clang -### -fopenmp=libomp \ -// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \ -// RUN: -nogpulib -nogpuinc -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s - -// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_52" - -/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP. -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \ -// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s - -// CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c" - -/// ########################################################################### - -/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \ -// RUN: -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s - -// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c" - -/// ########################################################################### - -/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP -/// Check that the flag is passed when -fopenmp-relocatable-target is used. -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \ -// RUN: -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s - -// CHK-PTXAS-RELO: ptxas{{.*}}" "-c" - -/// ########################################################################### - -/// Check that error is not thrown by toolchain when no cuda lib flag is used. -/// Check that the flag is passed when -fopenmp-relocatable-target is used. 
-// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \ -// RUN: -nogpulib -nogpuinc -fopenmp-relocatable-target -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-FLAG-NOLIBDEVICE %s - -// CHK-FLAG-NOLIBDEVICE-NOT: error:{{.*}}sm_60 - -/// ########################################################################### - -/// Check that error is not thrown by toolchain when no cuda lib device is found when using -S. -/// Check that the flag is passed when -fopenmp-relocatable-target is used. -// RUN: %clang -### -S -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \ -// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NOLIBDEVICE %s - -// CHK-NOLIBDEVICE-NOT: error:{{.*}}sm_60 - -/// ########################################################################### - -/// Check that the runtime bitcode library is part of the compile line. -/// Create a bogus bitcode library and specify it with libomptarget-nvptx-bc-path -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ -// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-BCLIB %s - -/// Specify the directory containing the bitcode lib, check clang picks the right one -// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget \ -// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: -fopenmp-relocatable-target -save-temps \ -// RUN: %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s - -/// Create a bogus bitcode library and find it with LIBRARY_PATH -// RUN: env 
LIBRARY_PATH=%S/Inputs/libomptarget/subdir %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: -fopenmp-relocatable-target -save-temps \ -// RUN: %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s - -// CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc -// CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx.bc -// CHK-ENV-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}subdir{{/|\\\\}}libomptarget-nvptx.bc -// CHK-BCLIB-NOT: {{error:|warning:}} - -/// ########################################################################### - -/// Check that the error is thrown when the libomptarget bitcode library does not exist. -// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: --libomptarget-nvptx-bc-path=not-exist.bc \ -// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-BCLIB-ERROR %s - -// CHK-BCLIB-ERROR: bitcode library 'not-exist.bc' does not exist - -/// ########################################################################### - -/// Check that the error is thrown when CUDA 9.1 or lower version is used. 
-// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \ -// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s - -// CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected - -/// Check that debug info is emitted in dwarf-2 -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \ -// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \ -// RUN: | FileCheck -check-prefix=NO_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=NO_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \ -// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s - -// DEBUG_DIRECTIVES-NOT: warning: debug -// NO_DEBUG-NOT: warning: debug -// NO_DEBUG: "-fopenmp-is-target-device" -// NO_DEBUG-NOT: "-debug-info-kind= -// NO_DEBUG: ptxas -// DEBUG_DIRECTIVES: "-triple" "nvptx64-nvidia-cuda" -// DEBUG_DIRECTIVES-SAME: "-debug-info-kind=line-directives-only" -// 
DEBUG_DIRECTIVES-SAME: "-fopenmp-is-target-device" -// DEBUG_DIRECTIVES: ptxas -// DEBUG_DIRECTIVES: "-lineinfo" - -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s -// RUN: %clang 
-### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \ -// RUN: | FileCheck -check-prefix=HAS_DEBUG %s - -// HAS_DEBUG-NOT: warning: debug -// HAS_DEBUG: "-triple" "nvptx64-nvidia-cuda" -// HAS_DEBUG-SAME: "-debug-info-kind={{constructor|line-tables-only}}" -// HAS_DEBUG-SAME: "-dwarf-version=2" -// HAS_DEBUG-SAME: "-fopenmp-is-target-device" -// HAS_DEBUG: ptxas -// HAS_DEBUG-SAME: "-g" -// HAS_DEBUG-SAME: "--dont-merge-basicblocks" -// HAS_DEBUG-SAME: "--return-at-end" - -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=CUDA_MODE %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=CUDA_MODE %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=CUDA_MODE %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=CUDA_MODE %s -// CUDA_MODE: "-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}" -// CUDA_MODE-SAME: "-fopenmp-cuda-mode" -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s -// RUN: %clang -### 
-nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ -// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s -// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" - -// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \ -// RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s -// CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048" - -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: --offload-arch=sm_52 --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ -// RUN: | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s -// OPENMP_NVPTX_WRAPPERS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers" - -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \ -// RUN: --offload-arch=sm_52 -save-temps -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \ -// RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s - -// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"] - -// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_52 \ -// RUN: -nogpulib -nogpuinc -save-temps %s -o openmp-offload-gpu 2>&1 \ -// RUN: | FileCheck -check-prefix=TRIPLE %s - -// TRIPLE: "-triple" "nvptx64-nvidia-cuda" -// TRIPLE: "-target-cpu" "sm_52" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp 
-fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ -// RUN: | FileCheck %s -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ -// RUN: --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ -// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ -// RUN: | FileCheck %s - -// verify the tools invocations -// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" -// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" -// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" -// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" - -// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-PHASES %s -// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp) -// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) -// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp) -// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp, sm_52) -// CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp, sm_52) -// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp, sm_52) -// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda:sm_52)" {5}, ir -// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp, sm_52) -// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp, sm_52) -// CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda:sm_52)" {8}, object -// CHECK-PHASES: 10: clang-offload-packager, {9}, image -// CHECK-PHASES: 11: 
offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {10}, ir -// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) -// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) -// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp) - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" -// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]" -// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OBJ:.+]]" -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ]]"], output: "[[BINARY:.+.out]]" -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" -// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib -save-temps %s 2>&1 | FileCheck %s --check-prefix=CHECK-TEMP-BINDINGS -// CHECK-TEMP-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ:.+]]"], output: "[[BINARY:.+.out]]" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp 
-fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60,sm_70,sm_80 --no-offload-arch=sm_60,sm_80 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS -// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" -// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]" -// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" -// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" -// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" -// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.*]]" -// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]" -// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ -// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \ -// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \ -// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU - -// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" -// 
CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]" -// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]" -// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]" -// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: "[[BINARY:.*]]" -// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" -// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" - -// RUN: %clang -x ir -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp --offload-arch=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-IR - -// CHECK-IR: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT_IR:.+]]"], output: "[[OBJECT:.+]]" -// CHECK-IR: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJECT]]"], output: "a.out" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp --offload-device-only \ -// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR -// CHECK-EMIT-LLVM-IR: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm" - -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp \ -// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR-BC -// CHECK-EMIT-LLVM-IR-BC: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm-bc" - -// RUN: %clang -### -fopenmp=libomp --offload-arch=sm_89 \ -// RUN: --no-cuda-version-check \ -// RUN: -nogpulib %s -o 
openmp-offload-gpu 2>&1 \ -// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s - -// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out - // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY // CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]" diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 56d44faa1efa3..0aad6b2889086 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -403,173 +403,19 @@ // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 2: compiler, {1}, ir, (host-sycl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, skl) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {6}, ir -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, sm_50) -// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {11}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 3: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// 
CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 5: compiler, {4}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 6: backend, {5}, ir, (device-sycl, sm_50) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 7: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {6}, ir +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 8: input, "[[INPUT]]", c++, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 10: compiler, {9}, ir, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 11: backend, {10}, ir, (device-sycl, skl) +// CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 12: offload, "device-sycl (spir64_gen-unknown-unknown:skl)" {11}, ir // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 13: clang-offload-packager, {7, 12}, image, (device-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 14: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {13}, ir // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 15: backend, {14}, assembler, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: clang-linker-wrapper, {16}, image, (host-sycl) -/// ########################################################################### - -// Check if valid bound arch behaviour occurs when compiling for spir-v,nvidia-gpu, and amd-gpu -// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -fsycl-targets=spir64,nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_75 -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908 -ccc-print-phases %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD %s -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) -// 
CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 2: compiler, {1}, ir, (host-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 3: input, "[[INPUT]]", c++, (device-sycl, gfx908) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, gfx908) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 5: compiler, {4}, ir, (device-sycl, gfx908) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 6: backend, {5}, ir, (device-sycl, gfx908) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 7: offload, "device-sycl (amdgcn-amd-amdhsa:gfx908)" {6}, ir -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 8: input, "[[INPUT]]", c++, (device-sycl, sm_75) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 9: preprocessor, {8}, c++-cpp-output, (device-sycl, sm_75) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 10: compiler, {9}, ir, (device-sycl, sm_75) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 11: backend, {10}, ir, (device-sycl, sm_75) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_75)" {11}, ir -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 13: input, "[[INPUT]]", c++, (device-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 14: preprocessor, {13}, c++-cpp-output, (device-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 15: compiler, {14}, ir, (device-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 16: backend, {15}, ir, (device-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 17: offload, "device-sycl (spir64-unknown-unknown)" {16}, ir -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 18: clang-offload-packager, {7, 12, 17}, image, (device-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 19: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {18}, ir -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 20: backend, {19}, assembler, (host-sycl) -// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 21: assembler, {20}, object, (host-sycl) -// 
CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 22: clang-linker-wrapper, {21}, image, (host-sycl) - -/// -fsycl --offload-new-driver with /Fo testing -// RUN: %clang_cl -fsycl --offload-new-driver /Fosomefile.obj -c %s -### 2>&1 \ -// RUN: | FileCheck -check-prefix=FO-CHECK %s -// FO-CHECK: clang{{.*}} "-fsycl-int-header=[[HEADER:.+\.h]]" "-fsycl-int-footer={{.*}}" -// FO-CHECK: clang{{.*}} "-include-internal-header" "[[HEADER]]" {{.*}} "-o" "somefile.obj" - -/// passing of a library should not trigger the unbundler -// RUN: touch %t.a -// RUN: touch %t.lib -// RUN: %clang -ccc-print-phases -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all %t.a %s 2>&1 \ -// RUN: | FileCheck -check-prefix=LIB-UNBUNDLE-CHECK %s -// RUN: %clang_cl -ccc-print-phases -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all %t.lib %s 2>&1 \ -// RUN: | FileCheck -check-prefix=LIB-UNBUNDLE-CHECK %s -// LIB-UNBUNDLE-CHECK-NOT: clang-offload-unbundler - -/// passing of only a library should not create a device link -// RUN: %clang -ccc-print-phases -fsycl --offload-new-driver -lsomelib 2>&1 \ -// RUN: | FileCheck -check-prefix=LIB-NODEVICE %s -// LIB-NODEVICE: 0: input, "somelib", object, (host-sycl) -// LIB-NODEVICE: 1: clang-linker-wrapper, {0}, image, (host-sycl) - -// Checking for an error if c-compilation is forced -// RUN: not %clangxx -### -c -fsycl --offload-new-driver -xc %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// RUN: not %clangxx -### -c -fsycl --offload-new-driver -xc-header %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// RUN: not %clangxx -### -c -fsycl --offload-new-driver -xcpp-output %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// CHECK_XC_FSYCL: '-x c{{.*}}' must not be used in conjunction with '-fsycl' - -// -std=c++17 check (check all 3 compilations) -// RUN: %clangxx -### -c -fsycl --offload-new-driver -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s -// RUN: %clang_cl -### -c 
-fsycl --offload-new-driver -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s -// CHECK-STD: clang{{.*}} "-emit-llvm-bc" {{.*}} "-std=c++17" -// CHECK-STD: clang{{.*}} "-emit-obj" {{.*}} "-std=c++17" - -// -std=c++17 override check -// RUN: %clangxx -### -c -fsycl --offload-new-driver -std=c++14 -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD-OVR %s -// RUN: %clang_cl -### -c -fsycl --offload-new-driver /std:c++14 -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD-OVR %s -// CHECK-STD-OVR: clang{{.*}} "-emit-llvm-bc" {{.*}} "-std=c++14" -// CHECK-STD-OVR: clang{{.*}} "-emit-obj" {{.*}} "-std=c++14" -// CHECK-STD-OVR-NOT: clang{{.*}} "-std=c++17" - -// Check sycl-post-link optimization level. -// Default is O2 -// RUN: %clang -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O2 -// RUN: %clang_cl -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O2 -// Common options for %clang and %clang_cl -// RUN: %clang -### -fsycl --offload-new-driver -O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1 -// RUN: %clang_cl -### -fsycl --offload-new-driver /O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os -// RUN: %clang -### -fsycl --offload-new-driver -O2 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O2 -// RUN: %clang_cl -### -fsycl --offload-new-driver /O2 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 -// RUN: %clang -### -fsycl --offload-new-driver -Os %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os -// RUN: %clang_cl -### -fsycl --offload-new-driver /Os %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os -// %clang options -// RUN: %clang -### -fsycl --offload-new-driver -O0 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O0 -// RUN: %clang -### -fsycl --offload-new-driver -Ofast %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 -// RUN: %clang -### 
-fsycl --offload-new-driver -O3 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 -// RUN: %clang -### -fsycl --offload-new-driver -Oz %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Oz -// RUN: %clang -### -fsycl --offload-new-driver -Og %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1 -// %clang_cl options -// RUN: %clang_cl -### -fsycl --offload-new-driver /Od %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O0 -// RUN: %clang_cl -### -fsycl --offload-new-driver /Ot %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 -// only the last option is considered -// RUN: %clang -### -fsycl --offload-new-driver -O2 -O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1 -// RUN: %clang_cl -### -fsycl --offload-new-driver /O2 /O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os -// CHK-POST-LINK-OPT-LEVEL-O0: --sycl-post-link-options=-O2 -// CHK-POST-LINK-OPT-LEVEL-O1: --sycl-post-link-options=-O1 -// CHK-POST-LINK-OPT-LEVEL-O2: --sycl-post-link-options=-O2 -// CHK-POST-LINK-OPT-LEVEL-O3: --sycl-post-link-options=-O3 -// CHK-POST-LINK-OPT-LEVEL-Os: --sycl-post-link-options=-Os -// CHK-POST-LINK-OPT-LEVEL-Oz: --sycl-post-link-options=-Oz - -// Verify header search dirs are added with -fsycl -// RUN: %clang -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHECK-HEADER-DIR -// RUN: %clang_cl -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHECK-HEADER-DIR -// CHECK-HEADER-DIR: clang{{.*}} "-fsycl-is-device" -// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT:[^"]*]]bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl{{[/\\]+}}stl_wrappers" -// CHECK-HEADER-DIR-NOT: -internal-isystem -// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT]]bin{{[/\\]+}}..{{[/\\]+}}include" -// CHECK-HEADER-DIR: clang{{.*}} "-fsycl-is-host" -// CHECK-HEADER-DIR-SAME: "-internal-isystem" 
"[[ROOT]]bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl{{[/\\]+}}stl_wrappers" -// CHECK-HEADER-DIR-NOT: -internal-isystem -// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT]]bin{{[/\\]+}}..{{[/\\]+}}include" - -/// Check for option incompatibility with -fsycl -// RUN: not %clang -### -fsycl --offload-new-driver -ffreestanding %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-INCOMPATIBILITY %s -DINCOMPATOPT=-ffreestanding -// RUN: not %clang -### -fsycl --offload-new-driver -static-libstdc++ %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-INCOMPATIBILITY %s -DINCOMPATOPT=-static-libstdc++ -// CHK-INCOMPATIBILITY: error: invalid argument '[[INCOMPATOPT]]' not allowed with '-fsycl' - -/// Using -fsyntax-only with -fsycl --offload-new-driver should not emit IR -// RUN: %clang -### -fsycl --offload-new-driver -fsyntax-only %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHK-FSYNTAX-ONLY %s -// RUN: %clang -### -fsycl --offload-new-driver -fsycl-device-only -fsyntax-only %s 2>&1 \ -// RUN: | FileCheck -check-prefixes=CHK-FSYNTAX-ONLY %s -// CHK-FSYNTAX-ONLY-NOT: "-emit-llvm-bc" -// CHK-FSYNTAX-ONLY: "-fsyntax-only" - -// Emit warning for treating 'c' input as 'c++' when -fsycl --offload-new-driver is used -// RUN: %clang -### -fsycl --offload-new-driver %s 2>&1 | FileCheck -check-prefix FSYCL-CHECK %s -// RUN: %clang_cl -### -fsycl --offload-new-driver %s 2>&1 | FileCheck -check-prefix FSYCL-CHECK %s -// FSYCL-CHECK: warning: treating 'c' input as 'c++' when -fsycl is used [-Wexpected-file-type] - -/// Check for linked sycl lib when using -fpreview-breaking-changes with -fsycl -// RUN: %clang -### -fsycl --offload-new-driver -fpreview-breaking-changes -target x86_64-unknown-windows-msvc %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-CHECK %s -// RUN: %clang_cl -### -fsycl --offload-new-driver -fpreview-breaking-changes %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-CHECK-CL %s -// FSYCL-PREVIEW-BREAKING-CHANGES-CHECK: 
-defaultlib:sycl{{[0-9]*}}-preview.lib -// FSYCL-PREVIEW-BREAKING-CHANGES-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib -// FSYCL-PREVIEW-BREAKING-CHANGES-CHECK-CL: "--dependent-lib=sycl{{[0-9]*}}-preview" - -/// Check for linked sycl lib when using -fpreview-breaking-changes with -fsycl -// RUN: %clang -### -fsycl --offload-new-driver -fpreview-breaking-changes -target x86_64-unknown-windows-msvc -Xclang --dependent-lib=msvcrtd %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK %s -// RUN: %clang_cl -### -fsycl --offload-new-driver -fpreview-breaking-changes /MDd %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK %s -// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd -// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib -// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib - -/// ########################################################################### - -/// Check -fsycl-decompose-functor behaviors from source -// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-DECOMP %s -// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s -// CHK-DECOMP: -fsycl-decompose-functor -// CHK-NODECOMP: -fno-sycl-decompose-functor From 51e82425253fd89392c72302a2b015620f5b821c Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 21 Jul 2025 22:04:17 -0700 Subject: [PATCH 05/13] Fix multiple fsycl targets flow. 
--- clang/lib/Driver/Driver.cpp | 57 +++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 36417fd54a5af..0f47267dd2a83 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7603,14 +7603,18 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; } - Arch = Device->data(); + if (IsIntelGPUOffloadArch(StringToOffloadArch( + getProcessorFromTargetID(TC->getTriple(), Device->data())))) + Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< tools::SYCL::gen::NvidiaGPU>(SYCLTargetValue)) { if (Device->empty()) { Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; } - Arch = Device->data(); + if (IsSYCLSupportedNVidiaGPUArch(StringToOffloadArch( + getProcessorFromTargetID(TC->getTriple(), Device->data())))) + Arch = Device->data(); } else if (auto Device = tools::SYCL::gen::isGPUTarget< clang::driver::tools::SYCL::gen::AmdGPU>( SYCLTargetValue)) { @@ -7618,36 +7622,33 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; } - Arch = Device->data(); + if (IsSYCLSupportedAMDGPUArch(StringToOffloadArch( + getProcessorFromTargetID(TC->getTriple(), Device->data())))) + Arch = Device->data(); } else { Arch = StringRef(); } - /* - if (getSYCLDeviceTriple(SYCLTargetValue).isSPIRAOT() && - Arch.empty() && TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_gen) { const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = - std::make_unique( *this, TC->getTriple(), - *HostTC, C.getInputArgs()); assert(DeviceTC && "Device toolchain not - defined."); ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), - TargetArgs); 
- // Look for -device and use that as the known - arch to - // be associated with the current spir64_gen entry. Grab - the - // right most entry. - for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - break; - } - } - } - */ + if (TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) { + const ToolChain *HostTC = + C.getSingleOffloadToolChain<Action::OFK_Host>(); + auto DeviceTC = std::make_unique<toolchains::SYCLToolChain>( + *this, TC->getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + Arch = TargetArgs[i + 1]; + break; + } + } + } + if (!Arch.empty()) Archs.insert(Arch); } From 5852d37612451d88a985416425a95bd42c482a78 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 21 Jul 2025 22:34:19 -0700 Subject: [PATCH 06/13] Readd deleted test. --- clang/test/Driver/openmp-offload-gpu.c | 318 +++++++++++++++++++++++++ 1 file changed, 318 insertions(+) diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 64404d7beca4e..92cd9aeff9351 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -1,3 +1,311 @@ +/// +/// Perform several driver tests for OpenMP offloading +/// + +/// ########################################################################### + +/// Check -Xopenmp-target uses one of the archs provided when several archs are used. 
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \ +// RUN: -Xopenmp-target -march=sm_52 -Xopenmp-target -march=sm_60 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-ARCHS %s + +// CHK-FOPENMP-TARGET-ARCHS: ptxas{{.*}}" "--gpu-name" "sm_60" + +/// ########################################################################### + +/// Check -Xopenmp-target -march=sm_52 works as expected when two triples are present. +// RUN: %clang -### -fopenmp=libomp \ +// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu,nvptx64-nvidia-cuda \ +// RUN: -nogpulib -nogpuinc -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET-COMPILATION %s + +// CHK-FOPENMP-TARGET-COMPILATION: ptxas{{.*}}" "--gpu-name" "sm_52" + +/// Check PTXAS is passed -c flag when offloading to an NVIDIA device using OpenMP. +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 | FileCheck -check-prefix=CHK-PTXAS-DEFAULT %s + +// CHK-PTXAS-DEFAULT: ptxas{{.*}}" "-c" + +/// ########################################################################### + +/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP - disable it. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fnoopenmp-relocatable-target \ +// RUN: -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PTXAS-NORELO %s + +// CHK-PTXAS-NORELO-NOT: ptxas{{.*}}" "-c" + +/// ########################################################################### + +/// PTXAS is passed -c flag by default when offloading to an NVIDIA device using OpenMP +/// Check that the flag is passed when -fopenmp-relocatable-target is used. 
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-relocatable-target \ +// RUN: -nogpulib -nogpuinc --offload-arch=sm_52 -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PTXAS-RELO %s + +// CHK-PTXAS-RELO: ptxas{{.*}}" "-c" + +/// ########################################################################### + +/// Check that error is not thrown by toolchain when no cuda lib flag is used. +/// Check that the flag is passed when -fopenmp-relocatable-target is used. +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \ +// RUN: -nogpulib -nogpuinc -fopenmp-relocatable-target -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-FLAG-NOLIBDEVICE %s + +// CHK-FLAG-NOLIBDEVICE-NOT: error:{{.*}}sm_60 + +/// ########################################################################### + +/// Check that error is not thrown by toolchain when no cuda lib device is found when using -S. +/// Check that the flag is passed when -fopenmp-relocatable-target is used. +// RUN: %clang -### -S -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 \ +// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NOLIBDEVICE %s + +// CHK-NOLIBDEVICE-NOT: error:{{.*}}sm_60 + +/// ########################################################################### + +/// Check that the runtime bitcode library is part of the compile line. 
+/// Create a bogus bitcode library and specify it with libomptarget-nvptx-bc-path +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \ +// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BCLIB %s + +/// Specify the directory containing the bitcode lib, check clang picks the right one +// RUN: %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget \ +// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: -fopenmp-relocatable-target -save-temps \ +// RUN: %s 2>&1 | FileCheck -check-prefix=CHK-BCLIB-DIR %s + +/// Create a bogus bitcode library and find it with LIBRARY_PATH +// RUN: env LIBRARY_PATH=%S/Inputs/libomptarget/subdir %clang -### -no-canonical-prefixes -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: -fopenmp-relocatable-target -save-temps \ +// RUN: %s 2>&1 | FileCheck -check-prefix=CHK-ENV-BCLIB %s + +// CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc +// CHK-BCLIB-DIR: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget{{/|\\\\}}libomptarget-nvptx.bc +// CHK-ENV-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}subdir{{/|\\\\}}libomptarget-nvptx.bc +// CHK-BCLIB-NOT: {{error:|warning:}} + +/// ########################################################################### + +/// Check that the error is thrown when the libomptarget bitcode library does not exist. 
+// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: --libomptarget-nvptx-bc-path=not-exist.bc \ +// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-BCLIB-ERROR %s + +// CHK-BCLIB-ERROR: bitcode library 'not-exist.bc' does not exist + +/// ########################################################################### + +/// Check that the error is thrown when CUDA 9.1 or lower version is used. +// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target -march=sm_52 --cuda-path=%S/Inputs/CUDA_90/usr/local/cuda \ +// RUN: -fopenmp-relocatable-target -save-temps %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-CUDA-VERSION-ERROR %s + +// CHK-CUDA-VERSION-ERROR: NVPTX target requires CUDA 9.2 or above; CUDA 9.0 detected + +/// Check that debug info is emitted in dwarf-2 +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O1 --no-cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 2>&1 \ +// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --no-cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g0 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb0 -O3 --cuda-noopt-device-debug 2>&1 \ +// RUN: | 
FileCheck -check-prefix=NO_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-directives-only 2>&1 \ +// RUN: | FileCheck -check-prefix=DEBUG_DIRECTIVES %s + +// DEBUG_DIRECTIVES-NOT: warning: debug +// NO_DEBUG-NOT: warning: debug +// NO_DEBUG: "-fopenmp-is-target-device" +// NO_DEBUG-NOT: "-debug-info-kind= +// NO_DEBUG: ptxas +// DEBUG_DIRECTIVES: "-triple" "nvptx64-nvidia-cuda" +// DEBUG_DIRECTIVES-SAME: "-debug-info-kind=line-directives-only" +// DEBUG_DIRECTIVES-SAME: "-fopenmp-is-target-device" +// DEBUG_DIRECTIVES: ptxas +// DEBUG_DIRECTIVES: "-lineinfo" + +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --no-cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O0 --cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g -O3 --cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -g2 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb2 -O0 --cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda 
-Xopenmp-target -march=sm_60 %s -g3 -O3 --cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb3 -O2 --cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -gline-tables-only 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -ggdb1 -O2 --cuda-noopt-device-debug 2>&1 \ +// RUN: | FileCheck -check-prefix=HAS_DEBUG %s + +// HAS_DEBUG-NOT: warning: debug +// HAS_DEBUG: "-triple" "nvptx64-nvidia-cuda" +// HAS_DEBUG-SAME: "-debug-info-kind={{constructor|line-tables-only}}" +// HAS_DEBUG-SAME: "-dwarf-version=2" +// HAS_DEBUG-SAME: "-fopenmp-is-target-device" +// HAS_DEBUG: ptxas +// HAS_DEBUG-SAME: "-g" +// HAS_DEBUG-SAME: "--dont-merge-basicblocks" +// HAS_DEBUG-SAME: "--return-at-end" + +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode -fopenmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_MODE %s +// CUDA_MODE: 
"-cc1"{{.*}}"-triple" "{{nvptx64-nvidia-cuda|amdgcn-amd-amdhsa}}" +// CUDA_MODE-SAME: "-fopenmp-cuda-mode" +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target -march=gfx906 %s -fopenmp-cuda-mode -fno-openmp-cuda-mode 2>&1 \ +// RUN: | FileCheck -check-prefix=NO_CUDA_MODE %s +// NO_CUDA_MODE-NOT: "-{{fno-|f}}openmp-cuda-mode" + +// RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \ +// RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s +// CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" +// CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048" + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: --offload-arch=sm_52 --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ +// RUN: | FileCheck -check-prefix=OPENMP_NVPTX_WRAPPERS %s +// OPENMP_NVPTX_WRAPPERS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" +// OPENMP_NVPTX_WRAPPERS-SAME: "-internal-isystem" "{{.*}}openmp_wrappers" + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc \ +// RUN: --offload-arch=sm_52 -save-temps -ccc-print-bindings %s -o 
openmp-offload-gpu 2>&1 \ +// RUN: | FileCheck -check-prefix=SAVE_TEMPS_NAMES %s + +// SAVE_TEMPS_NAMES-NOT: "GNU::Linker"{{.*}}["[[SAVE_TEMPS_INPUT1:.*\.o]]", "[[SAVE_TEMPS_INPUT1]]"] + +// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_52 \ +// RUN: -nogpulib -nogpuinc -save-temps %s -o openmp-offload-gpu 2>&1 \ +// RUN: | FileCheck -check-prefix=TRIPLE %s + +// TRIPLE: "-triple" "nvptx64-nvidia-cuda" +// TRIPLE: "-target-cpu" "sm_52" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ +// RUN: | FileCheck %s +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ +// RUN: --offload-arch=sm_52 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ +// RUN: --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ +// RUN: | FileCheck %s + +// verify the tools invocations +// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c" +// CHECK: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "sm_52" +// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj" +// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out" + +// RUN: %clang -ccc-print-phases --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-PHASES %s +// CHECK-PHASES: 0: input, "[[INPUT:.+]]", c, (host-openmp) +// CHECK-PHASES: 1: preprocessor, {0}, cpp-output, (host-openmp) +// CHECK-PHASES: 2: compiler, {1}, ir, (host-openmp) +// CHECK-PHASES: 3: input, "[[INPUT]]", c, (device-openmp, sm_52) +// 
CHECK-PHASES: 4: preprocessor, {3}, cpp-output, (device-openmp, sm_52) +// CHECK-PHASES: 5: compiler, {4}, ir, (device-openmp, sm_52) +// CHECK-PHASES: 6: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda:sm_52)" {5}, ir +// CHECK-PHASES: 7: backend, {6}, assembler, (device-openmp, sm_52) +// CHECK-PHASES: 8: assembler, {7}, object, (device-openmp, sm_52) +// CHECK-PHASES: 9: offload, "device-openmp (nvptx64-nvidia-cuda:sm_52)" {8}, object +// CHECK-PHASES: 10: clang-offload-packager, {9}, image +// CHECK-PHASES: 11: offload, "host-openmp (x86_64-unknown-linux-gnu)" {2}, "device-openmp (x86_64-unknown-linux-gnu)" {10}, ir +// CHECK-PHASES: 12: backend, {11}, assembler, (host-openmp) +// CHECK-PHASES: 13: assembler, {12}, object, (host-openmp) +// CHECK-PHASES: 14: clang-linker-wrapper, {13}, image, (host-openmp) + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-BINDINGS +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC:.+]]" +// CHECK-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC]]"], output: "[[DEVICE_OBJ:.+]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ]]"], output: "[[BINARY:.+.out]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 
-nogpulib -save-temps %s 2>&1 | FileCheck %s --check-prefix=CHECK-TEMP-BINDINGS +// CHECK-TEMP-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ:.+]]"], output: "[[BINARY:.+.out]]" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52 --offload-arch=sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_70 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --offload-arch=sm_52,sm_60,sm_70,sm_80 --no-offload-arch=sm_60,sm_80 -nogpuinc -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARCH-BINDINGS +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[HOST_BC:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_52]]"], output: "[[DEVICE_OBJ_SM_52:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[DEVICE_BC_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_BC_SM_70]]"], output: "[[DEVICE_OBJ_SM_70:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[DEVICE_OBJ_SM_52]]", "[[DEVICE_OBJ_SM_70]]"], output: "[[BINARY:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.*]]" +// CHECK-ARCH-BINDINGS: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], 
output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=nvptx64-nvidia-cuda --offload-arch=sm_70 \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa --offload-arch=gfx908 \ +// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-NVIDIA-AMDGPU + +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[HOST_BC:.+]]" +// CHECK-NVIDIA-AMDGPU: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[AMD_BC:.+]]" +// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]", "[[HOST_BC]]"], output: "[[NVIDIA_PTX:.+]]" +// CHECK-NVIDIA-AMDGPU: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[NVIDIA_PTX]]"], output: "[[NVIDIA_CUBIN:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Packager", inputs: ["[[AMD_BC]]", "[[NVIDIA_CUBIN]]"], output: "[[BINARY:.*]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[BINARY]]"], output: "[[HOST_OBJ:.+]]" +// CHECK-NVIDIA-AMDGPU: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out" + +// RUN: %clang -x ir -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp --offload-arch=sm_52 -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-IR + +// CHECK-IR: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT_IR:.+]]"], output: "[[OBJECT:.+]]" +// CHECK-IR: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[OBJECT]]"], output: "a.out" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp --offload-device-only \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR +// CHECK-EMIT-LLVM-IR: 
"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm" + +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -emit-llvm -S -fopenmp=libomp \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 -nogpulib %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-EMIT-LLVM-IR-BC +// CHECK-EMIT-LLVM-IR-BC: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-emit-llvm-bc" + +// RUN: %clang -### -fopenmp=libomp --offload-arch=sm_89 \ +// RUN: --no-cuda-version-check \ +// RUN: -nogpulib %s -o openmp-offload-gpu 2>&1 \ +// RUN: | FileCheck -check-prefix=DRIVER_EMBEDDING %s + +// DRIVER_EMBEDDING: -fembed-offload-object={{.*}}.out + // RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \ // RUN: -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_52 --offload-host-only -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-HOST-ONLY // CHECK-HOST-ONLY: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT:.*]]"], output: "[[OUTPUT:.*]]" @@ -87,3 +395,13 @@ // RUN: --offload-arch=sm_52 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=THINLTO-SM52 %s // THINLTO-SM52: --device-compiler=nvptx64-nvidia-cuda=-flto=thin + +// +// Check the requested architecture is passed if provided. +// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: --offload-arch=gfx906 -foffload-lto=thin -nogpulib -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s +// +// SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906" + From 0c9540ccea5d4f6d2abe53c391bf1437322998c5 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Mon, 21 Jul 2025 22:51:12 -0700 Subject: [PATCH 07/13] Remove commented out code. 
--- clang/lib/Driver/Driver.cpp | 5 ----- clang/lib/Driver/ToolChains/Clang.cpp | 4 ---- 2 files changed, 9 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 0f47267dd2a83..e95e388e5691a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1490,11 +1490,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, return; } - /* for (const auto &ToolChainAndArchs : OffloadArchs) { - UniqueSYCLTriplesVec.push_back(ToolChainAndArchs.first->getTriple()); - } - */ - } else { // If -fsycl is supplied without -fsycl-targets we will assume SPIR-V. // For -fsycl-device-only, we also setup the implied triple as needed. diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 44f8bad7c29a1..8bd0628aae054 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10329,10 +10329,6 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, if (Input.getType() == types::TY_Tempfilelist) File = C.getArgs().MakeArgString("@" + File); - // StringRef Arch = OffloadAction->getOffloadingArch() - // ? OffloadAction->getOffloadingArch() - // : TCArgs.getLastArgValue(options::OPT_march_EQ); - StringRef Arch; if (OffloadAction->getOffloadingArch()) { if (TC->getTripleString() == "spir64_gen-unknown-unknown") { From 65738b13573ac51ddf48d27c9a2d688d61e3ca57 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 22 Jul 2025 09:55:52 -0700 Subject: [PATCH 08/13] Check for specific target triple and arch matching. 
--- clang/lib/Driver/Driver.cpp | 4 ++++ clang/lib/Driver/ToolChains/SYCL.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e95e388e5691a..c26993cbe8272 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7594,6 +7594,10 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (auto Device = tools::SYCL::gen::isGPUTarget( SYCLTargetValue)) { + if (SpecificToolchain && + !(TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen)) + continue; if (Device->empty()) { Diag(clang::diag::err_drv_invalid_sycl_target) << SYCLTargetValue; continue; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 539e5c35495bf..4e00535833ca2 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1306,7 +1306,7 @@ StringRef clang::driver::mapIntelGPUArchName(StringRef ArchName) { .Case("arl_h", "arl_h") .Case("bmg_g21", "bmg_g21") .Case("lnl_m", "lnl_m") - .Default(""); + .Default(ArchName); return Arch; } From bc570ded0a759561eb988b9044c3d34793a17d63 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 22 Jul 2025 16:02:26 -0700 Subject: [PATCH 09/13] Move Xsycl-target-backend code inside --- clang/lib/Driver/Driver.cpp | 55 +++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c26993cbe8272..8ebe746a264cc 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7555,19 +7555,44 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; + StringRef Arch; for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. 
std::unique_ptr ExtractedArg = nullptr; if (Kind == Action::OFK_SYCL) { + // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" + // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" + if (TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) { + const ToolChain *HostTC = + C.getSingleOffloadToolChain(); + auto DeviceTC = std::make_unique( + *this, TC->getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + Arch = TargetArgs[i + 1]; + break; + } + } + } // For SYCL based offloading, we allow for -Xsycl-target-backend - // and -Xsycl-target-backend= for specifying options. - if (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + // and -Xsycl-target-backend=amdgcn-amd-hsa --offload-arch=gfx908 for specifying options. 
+ if (!(TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) + && Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); - } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { + // -Xsycl-target-backend --offload-arch=gfx1150 + } else if (!(TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) + && Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); @@ -7588,9 +7613,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } } - if (Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + if (Kind == Action::OFK_SYCL && Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { for (StringRef SYCLTargetValue : Arg->getValues()) { - StringRef Arch; + if (auto Device = tools::SYCL::gen::isGPUTarget( SYCLTargetValue)) { @@ -7628,25 +7653,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Arch = StringRef(); } - if (TC->getTriple().isSPIRAOT() && llvm::Triple::SPIRSubArch_gen) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, TC->getTriple(), *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known - // arch to be associated with the current spir64_gen entry. Grab - // the right most entry. 
- for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - break; - } - } - } + if (!Arch.empty()) Archs.insert(Arch); From 81ca4e126dd024d85f0f661fbb7c0278c1df0357 Mon Sep 17 00:00:00 2001 From: srividya sundaram Date: Tue, 22 Jul 2025 17:05:26 -0700 Subject: [PATCH 10/13] Add specific check for Xsycl-target-backend Arg --- clang/lib/Driver/Driver.cpp | 5 ++++- clang/test/Driver/sycl-linker-wrapper.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 32d887ed56404..16fd3bc19136a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7555,7 +7555,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (Kind == Action::OFK_SYCL) { // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" - if (TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) { + if (TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen + && Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); auto DeviceTC = std::make_unique( @@ -7570,6 +7571,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, for (int i = TargetArgs.size() - 2; i >= 0; --i) { if (StringRef(TargetArgs[i]) == "-device") { Arch = TargetArgs[i + 1]; + if (!Arch.empty()) + Archs.insert(Arch); break; } } diff --git a/clang/test/Driver/sycl-linker-wrapper.cpp b/clang/test/Driver/sycl-linker-wrapper.cpp index 8652314e055e8..0a307f124b5e9 100644 --- a/clang/test/Driver/sycl-linker-wrapper.cpp +++ b/clang/test/Driver/sycl-linker-wrapper.cpp @@ -206,7 +206,7 @@ // ------- // Generate .o file as linker wrapper input. 
// -// RUN: %clang %s -fsycl -fsycl-targets=native_cpu -c --offload-new-driver -o %t6.o +// RUN: %clang %s -fsycl -fsycl-targets=native_cpu -fno-sycl-libspirv -c --offload-new-driver -o %t6.o // // RUN: clang-linker-wrapper "--host-triple=x86_64-unknown-linux-gnu" "-sycl-device-library-location=%S/Inputs/native_cpu" "--sycl-post-link-options=SYCL_POST_LINK_OPTIONS" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" %t6.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-CMDS-NATIVE-CPU %s // CHK-CMDS-NATIVE-CPU: "{{.*}}/spirv-to-ir-wrapper" {{.*}} --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global From 4ef13192e40b63ddc3419294224650f75998f7fb Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Tue, 22 Jul 2025 17:43:25 -0700 Subject: [PATCH 11/13] Additional changes for a few issues Update Toolchain creation behaviors when dealing with default toolchains Cleanup some formatting issues Fix sycl-unique-prefix.cpp Fix issue with -fhuge-link-device-code --- clang/lib/Basic/OffloadArch.cpp | 2 +- clang/lib/Driver/Driver.cpp | 79 ++++++++++++++---------- clang/lib/Driver/ToolChains/Gnu.cpp | 2 +- clang/test/Driver/sycl-unique-prefix.cpp | 8 +-- 4 files changed, 51 insertions(+), 40 deletions(-) diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index edfff61f21f5e..1cca176efb06a 100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -144,7 +144,7 @@ static const OffloadArchToStringMap ArchNames[] = { {OffloadArch::MTL_H, "mtl_h", ""}, {OffloadArch::ARL_H, "arl_h", ""}, {OffloadArch::BMG_G21, "bmg_g21", ""}, - {OffloadArch::LNL_M, "lnl_m", ""}, + {OffloadArch::LNL_M, "lnl_m", ""}, {OffloadArch::Generic, "generic", ""}, // clang-format on }; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 16fd3bc19136a..c6e44bdeccd6a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ 
-1205,7 +1205,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch))); bool IsAMDGPU = IsAMDOffloadArch( StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch))); - if (!IsNVPTX && !IsAMDGPU && !Arch.empty() && !Arch.equals_insensitive("native")) { Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; @@ -1222,7 +1221,6 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, llvm::SmallVector Archs = getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC, /*SpecificToolchain=*/false); - if (!Archs.empty()) { C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); OffloadArchs[&TC] = Archs; @@ -1420,7 +1418,20 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, /*SpecificToolchain=*/true); - } + UniqueSYCLTriplesVec.push_back(TT); + } + if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { + // Add the default triple (spir64) toolchain. 
+ llvm::Triple DefaultTriple = + C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, + DefaultTriple, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); + } } else Diag(clang::diag::warn_drv_empty_joined_argument) << SYCLTargetsValues->getAsString(C.getInputArgs()); @@ -7555,40 +7566,43 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (Kind == Action::OFK_SYCL) { // -Xsycl-target-backend=spir64_gen "-device pvc,bdw" // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" - if (TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen - && Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { - const ToolChain *HostTC = - C.getSingleOffloadToolChain(); - auto DeviceTC = std::make_unique( - *this, TC->getTriple(), *HostTC, C.getInputArgs()); - assert(DeviceTC && "Device toolchain not defined."); - ArgStringList TargetArgs; - DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), - C.getInputArgs(), TargetArgs); - // Look for -device and use that as the known - // arch to be associated with the current spir64_gen entry. Grab - // the right most entry. 
- for (int i = TargetArgs.size() - 2; i >= 0; --i) { - if (StringRef(TargetArgs[i]) == "-device") { - Arch = TargetArgs[i + 1]; - if (!Arch.empty()) - Archs.insert(Arch); - break; - } + if (TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && + Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { + const ToolChain *HostTC = + C.getSingleOffloadToolChain(); + auto DeviceTC = std::make_unique( + *this, TC->getTriple(), *HostTC, C.getInputArgs()); + assert(DeviceTC && "Device toolchain not defined."); + ArgStringList TargetArgs; + DeviceTC->TranslateBackendTargetArgs(DeviceTC->getTriple(), + C.getInputArgs(), TargetArgs); + // Look for -device and use that as the known + // arch to be associated with the current spir64_gen entry. Grab + // the right most entry. + for (int i = TargetArgs.size() - 2; i >= 0; --i) { + if (StringRef(TargetArgs[i]) == "-device") { + Arch = TargetArgs[i + 1]; + if (!Arch.empty()) + Archs.insert(Arch); + break; } } + } // For SYCL based offloading, we allow for -Xsycl-target-backend - // and -Xsycl-target-backend=amdgcn-amd-hsa --offload-arch=gfx908 for specifying options. - if (!(TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) - && Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + // and -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908 for + // specifying options.
+ if (!(TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && + Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); // -Xsycl-target-backend --offload-arch=gfx1150 - } else if (!(TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) - && Arg->getOption().matches(options::OPT_Xsycl_backend)) { + } else if (!(TC->getTriple().isSPIRAOT() && + TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); @@ -7609,9 +7623,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } } - if (Kind == Action::OFK_SYCL && Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { + if (Kind == Action::OFK_SYCL && + Arg->getOption().matches(options::OPT_fsycl_targets_EQ)) { for (StringRef SYCLTargetValue : Arg->getValues()) { - if (auto Device = tools::SYCL::gen::isGPUTarget( SYCLTargetValue)) { @@ -7648,9 +7662,6 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } else { Arch = StringRef(); } - - - if (!Arch.empty()) Archs.insert(Arch); } @@ -7686,7 +7697,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, C, Args, Arch, TC->getTriple(), SpecificToolchain); if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); - else if (CanonicalStr.empty() && SpecificToolchain) + else if (SpecificToolchain) return llvm::SmallVector(); } } diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 9570f81dfa968..cb16c9ce6f998 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp 
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -533,7 +533,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, // If requested, use a custom linker script to handle very large device code // sections. - if (Args.hasArg(options::OPT_fsycl, options::OPT_fopenmp_targets_EQ) && + if (Args.hasArg(options::OPT_fsycl, options::OPT_offload_targets_EQ) && Args.hasFlag(options::OPT_flink_huge_device_code, options::OPT_fno_link_huge_device_code, false)) { // Create temporary linker script. Keep it if save-temps is enabled. diff --git a/clang/test/Driver/sycl-unique-prefix.cpp b/clang/test/Driver/sycl-unique-prefix.cpp index 6f39f96337020..73d874474c79f 100644 --- a/clang/test/Driver/sycl-unique-prefix.cpp +++ b/clang/test/Driver/sycl-unique-prefix.cpp @@ -3,11 +3,11 @@ // RUN: touch %t_file2.cpp // RUN: %clangxx -fsycl --offload-new-driver -fsycl-targets=spir64-unknown-unknown,spir64_gen-unknown-unknown -c %t_file1.cpp %t_file2.cpp -### 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK_PREFIX %s -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file1.cpp" -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1]]"{{.*}} "{{.*}}_file1.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file1.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX1]]"{{.*}} "{{.*}}_file1.cpp" // CHECK_PREFIX: clang{{.*}} "-fsycl-is-host"{{.*}} "-fsycl-unique-prefix=[[PREFIX1]]"{{.*}} "{{.*}}_file1.cpp" -// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file2.cpp" -// 
CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2]]"{{.*}} "{{.*}}_file2.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2:uid([A-z0-9]){16}]]"{{.*}} "{{.*}}_file2.cpp" +// CHECK_PREFIX: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-unique-prefix=[[PREFIX2]]"{{.*}} "{{.*}}_file2.cpp" // CHECK_PREFIX: clang{{.*}} "-fsycl-is-host"{{.*}} "-fsycl-unique-prefix=[[PREFIX2]]"{{.*}} "{{.*}}_file2.cpp" /// Check for prefix with preprocessed input From 9108e43d7dfd910d7ac88896a35448c769b83df3 Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Tue, 22 Jul 2025 17:45:57 -0700 Subject: [PATCH 12/13] Clang format --- clang/lib/Driver/Driver.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c6e44bdeccd6a..8520271aba65b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1419,19 +1419,19 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, /*SpecificToolchain=*/true); UniqueSYCLTriplesVec.push_back(TT); - } - if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { - // Add the default triple (spir64) toolchain. - llvm::Triple DefaultTriple = - C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); - auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, - DefaultTriple, - C.getDefaultToolChain().getTriple()); - C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); - OffloadArchs[&TC] = - getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, - /*SpecificToolchain=*/true); - } + } + if (addSYCLDefaultTriple(C, UniqueSYCLTriplesVec)) { + // Add the default triple (spir64) toolchain. 
+ llvm::Triple DefaultTriple = + C.getDriver().getSYCLDeviceTriple(getDefaultSYCLArch(C)); + auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, + DefaultTriple, + C.getDefaultToolChain().getTriple()); + C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL); + OffloadArchs[&TC] = + getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC, + /*SpecificToolchain=*/true); + } } else Diag(clang::diag::warn_drv_empty_joined_argument) << SYCLTargetsValues->getAsString(C.getInputArgs()); @@ -7568,7 +7568,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, // -fsycl-targets=spir64_gen -Xsycl-target-backend "-device pvc" if (TC->getTriple().isSPIRAOT() && TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen && - Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { + Arg->getOption().matches(options::OPT_Xsycl_backend_EQ)) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); auto DeviceTC = std::make_unique( @@ -7584,7 +7584,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (StringRef(TargetArgs[i]) == "-device") { Arch = TargetArgs[i + 1]; if (!Arch.empty()) - Archs.insert(Arch); + Archs.insert(Arch); break; } } @@ -7602,7 +7602,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Arg = ExtractedArg.get(); // -Xsycl-target-backend --offload-arch=gfx1150 } else if (!(TC->getTriple().isSPIRAOT() && - TC->getTriple().getSubArch() == llvm::Triple::SPIRSubArch_gen) && Arg->getOption().matches(options::OPT_Xsycl_backend)) { + TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_gen) && + Arg->getOption().matches(options::OPT_Xsycl_backend)) { unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); From a6db7989371cea570b145c397b5dc57a5318b6cb Mon Sep 17 00:00:00 2001 From: Michael D Toguchi Date: Tue, 22 Jul 2025 17:57:47 -0700 Subject: [PATCH 13/13] restore 
tests in sycl-offload.c --- clang/test/Driver/sycl-offload.c | 154 +++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index cc300f92a1163..d53e8dde56fe1 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -431,3 +431,157 @@ // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 16: assembler, {15}, object, (host-sycl) // CHK-PHASE-MULTI-TARG-BOUND-ARCH2: 17: clang-linker-wrapper, {16}, image, (host-sycl) +/// ########################################################################### + +// Check if valid bound arch behaviour occurs when compiling for spir-v,nvidia-gpu, and amd-gpu +// RUN: %clang -target x86_64-unknown-linux-gnu -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all -fsycl-targets=spir64,nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_75 -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908 -ccc-print-phases %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD %s +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 2: compiler, {1}, ir, (host-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 3: input, "[[INPUT]]", c++, (device-sycl, gfx908) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 4: preprocessor, {3}, c++-cpp-output, (device-sycl, gfx908) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 5: compiler, {4}, ir, (device-sycl, gfx908) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 6: backend, {5}, ir, (device-sycl, gfx908) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 7: offload, "device-sycl (amdgcn-amd-amdhsa:gfx908)" {6}, ir +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 8: input, "[[INPUT]]", c++, (device-sycl, sm_75) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 9: 
preprocessor, {8}, c++-cpp-output, (device-sycl, sm_75) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 10: compiler, {9}, ir, (device-sycl, sm_75) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 11: backend, {10}, ir, (device-sycl, sm_75) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 12: offload, "device-sycl (nvptx64-nvidia-cuda:sm_75)" {11}, ir +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 13: input, "[[INPUT]]", c++, (device-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 14: preprocessor, {13}, c++-cpp-output, (device-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 15: compiler, {14}, ir, (device-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 16: backend, {15}, ir, (device-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 17: offload, "device-sycl (spir64-unknown-unknown)" {16}, ir +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 18: clang-offload-packager, {7, 12, 17}, image, (device-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 19: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (x86_64-unknown-linux-gnu)" {18}, ir +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 20: backend, {19}, assembler, (host-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 21: assembler, {20}, object, (host-sycl) +// CHK-PHASE-MULTI-TARG-SPIRV-NVIDIA-AMD: 22: clang-linker-wrapper, {21}, image, (host-sycl) + +/// -fsycl --offload-new-driver with /Fo testing +// RUN: %clang_cl -fsycl --offload-new-driver /Fosomefile.obj -c %s -### 2>&1 \ +// RUN: | FileCheck -check-prefix=FO-CHECK %s +// FO-CHECK: clang{{.*}} "-fsycl-int-header=[[HEADER:.+\.h]]" "-fsycl-int-footer={{.*}}" +// FO-CHECK: clang{{.*}} "-include-internal-header" "[[HEADER]]" {{.*}} "-o" "somefile.obj" + +/// passing of a library should not trigger the unbundler +// RUN: touch %t.a +// RUN: touch %t.lib +// RUN: %clang -ccc-print-phases -fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all %t.a %s 2>&1 \ +// RUN: | FileCheck -check-prefix=LIB-UNBUNDLE-CHECK %s +// RUN: %clang_cl -ccc-print-phases 
-fsycl --offload-new-driver -fno-sycl-instrument-device-code -fno-sycl-device-lib=all %t.lib %s 2>&1 \ +// RUN: | FileCheck -check-prefix=LIB-UNBUNDLE-CHECK %s +// LIB-UNBUNDLE-CHECK-NOT: clang-offload-unbundler + +/// passing of only a library should not create a device link +// RUN: %clang -ccc-print-phases -fsycl --offload-new-driver -lsomelib 2>&1 \ +// RUN: | FileCheck -check-prefix=LIB-NODEVICE %s +// LIB-NODEVICE: 0: input, "somelib", object, (host-sycl) +// LIB-NODEVICE: 1: clang-linker-wrapper, {0}, image, (host-sycl) + +// Checking for an error if c-compilation is forced +// RUN: not %clangxx -### -c -fsycl --offload-new-driver -xc %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s +// RUN: not %clangxx -### -c -fsycl --offload-new-driver -xc-header %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s +// RUN: not %clangxx -### -c -fsycl --offload-new-driver -xcpp-output %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s +// CHECK_XC_FSYCL: '-x c{{.*}}' must not be used in conjunction with '-fsycl' + +// -std=c++17 check (check all 3 compilations) +// RUN: %clangxx -### -c -fsycl --offload-new-driver -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s +// RUN: %clang_cl -### -c -fsycl --offload-new-driver -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s +// CHECK-STD: clang{{.*}} "-emit-llvm-bc" {{.*}} "-std=c++17" +// CHECK-STD: clang{{.*}} "-emit-obj" {{.*}} "-std=c++17" + +// -std=c++17 override check +// RUN: %clangxx -### -c -fsycl --offload-new-driver -std=c++14 -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD-OVR %s +// RUN: %clang_cl -### -c -fsycl --offload-new-driver /std:c++14 -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD-OVR %s +// CHECK-STD-OVR: clang{{.*}} "-emit-llvm-bc" {{.*}} "-std=c++14" +// CHECK-STD-OVR: clang{{.*}} "-emit-obj" {{.*}} "-std=c++14" +// CHECK-STD-OVR-NOT: clang{{.*}} "-std=c++17" + +// Check sycl-post-link optimization level. 
+// Default is O2 +// RUN: %clang -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O2 +// RUN: %clang_cl -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O2 +// Common options for %clang and %clang_cl +// RUN: %clang -### -fsycl --offload-new-driver -O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1 +// RUN: %clang_cl -### -fsycl --offload-new-driver /O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os +// RUN: %clang -### -fsycl --offload-new-driver -O2 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O2 +// RUN: %clang_cl -### -fsycl --offload-new-driver /O2 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 +// RUN: %clang -### -fsycl --offload-new-driver -Os %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os +// RUN: %clang_cl -### -fsycl --offload-new-driver /Os %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os +// %clang options +// RUN: %clang -### -fsycl --offload-new-driver -O0 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O0 +// RUN: %clang -### -fsycl --offload-new-driver -Ofast %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 +// RUN: %clang -### -fsycl --offload-new-driver -O3 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 +// RUN: %clang -### -fsycl --offload-new-driver -Oz %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Oz +// RUN: %clang -### -fsycl --offload-new-driver -Og %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1 +// %clang_cl options +// RUN: %clang_cl -### -fsycl --offload-new-driver /Od %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O0 +// RUN: %clang_cl -### -fsycl --offload-new-driver /Ot %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O3 +// only the last option is considered +// RUN: %clang -### -fsycl --offload-new-driver -O2 -O1 %s 
2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-O1 +// RUN: %clang_cl -### -fsycl --offload-new-driver /O2 /O1 %s 2>&1 | FileCheck %s -check-prefixes=CHK-POST-LINK-OPT-LEVEL-Os +// CHK-POST-LINK-OPT-LEVEL-O0: --sycl-post-link-options=-O2 +// CHK-POST-LINK-OPT-LEVEL-O1: --sycl-post-link-options=-O1 +// CHK-POST-LINK-OPT-LEVEL-O2: --sycl-post-link-options=-O2 +// CHK-POST-LINK-OPT-LEVEL-O3: --sycl-post-link-options=-O3 +// CHK-POST-LINK-OPT-LEVEL-Os: --sycl-post-link-options=-Os +// CHK-POST-LINK-OPT-LEVEL-Oz: --sycl-post-link-options=-Oz + +// Verify header search dirs are added with -fsycl +// RUN: %clang -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHECK-HEADER-DIR +// RUN: %clang_cl -### -fsycl --offload-new-driver %s 2>&1 | FileCheck %s -check-prefixes=CHECK-HEADER-DIR +// CHECK-HEADER-DIR: clang{{.*}} "-fsycl-is-device" +// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT:[^"]*]]bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl{{[/\\]+}}stl_wrappers" +// CHECK-HEADER-DIR-NOT: -internal-isystem +// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT]]bin{{[/\\]+}}..{{[/\\]+}}include" +// CHECK-HEADER-DIR: clang{{.*}} "-fsycl-is-host" +// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT]]bin{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}sycl{{[/\\]+}}stl_wrappers" +// CHECK-HEADER-DIR-NOT: -internal-isystem +// CHECK-HEADER-DIR-SAME: "-internal-isystem" "[[ROOT]]bin{{[/\\]+}}..{{[/\\]+}}include" + +/// Check for option incompatibility with -fsycl +// RUN: not %clang -### -fsycl --offload-new-driver -ffreestanding %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INCOMPATIBILITY %s -DINCOMPATOPT=-ffreestanding +// RUN: not %clang -### -fsycl --offload-new-driver -static-libstdc++ %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INCOMPATIBILITY %s -DINCOMPATOPT=-static-libstdc++ +// CHK-INCOMPATIBILITY: error: invalid argument '[[INCOMPATOPT]]' not allowed with '-fsycl' + +/// Using -fsyntax-only with -fsycl --offload-new-driver should 
not emit IR +// RUN: %clang -### -fsycl --offload-new-driver -fsyntax-only %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-FSYNTAX-ONLY %s +// RUN: %clang -### -fsycl --offload-new-driver -fsycl-device-only -fsyntax-only %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-FSYNTAX-ONLY %s +// CHK-FSYNTAX-ONLY-NOT: "-emit-llvm-bc" +// CHK-FSYNTAX-ONLY: "-fsyntax-only" + +// Emit warning for treating 'c' input as 'c++' when -fsycl --offload-new-driver is used +// RUN: %clang -### -fsycl --offload-new-driver %s 2>&1 | FileCheck -check-prefix FSYCL-CHECK %s +// RUN: %clang_cl -### -fsycl --offload-new-driver %s 2>&1 | FileCheck -check-prefix FSYCL-CHECK %s +// FSYCL-CHECK: warning: treating 'c' input as 'c++' when -fsycl is used [-Wexpected-file-type] + +/// Check for linked sycl lib when using -fpreview-breaking-changes with -fsycl +// RUN: %clang -### -fsycl --offload-new-driver -fpreview-breaking-changes -target x86_64-unknown-windows-msvc %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-CHECK %s +// RUN: %clang_cl -### -fsycl --offload-new-driver -fpreview-breaking-changes %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-CHECK-CL %s +// FSYCL-PREVIEW-BREAKING-CHANGES-CHECK: -defaultlib:sycl{{[0-9]*}}-preview.lib +// FSYCL-PREVIEW-BREAKING-CHANGES-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib +// FSYCL-PREVIEW-BREAKING-CHANGES-CHECK-CL: "--dependent-lib=sycl{{[0-9]*}}-preview" + +/// Check for linked debug sycl lib when using -fpreview-breaking-changes with -fsycl and the debug runtime (msvcrtd, /MDd) +// RUN: %clang -### -fsycl --offload-new-driver -fpreview-breaking-changes -target x86_64-unknown-windows-msvc -Xclang --dependent-lib=msvcrtd %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK %s +// RUN: %clang_cl -### -fsycl --offload-new-driver -fpreview-breaking-changes /MDd %s 2>&1 | FileCheck -check-prefix FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK %s +// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK: --dependent-lib=sycl{{[0-9]*}}-previewd +// 
FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}.lib +// FSYCL-PREVIEW-BREAKING-CHANGES-DEBUG-CHECK-NOT: -defaultlib:sycl{{[0-9]*}}-preview.lib + +/// ########################################################################### + +/// Check -fsycl-decompose-functor behaviors from source +// RUN: %clang -### -fsycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DECOMP %s +// RUN: %clang -### -fno-sycl-decompose-functor -target x86_64-unknown-linux-gnu -fsycl -o %t.out %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NODECOMP %s +// CHK-DECOMP: -fsycl-decompose-functor +// CHK-NODECOMP: -fno-sycl-decompose-functor